(define (last l)
(cond ((null? (rest l)) (first l))
(else (last (rest l)))))
;; states have format (pair world-state agent-position)
(define (sample-action trans start-state goal? ending)
(rejection-query
(define first-action (action-prior))
(define state-action-seq
(rollout trans (pair start-state first-action) ending))
state-action-seq
(goal? state-action-seq)))
;; input and output are state-action pairs so we can run rollout
(define (transition state-action)
(pair (forward-model state-action) (action-prior)))
(define (rollout next init end)
(if (end init)
(list init)
(append (list init) (rollout next (next init) end))))
(define cheat-det .9)
(define (forward-model state-action)
(pair
(if (flip 0.5) 'red-light 'green-light)
(let ((light (first (first state-action)))
(position (rest (first state-action)))
(action (rest state-action)))
(if (eq? action 'go)
(if (and (eq? light 'red-light)
(flip cheat-det))
0
(+ position 1))
position))))
(define discount .95)
(define (ending? symbol)
(flip (- 1 discount)))
(define goal-pos 5)
(define (goal-function state-action-seq)
(> (rest (first (last state-action-seq))) goal-pos))
(define (action-prior) (if (flip 0.5) 'go 'stop))
(define states-and-actions
(sample-action transition (pair 'green-light 1) goal-function ending?))
(for-each display states-and-actions)
References: