# Sarsa(λ) replacing: three views of the tracked estimates.
# Every call reduces Q_track_rsl with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_rsl is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Sarsa(λ) replacing estimates through time vs. true values',
    np.max(Q_track_rsl, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Sarsa(λ) replacing estimates through time vs. true values (log scale)',
    np.max(Q_track_rsl, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Sarsa(λ) replacing estimates through time (close up)',
    np.max(Q_track_rsl, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
SARSA(\(\lambda\)) 누적
# Sarsa(λ) accumulating: three views of the tracked estimates.
# Every call reduces Q_track_asl with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_asl is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Sarsa(λ) accumulating estimates through time vs. true values',
    np.max(Q_track_asl, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Sarsa(λ) accumulating estimates through time vs. true values (log scale)',
    np.max(Q_track_asl, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Sarsa(λ) accumulating estimates through time (close up)',
    np.max(Q_track_asl, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
Q(\(\lambda\)) 대체
# Q(λ) replacing: three views of the tracked estimates.
# Every call reduces Q_track_rqll with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_rqll is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Q(λ) replacing estimates through time vs. true values',
    np.max(Q_track_rqll, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Q(λ) replacing estimates through time vs. true values (log scale)',
    np.max(Q_track_rqll, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Q(λ) replacing estimates through time (close up)',
    np.max(Q_track_rqll, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
Q(\(\lambda\)) 누적
# Q(λ) accumulating: three views of the tracked estimates.
# Every call reduces Q_track_aqll with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_aqll is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Q(λ) accumulating estimates through time vs. true values',
    np.max(Q_track_aqll, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Q(λ) accumulating estimates through time vs. true values (log scale)',
    np.max(Q_track_aqll, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Q(λ) accumulating estimates through time (close up)',
    np.max(Q_track_aqll, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
Dyna-Q
# Dyna-Q: three views of the tracked estimates.
# Every call reduces Q_track_dq with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_dq is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Dyna-Q estimates through time vs. true values',
    np.max(Q_track_dq, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Dyna-Q estimates through time vs. true values (log scale)',
    np.max(Q_track_dq, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Dyna-Q estimates through time (close up)',
    np.max(Q_track_dq, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
경로 샘플링
# Trajectory Sampling: three views of the tracked estimates.
# Every call reduces Q_track_ts with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_ts is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Trajectory Sampling estimates through time vs. true values',
    np.max(Q_track_ts, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Trajectory Sampling estimates through time vs. true values (log scale)',
    np.max(Q_track_ts, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Trajectory Sampling estimates through time (close up)',
    np.max(Q_track_ts, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
# Sarsa(λ) replacing: three views of the tracked estimates.
# Every call reduces Q_track_rsl with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_rsl is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Sarsa(λ) replacing estimates through time vs. true values',
    np.max(Q_track_rsl, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Sarsa(λ) replacing estimates through time vs. true values (log scale)',
    np.max(Q_track_rsl, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Sarsa(λ) replacing estimates through time (close up)',
    np.max(Q_track_rsl, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
SARSA(\(\lambda\)) 누적
# Sarsa(λ) accumulating: three views of the tracked estimates.
# Every call reduces Q_track_asl with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_asl is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Sarsa(λ) accumulating estimates through time vs. true values',
    np.max(Q_track_asl, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Sarsa(λ) accumulating estimates through time vs. true values (log scale)',
    np.max(Q_track_asl, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Sarsa(λ) accumulating estimates through time (close up)',
    np.max(Q_track_asl, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
Q(\(\lambda\)) 대체
# Q(λ) replacing: three views of the tracked estimates.
# Every call reduces Q_track_rqll with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_rqll is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Q(λ) replacing estimates through time vs. true values',
    np.max(Q_track_rqll, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Q(λ) replacing estimates through time vs. true values (log scale)',
    np.max(Q_track_rqll, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Q(λ) replacing estimates through time (close up)',
    np.max(Q_track_rqll, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
Q(\(\lambda\)) 누적
# Q(λ) accumulating: three views of the tracked estimates.
# Every call reduces Q_track_aqll with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_aqll is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Q(λ) accumulating estimates through time vs. true values',
    np.max(Q_track_aqll, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Q(λ) accumulating estimates through time vs. true values (log scale)',
    np.max(Q_track_aqll, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Q(λ) accumulating estimates through time (close up)',
    np.max(Q_track_aqll, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
Dyna-Q
# Dyna-Q: three views of the tracked estimates.
# Every call reduces Q_track_dq with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_dq is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Dyna-Q estimates through time vs. true values',
    np.max(Q_track_dq, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Dyna-Q estimates through time vs. true values (log scale)',
    np.max(Q_track_dq, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Dyna-Q estimates through time (close up)',
    np.max(Q_track_dq, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
경로 샘플링
# Trajectory Sampling: three views of the tracked estimates.
# Every call reduces Q_track_ts with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_ts is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Trajectory Sampling estimates through time vs. true values',
    np.max(Q_track_ts, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Trajectory Sampling estimates through time vs. true values (log scale)',
    np.max(Q_track_ts, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Trajectory Sampling estimates through time (close up)',
    np.max(Q_track_ts, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
# Sarsa(λ) replacing: three views of the tracked estimates.
# Every call reduces Q_track_rsl with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_rsl is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Sarsa(λ) replacing estimates through time vs. true values',
    np.max(Q_track_rsl, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Sarsa(λ) replacing estimates through time vs. true values (log scale)',
    np.max(Q_track_rsl, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Sarsa(λ) replacing estimates through time (close up)',
    np.max(Q_track_rsl, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
SARSA(\(\lambda\)) 누적
# Sarsa(λ) accumulating: three views of the tracked estimates.
# Every call reduces Q_track_asl with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_asl is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Sarsa(λ) accumulating estimates through time vs. true values',
    np.max(Q_track_asl, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Sarsa(λ) accumulating estimates through time vs. true values (log scale)',
    np.max(Q_track_asl, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Sarsa(λ) accumulating estimates through time (close up)',
    np.max(Q_track_asl, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
Q(\(\lambda\)) 대체
# Q(λ) replacing: three views of the tracked estimates.
# Every call reduces Q_track_rqll with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_rqll is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Q(λ) replacing estimates through time vs. true values',
    np.max(Q_track_rqll, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Q(λ) replacing estimates through time vs. true values (log scale)',
    np.max(Q_track_rqll, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Q(λ) replacing estimates through time (close up)',
    np.max(Q_track_rqll, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
Q(\(\lambda\)) 누적
# Q(λ) accumulating: three views of the tracked estimates.
# Every call reduces Q_track_aqll with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_aqll is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Q(λ) accumulating estimates through time vs. true values',
    np.max(Q_track_aqll, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Q(λ) accumulating estimates through time vs. true values (log scale)',
    np.max(Q_track_aqll, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Q(λ) accumulating estimates through time (close up)',
    np.max(Q_track_aqll, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
Dyna-Q
# Dyna-Q: three views of the tracked estimates.
# Every call reduces Q_track_dq with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_dq is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Dyna-Q estimates through time vs. true values',
    np.max(Q_track_dq, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Dyna-Q estimates through time vs. true values (log scale)',
    np.max(Q_track_dq, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Dyna-Q estimates through time (close up)',
    np.max(Q_track_dq, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)
경로 샘플링
# Trajectory Sampling: three views of the tracked estimates.
# Every call reduces Q_track_ts with a max over axis 2 (presumably the action
# axis, yielding per-state values -- confirm where Q_track_ts is built).

# 1) Linear scale, with optimal_V passed as the reference values.
plot_value_function(
    'Trajectory Sampling estimates through time vs. true values',
    np.max(Q_track_ts, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=False,
)

# 2) Same inputs, this time with log=True.
plot_value_function(
    'Trajectory Sampling estimates through time vs. true values (log scale)',
    np.max(Q_track_ts, axis=2),
    optimal_V,
    limit_items=limit_items,
    limit_value=limit_value,
    log=True,
)

# 3) Close-up: only the first cu_episodes entries along axis 0, None in place
#    of optimal_V, and the cu_* limits.
plot_value_function(
    'Trajectory Sampling estimates through time (close up)',
    np.max(Q_track_ts, axis=2)[:cu_episodes],
    None,
    limit_items=cu_limit_items,
    limit_value=cu_limit_value,
    log=False,
)