1
+ """ Evaluates Wave Lunar Lander model """
2
+
1
3
import common .shutup as shutup
2
4
3
5
shutup .future_warnings ()
11
13
from stable_baselines import A2C # noqa: E402
12
14
from arlie .envs .lunar_lander .score import LunarLanderScore # noqa: E402
13
15
16
+ wave = True
14
17
eval_timesteps = 1e5
15
- multi = True
16
18
num_cpu = 12
17
19
18
20
@@ -23,9 +25,11 @@ def evaluate(env, model, num_steps=1000):
23
25
:param num_steps: (int) total number of timesteps to evaluate, split across the env.num_envs environments
24
26
:return: (float or str) Mean score ("NaN" when wave is False), (float) Mean reward, (int) Number of episodes performed
25
27
"""
26
- scores = [LunarLanderScore () for _ in range (env .num_envs )]
27
- episode_scores = [[0.0 ] for _ in range (env .num_envs )]
28
28
episode_rewards = [[0.0 ] for _ in range (env .num_envs )]
29
+ if wave :
30
+ scores = [LunarLanderScore () for _ in range (env .num_envs )]
31
+ episode_scores = [[0.0 ] for _ in range (env .num_envs )]
32
+
29
33
obs = env .reset ()
30
34
steps = (int )(num_steps // env .num_envs )
31
35
for i in range (steps ):
@@ -37,24 +41,30 @@ def evaluate(env, model, num_steps=1000):
37
41
38
42
# Stats
39
43
for i in range (env .num_envs ):
40
- scores [i ].store_step (obs [i ], actions [i ], info [i ])
41
- episode_scores [i ][- 1 ] = scores [i ].get ()
42
44
episode_rewards [i ][- 1 ] += rewards [i ]
45
+ if wave :
46
+ scores [i ].store_step (obs [i ], actions [i ], info [i ])
47
+ episode_scores [i ][- 1 ] = scores [i ].get ()
43
48
if dones [i ]:
44
- episode_scores [i ].append (0.0 )
45
49
episode_rewards [i ].append (0.0 )
46
- scores [i ].reset ()
50
+ if wave :
51
+ episode_scores [i ].append (0.0 )
52
+ scores [i ].reset ()
47
53
48
- mean_scores = [0.0 for _ in range (env .num_envs )]
49
54
mean_rewards = [0.0 for _ in range (env .num_envs )]
55
+ if wave :
56
+ mean_scores = [0.0 for _ in range (env .num_envs )]
50
57
n_episodes = 0
51
58
for i in range (env .num_envs ):
52
- mean_scores [i ] = np .mean (episode_scores [i ][:- 1 ])
53
59
mean_rewards [i ] = np .mean (episode_rewards [i ][:- 1 ])
60
+ if wave :
61
+ mean_scores [i ] = np .mean (episode_scores [i ][:- 1 ])
54
62
n_episodes += len (episode_rewards [i ]) - 1
55
63
56
64
# Compute the overall mean score and mean reward across environments
57
- mean_score = round (np .mean (mean_scores ), 1 )
65
+ mean_score = "NaN"
66
+ if wave :
67
+ mean_score = round (np .mean (mean_scores ), 1 )
58
68
mean_reward = round (np .mean (mean_rewards ), 1 )
59
69
60
70
return mean_score , mean_reward , n_episodes
@@ -70,10 +80,11 @@ def evaluate(env, model, num_steps=1000):
70
80
print ("Path '{}' does not exist." .format (model_path ))
71
81
exit (- 1 )
72
82
83
+ id = "LunarLander" if wave else "LunarLander-v2"
73
84
if num_cpu > 1 :
74
- env = make_multi_env (num_cpu , "LunarLander" , True , render_mode = False )
85
+ env = make_multi_env (num_cpu , id , wave , render_mode = False , reset_mode = "random" )
75
86
else :
76
- env = make_env ("LunarLander" , True , render_mode = False , reset_mode = "random" )
87
+ env = make_env (id , wave , render_mode = False , reset_mode = "random" )
77
88
78
89
if len (sys .argv ) == 1 :
79
90
print ("No model provided" )
0 commit comments