@@ -87,7 +87,8 @@ function RLCore.Experiment(
8787 end ,
8888 DoEveryNEpisode () do t, agent, env
8989 with_logger (lg) do
90- @info " training" reward = total_reward_per_episode. rewards[end ]
90+ @info " training" reward = total_reward_per_episode. rewards[end ] log_step_increment =
91+ 0
9192 end
9293 end ,
9394 DoEveryNStep (10000 ) do t, agent, env
@@ -180,7 +181,8 @@ function RLCore.Experiment(
180181 end ,
181182 DoEveryNEpisode () do t, agent, env
182183 with_logger (lg) do
183- @info " training" reward = total_reward_per_episode. rewards[end ]
184+ @info " training" reward = total_reward_per_episode. rewards[end ] log_step_increment =
185+ 0
184186 end
185187 end ,
186188 DoEveryNStep (10000 ) do t, agent, env
@@ -274,7 +276,8 @@ function RLCore.Experiment(
274276 end ,
275277 DoEveryNEpisode () do t, agent, env
276278 with_logger (lg) do
277- @info " training" reward = total_reward_per_episode. rewards[end ]
279+ @info " training" reward = total_reward_per_episode. rewards[end ] log_step_increment =
280+ 0
278281 end
279282 end ,
280283 DoEveryNStep (10000 ) do t, agent, env
@@ -374,7 +377,8 @@ function RLCore.Experiment(
374377 end ,
375378 DoEveryNEpisode () do t, agent, env
376379 with_logger (lg) do
377- @info " training" reward = total_reward_per_episode. rewards[end ]
380+ @info " training" reward = total_reward_per_episode. rewards[end ] log_step_increment =
381+ 0
378382 end
379383 end ,
380384 DoEveryNStep (10000 ) do t, agent, env
@@ -476,7 +480,8 @@ function RLCore.Experiment(
476480 end ,
477481 DoEveryNEpisode () do t, agent, env
478482 with_logger (lg) do
479- @info " training" reward = total_reward_per_episode. rewards[end ]
483+ @info " training" reward = total_reward_per_episode. rewards[end ] log_step_increment =
484+ 0
480485 end
481486 end ,
482487 DoEveryNStep (10000 ) do t, agent, env
@@ -563,8 +568,14 @@ function RLCore.Experiment(
563568 critic_loss = agent. policy. learner. critic_loss,
564569 entropy_loss = agent. policy. learner. entropy_loss,
565570 loss = agent. policy. learner. loss,
566- reward = total_reward_per_episode. reward[end ]
567571 )
572+ for i in 1 : length (env)
573+ if get_terminal (env[i])
574+ @info " training" reward = total_reward_per_episode. rewards[i][end ] log_step_increment =
575+ 0
576+ break
577+ end
578+ end
568579 end
569580 end ,
570581 DoEveryNStep (10000 ) do t, agent, env
@@ -656,8 +667,14 @@ function RLCore.Experiment(
656667 critic_loss = agent. policy. learner. critic_loss,
657668 entropy_loss = agent. policy. learner. entropy_loss,
658669 loss = agent. policy. learner. loss,
659- reward = total_reward_per_episode. reward[end ],
660670 )
671+ for i in 1 : length (env)
672+ if get_terminal (env[i])
673+ @info " training" reward = total_reward_per_episode. rewards[i][end ] log_step_increment =
674+ 0
675+ break
676+ end
677+ end
661678 end
662679 end ,
663680 DoEveryNStep (10000 ) do t, agent, env
@@ -764,7 +781,8 @@ function RLCore.Experiment(
764781 end ,
765782 DoEveryNEpisode () do t, agent, env
766783 with_logger (lg) do
767- @info " training" reward = total_reward_per_episode. rewards[end ]
784+ @info " training" reward = total_reward_per_episode. rewards[end ] log_step_increment =
785+ 0
768786 end
769787 end ,
770788 DoEveryNStep (10000 ) do t, agent, env
@@ -863,8 +881,14 @@ function RLCore.Experiment(
863881 actor_loss = agent. policy. learner. actor_loss[end , end ],
864882 critic_loss = agent. policy. learner. critic_loss[end , end ],
865883 loss = agent. policy. learner. loss[end , end ],
866- reward = total_reward_per_episode. reward[end ]
867884 )
885+ for i in 1 : length (env)
886+ if get_terminal (env[i])
887+ @info " training" reward = total_reward_per_episode. rewards[i][end ] log_step_increment =
888+ 0
889+ break
890+ end
891+ end
868892 end
869893 end ,
870894 DoEveryNStep (10000 ) do t, agent, env
@@ -944,7 +968,8 @@ function RLCore.Experiment(
944968 end ,
945969 DoEveryNEpisode () do t, agent, env
946970 with_logger (lg) do
947- @info " training" reward = total_reward_per_episode. rewards[end ]
971+ @info " training" reward = total_reward_per_episode. rewards[end ] log_step_increment =
972+ 0
948973 end
949974 end ,
950975 DoEveryNStep (10000 ) do t, agent, env
@@ -1038,7 +1063,8 @@ function RLCore.Experiment(
10381063 end ,
10391064 DoEveryNEpisode () do t, agent, env
10401065 with_logger (lg) do
1041- @info " training" reward = total_reward_per_episode. rewards[end ]
1066+ @info " training" reward = total_reward_per_episode. rewards[end ] log_step_increment =
1067+ 0
10421068 end
10431069 end ,
10441070 DoEveryNStep (10000 ) do t, agent, env
0 commit comments