JuliaReinforcementLearning
diff --git a/docs/logo/logo.jl b/docs/logo/logo.jl
Lines changed: 90 additions & 54 deletions
diff --git a/src/algorithms/cfr/abstract_cfr_policy.jl b/src/algorithms/cfr/abstract_cfr_policy.jl
Lines changed: 7 additions & 2 deletions
diff --git a/src/algorithms/cfr/best_response_policy.jl b/src/algorithms/cfr/best_response_policy.jl
Lines changed: 25 additions & 15 deletions
@@ -19,69 +19,105 @@ function tangram(obj)
         [O, Point(-2U, 0), Point(0, 2U)]
     elseif obj == :triangle3
         sethue("#F8CB2D")
-        [O, Point(-U,U), Point(U,U)]
+        [O, Point(-U, U), Point(U, U)]
     elseif obj == :triangle4
         sethue("#F195C8")
-        [O, Point(-U, 0), Point(0,-U)]
+        [O, Point(-U, 0), Point(0, -U)]
     elseif obj == :triangle5
         sethue("#F9F224")
         [O, Point(U, 0), Point(0, -U)]
     elseif obj == :box
         sethue("#A796C2")
-        [O, Point(0, -U), Point(U,-U), Point(U, 0)]
+        [O, Point(0, -U), Point(U, -U), Point(U, 0)]
     elseif obj == :parallelogram
         sethue("#EF3E62")
-        [O, Point(U,-U), Point(0, -U), Point(-U, 0)]
+        [O, Point(U, -U), Point(0, -U), Point(-U, 0)]
     end
 end
 
-javis(video, [
-    BackgroundAction(1:400, ground),
-    Action(1:400, (args...)->poly(tangram(:triangle1), :fill, close=true); subactions=[
-        SubAction(1:1, Translation(O, SHIFT)),
-        SubAction(50:60, Translation(O, Point(0, 2U))),
-        SubAction(60:70, Rotation(0., -π/2)),
-        SubAction(70:80, Translation(O, Point(-2U, 0))),
-        SubAction(80:90, Translation(O, Point(U, -U))),
-    ]),
-    Action(1:400, (args...)->poly(tangram(:triangle2), :fill, close=true); subactions=[
-        SubAction(1:1, Translation(O, SHIFT)),
-    ]),
-    Action(1:400, (args...)->poly(tangram(:triangle3), :fill, close=true); subactions=[
-        SubAction(1:1, Translation(O, SHIFT)),
-        SubAction(1:1, Translation(O, Point(0, -2U))),
-        SubAction(100:110, Translation(O, Point(-3U, 0))),
-        SubAction(110:120, Rotation(0., -π/2)),
-        SubAction(120:130, Translation(O, Point(-5.5U, 0))),
-        SubAction(130:140, Translation(O, Point(0, U))),
-    ]),
-    Action(1:400, (args...)->poly(tangram(:triangle5), :fill, close=true); subactions=[
-        SubAction(1:1, Translation(O, SHIFT)),
-        SubAction(1:1, Translation(O, Point(U, 0))),
-        SubAction(150:160, Translation(O, Point(0, 4.5*U))),
-        SubAction(160:170, Rotation(0., -3π/4)),
-        SubAction(170:180, Translation(O, Point(sqrt((2 - √2/2)^2 / 2) * U, - sqrt((2 - √2/2)^2 / 2) * U))),
-    ]),
-    Action(1:400, (args...)->poly(tangram(:parallelogram), :fill, close=true); subactions=[
-        SubAction(1:1, Translation(O, SHIFT)),
-        SubAction(1:1, Translation(O, Point(-U, 0))),
-        SubAction(200:210, Translation(O, Point(U, -U))),
-        SubAction(210:220, Translation(O, Point(U, 0))),
-        SubAction(220:230, Rotation(0., π/2)),
-        SubAction(230:240, Translation(O, Point(2U, 0))),
-        SubAction(240:250, Translation(O, Point(0, U))),
-    ]),
-    Action(1:400, (args...)->poly(tangram(:triangle4), :fill, close=true);subactions=[
-        SubAction(1:1, Translation(O, SHIFT)),
-        SubAction(250:260, Translation(O, Point(0, -U))),
-        SubAction(260:270, Translation(O, Point(2U, 0))),
-        SubAction(270:280, Rotation(0., -π/2)),
-        SubAction(280:290, Translation(O, Point(-4U, 0))),
-        SubAction(290:300, Translation(O, Point(0.5*U, -0.5*U))),
-    ]),
-    Action(1:400, (args...)->poly(tangram(:box), :fill, close=true);subactions=[
-        SubAction(1:1, Translation(O, SHIFT)),
-        SubAction(300:310, Translation(O, Point(-U, 0))),
-        SubAction(310:320, Rotation(0., -π/6)),
-    ]),
-]; pathname="logo.gif")
+javis(
+    video,
+    [
+        BackgroundAction(1:400, ground),
+        Action(
+            1:400,
+            (args...) -> poly(tangram(:triangle1), :fill, close = true);
+            subactions = [
+                SubAction(1:1, Translation(O, SHIFT)),
+                SubAction(50:60, Translation(O, Point(0, 2U))),
+                SubAction(60:70, Rotation(0.0, -π / 2)),
+                SubAction(70:80, Translation(O, Point(-2U, 0))),
+                SubAction(80:90, Translation(O, Point(U, -U))),
+            ],
+        ),
+        Action(
+            1:400,
+            (args...) -> poly(tangram(:triangle2), :fill, close = true);
+            subactions = [SubAction(1:1, Translation(O, SHIFT))],
+        ),
+        Action(
+            1:400,
+            (args...) -> poly(tangram(:triangle3), :fill, close = true);
+            subactions = [
+                SubAction(1:1, Translation(O, SHIFT)),
+                SubAction(1:1, Translation(O, Point(0, -2U))),
+                SubAction(100:110, Translation(O, Point(-3U, 0))),
+                SubAction(110:120, Rotation(0.0, -π / 2)),
+                SubAction(120:130, Translation(O, Point(-5.5U, 0))),
+                SubAction(130:140, Translation(O, Point(0, U))),
+            ],
+        ),
+        Action(
+            1:400,
+            (args...) -> poly(tangram(:triangle5), :fill, close = true);
+            subactions = [
+                SubAction(1:1, Translation(O, SHIFT)),
+                SubAction(1:1, Translation(O, Point(U, 0))),
+                SubAction(150:160, Translation(O, Point(0, 4.5 * U))),
+                SubAction(160:170, Rotation(0.0, -3π / 4)),
+                SubAction(
+                    170:180,
+                    Translation(
+                        O,
+                        Point(sqrt((2 - √2 / 2)^2 / 2) * U, -sqrt((2 - √2 / 2)^2 / 2) * U),
+                    ),
+                ),
+            ],
+        ),
+        Action(
+            1:400,
+            (args...) -> poly(tangram(:parallelogram), :fill, close = true);
+            subactions = [
+                SubAction(1:1, Translation(O, SHIFT)),
+                SubAction(1:1, Translation(O, Point(-U, 0))),
+                SubAction(200:210, Translation(O, Point(U, -U))),
+                SubAction(210:220, Translation(O, Point(U, 0))),
+                SubAction(220:230, Rotation(0.0, π / 2)),
+                SubAction(230:240, Translation(O, Point(2U, 0))),
+                SubAction(240:250, Translation(O, Point(0, U))),
+            ],
+        ),
+        Action(
+            1:400,
+            (args...) -> poly(tangram(:triangle4), :fill, close = true);
+            subactions = [
+                SubAction(1:1, Translation(O, SHIFT)),
+                SubAction(250:260, Translation(O, Point(0, -U))),
+                SubAction(260:270, Translation(O, Point(2U, 0))),
+                SubAction(270:280, Rotation(0.0, -π / 2)),
+                SubAction(280:290, Translation(O, Point(-4U, 0))),
+                SubAction(290:300, Translation(O, Point(0.5 * U, -0.5 * U))),
+            ],
+        ),
+        Action(
+            1:400,
+            (args...) -> poly(tangram(:box), :fill, close = true);
+            subactions = [
+                SubAction(1:1, Translation(O, SHIFT)),
+                SubAction(300:310, Translation(O, Point(-U, 0))),
+                SubAction(310:320, Rotation(0.0, -π / 6)),
+            ],
+        ),
+    ];
+    pathname = "logo.gif",
+)
@@ -1,6 +1,11 @@
 abstract type AbstractCFRPolicy <: AbstractPolicy end
 
-function Base.run(p::AbstractCFRPolicy, env::AbstractEnv, stop_condition=StopAfterStep(1), hook=EmptyHook())
+function Base.run(
+    p::AbstractCFRPolicy,
+    env::AbstractEnv,
+    stop_condition = StopAfterStep(1),
+    hook = EmptyHook(),
+)
     @assert NumAgentStyle(env) isa MultiAgent
     @assert DynamicStyle(env) === SEQUENTIAL
     @assert RewardStyle(env) === TERMINAL_REWARD
@@ -15,4 +20,4 @@ function Base.run(p::AbstractCFRPolicy, env::AbstractEnv, stop_condition=StopAft
         stop_condition(p, env) && break
     end
     update!(p)
-end
+end
@@ -1,9 +1,9 @@
 export BestResponsePolicy
 
-using Flux:onehot
+using Flux: onehot
 
-struct BestResponsePolicy{E, S, A, X, P<:AbstractPolicy} <: AbstractCFRPolicy
-    cfr_reach_prob::Dict{S, Vector{Pair{E, Float64}}}
+struct BestResponsePolicy{E,S,A,X,P<:AbstractPolicy} <: AbstractCFRPolicy
+    cfr_reach_prob::Dict{S,Vector{Pair{E,Float64}}}
     best_response_action_cache::Dict{S,A}
     best_response_value_cache::Dict{E,Float64}
     best_responder::X
@@ -17,23 +17,29 @@ end
 - `env`, the environment to handle.
 - `best_responder`, the player to choose best response action.
 """
-function BestResponsePolicy(policy, env, best_responder; state_type=String, action_type=Int)
+function BestResponsePolicy(
+    policy,
+    env,
+    best_responder;
+    state_type = String,
+    action_type = Int,
+)
     # S = typeof(get_state(env))  # TODO: currently it will break the OpenSpielEnv. Can not get information set for chance player
     # A = eltype(get_actions(env))  # TODO: for chance players it will return ActionProbPair
     S = state_type
     A = action_type
     E = typeof(env)
 
     p = BestResponsePolicy(
-        Dict{S, Vector{Pair{E, Float64}}}(),
-        Dict{S, A}(),
-        Dict{E, Float64}(),
+        Dict{S,Vector{Pair{E,Float64}}}(),
+        Dict{S,A}(),
+        Dict{E,Float64}(),
         best_responder,
-        policy
+        policy,
     )
 
     e = copy(env)
-    @assert e == env  "The copy method doesn't seem to be implemented for environment: $env"
+    @assert e == env "The copy method doesn't seem to be implemented for environment: $env"
     @assert hash(e) == hash(env) "The hash method doesn't seem to be implemented for environment: $env"
     RLBase.reset!(e)  # start from the root!
     init_cfr_reach_prob!(p, e)
@@ -48,7 +54,7 @@ function (p::BestResponsePolicy)(env::AbstractEnv)
     end
 end
 
-function init_cfr_reach_prob!(p, env, reach_prob=1.0)
+function init_cfr_reach_prob!(p, env, reach_prob = 1.0)
     if !get_terminal(env)
         if get_current_player(env) == p.best_responder
             push!(get!(p.cfr_reach_prob, get_state(env), []), env => reach_prob)
@@ -62,7 +68,11 @@ function init_cfr_reach_prob!(p, env, reach_prob=1.0)
             end
         else  # opponents
             for a in get_legal_actions(env)
-                init_cfr_reach_prob!(p, child(env, a), reach_prob * get_prob(p.policy, env, a))
+                init_cfr_reach_prob!(
+                    p,
+                    child(env, a),
+                    reach_prob * get_prob(p.policy, env, a),
+                )
             end
         end
     end
@@ -73,16 +83,16 @@ function best_response_value(p, env)
         if get_terminal(env)
             get_reward(env, p.best_responder)
         elseif get_current_player(env) == p.best_responder
-                a = best_response_action(p, env)
-                best_response_value(p, child(env, a))
+            a = best_response_action(p, env)
+            best_response_value(p, child(env, a))
         elseif get_current_player(env) == get_chance_player(env)
-            v = 0.
+            v = 0.0
             for a::ActionProbPair in get_actions(env)
                 v += a.prob * best_response_value(p, child(env, a))
             end
             v
         else
-            v = 0.
+            v = 0.0
             for a in get_legal_actions(env)
                 v += get_prob(p.policy, env, a) * best_response_value(p, child(env, a))
             end