Skip to content
This repository was archived by the owner on May 6, 2021. It is now read-only.

Commit 096535a

Browse files
Format .jl files (#98)
Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com>
1 parent 58e3044 commit 096535a

16 files changed

Lines changed: 594 additions & 374 deletions

docs/logo/logo.jl

Lines changed: 90 additions & 54 deletions
Original file line number | Diff line number | Diff line change
@@ -19,69 +19,105 @@ function tangram(obj)
1919
[O, Point(-2U, 0), Point(0, 2U)]
2020
elseif obj == :triangle3
2121
sethue("#F8CB2D")
22-
[O, Point(-U,U), Point(U,U)]
22+
[O, Point(-U, U), Point(U, U)]
2323
elseif obj == :triangle4
2424
sethue("#F195C8")
25-
[O, Point(-U, 0), Point(0,-U)]
25+
[O, Point(-U, 0), Point(0, -U)]
2626
elseif obj == :triangle5
2727
sethue("#F9F224")
2828
[O, Point(U, 0), Point(0, -U)]
2929
elseif obj == :box
3030
sethue("#A796C2")
31-
[O, Point(0, -U), Point(U,-U), Point(U, 0)]
31+
[O, Point(0, -U), Point(U, -U), Point(U, 0)]
3232
elseif obj == :parallelogram
3333
sethue("#EF3E62")
34-
[O, Point(U,-U), Point(0, -U), Point(-U, 0)]
34+
[O, Point(U, -U), Point(0, -U), Point(-U, 0)]
3535
end
3636
end
3737

38-
javis(video, [
39-
BackgroundAction(1:400, ground),
40-
Action(1:400, (args...)->poly(tangram(:triangle1), :fill, close=true); subactions=[
41-
SubAction(1:1, Translation(O, SHIFT)),
42-
SubAction(50:60, Translation(O, Point(0, 2U))),
43-
SubAction(60:70, Rotation(0., -π/2)),
44-
SubAction(70:80, Translation(O, Point(-2U, 0))),
45-
SubAction(80:90, Translation(O, Point(U, -U))),
46-
]),
47-
Action(1:400, (args...)->poly(tangram(:triangle2), :fill, close=true); subactions=[
48-
SubAction(1:1, Translation(O, SHIFT)),
49-
]),
50-
Action(1:400, (args...)->poly(tangram(:triangle3), :fill, close=true); subactions=[
51-
SubAction(1:1, Translation(O, SHIFT)),
52-
SubAction(1:1, Translation(O, Point(0, -2U))),
53-
SubAction(100:110, Translation(O, Point(-3U, 0))),
54-
SubAction(110:120, Rotation(0., -π/2)),
55-
SubAction(120:130, Translation(O, Point(-5.5U, 0))),
56-
SubAction(130:140, Translation(O, Point(0, U))),
57-
]),
58-
Action(1:400, (args...)->poly(tangram(:triangle5), :fill, close=true); subactions=[
59-
SubAction(1:1, Translation(O, SHIFT)),
60-
SubAction(1:1, Translation(O, Point(U, 0))),
61-
SubAction(150:160, Translation(O, Point(0, 4.5*U))),
62-
SubAction(160:170, Rotation(0., -3π/4)),
63-
SubAction(170:180, Translation(O, Point(sqrt((2 - 2/2)^2 / 2) * U, - sqrt((2 - 2/2)^2 / 2) * U))),
64-
]),
65-
Action(1:400, (args...)->poly(tangram(:parallelogram), :fill, close=true); subactions=[
66-
SubAction(1:1, Translation(O, SHIFT)),
67-
SubAction(1:1, Translation(O, Point(-U, 0))),
68-
SubAction(200:210, Translation(O, Point(U, -U))),
69-
SubAction(210:220, Translation(O, Point(U, 0))),
70-
SubAction(220:230, Rotation(0., π/2)),
71-
SubAction(230:240, Translation(O, Point(2U, 0))),
72-
SubAction(240:250, Translation(O, Point(0, U))),
73-
]),
74-
Action(1:400, (args...)->poly(tangram(:triangle4), :fill, close=true);subactions=[
75-
SubAction(1:1, Translation(O, SHIFT)),
76-
SubAction(250:260, Translation(O, Point(0, -U))),
77-
SubAction(260:270, Translation(O, Point(2U, 0))),
78-
SubAction(270:280, Rotation(0., -π/2)),
79-
SubAction(280:290, Translation(O, Point(-4U, 0))),
80-
SubAction(290:300, Translation(O, Point(0.5*U, -0.5*U))),
81-
]),
82-
Action(1:400, (args...)->poly(tangram(:box), :fill, close=true);subactions=[
83-
SubAction(1:1, Translation(O, SHIFT)),
84-
SubAction(300:310, Translation(O, Point(-U, 0))),
85-
SubAction(310:320, Rotation(0., -π/6)),
86-
]),
87-
]; pathname="logo.gif")
38+
javis(
39+
video,
40+
[
41+
BackgroundAction(1:400, ground),
42+
Action(
43+
1:400,
44+
(args...) -> poly(tangram(:triangle1), :fill, close = true);
45+
subactions = [
46+
SubAction(1:1, Translation(O, SHIFT)),
47+
SubAction(50:60, Translation(O, Point(0, 2U))),
48+
SubAction(60:70, Rotation(0.0, -π / 2)),
49+
SubAction(70:80, Translation(O, Point(-2U, 0))),
50+
SubAction(80:90, Translation(O, Point(U, -U))),
51+
],
52+
),
53+
Action(
54+
1:400,
55+
(args...) -> poly(tangram(:triangle2), :fill, close = true);
56+
subactions = [SubAction(1:1, Translation(O, SHIFT))],
57+
),
58+
Action(
59+
1:400,
60+
(args...) -> poly(tangram(:triangle3), :fill, close = true);
61+
subactions = [
62+
SubAction(1:1, Translation(O, SHIFT)),
63+
SubAction(1:1, Translation(O, Point(0, -2U))),
64+
SubAction(100:110, Translation(O, Point(-3U, 0))),
65+
SubAction(110:120, Rotation(0.0, -π / 2)),
66+
SubAction(120:130, Translation(O, Point(-5.5U, 0))),
67+
SubAction(130:140, Translation(O, Point(0, U))),
68+
],
69+
),
70+
Action(
71+
1:400,
72+
(args...) -> poly(tangram(:triangle5), :fill, close = true);
73+
subactions = [
74+
SubAction(1:1, Translation(O, SHIFT)),
75+
SubAction(1:1, Translation(O, Point(U, 0))),
76+
SubAction(150:160, Translation(O, Point(0, 4.5 * U))),
77+
SubAction(160:170, Rotation(0.0, -3π / 4)),
78+
SubAction(
79+
170:180,
80+
Translation(
81+
O,
82+
Point(sqrt((2 - 2 / 2)^2 / 2) * U, -sqrt((2 - 2 / 2)^2 / 2) * U),
83+
),
84+
),
85+
],
86+
),
87+
Action(
88+
1:400,
89+
(args...) -> poly(tangram(:parallelogram), :fill, close = true);
90+
subactions = [
91+
SubAction(1:1, Translation(O, SHIFT)),
92+
SubAction(1:1, Translation(O, Point(-U, 0))),
93+
SubAction(200:210, Translation(O, Point(U, -U))),
94+
SubAction(210:220, Translation(O, Point(U, 0))),
95+
SubAction(220:230, Rotation(0.0, π / 2)),
96+
SubAction(230:240, Translation(O, Point(2U, 0))),
97+
SubAction(240:250, Translation(O, Point(0, U))),
98+
],
99+
),
100+
Action(
101+
1:400,
102+
(args...) -> poly(tangram(:triangle4), :fill, close = true);
103+
subactions = [
104+
SubAction(1:1, Translation(O, SHIFT)),
105+
SubAction(250:260, Translation(O, Point(0, -U))),
106+
SubAction(260:270, Translation(O, Point(2U, 0))),
107+
SubAction(270:280, Rotation(0.0, -π / 2)),
108+
SubAction(280:290, Translation(O, Point(-4U, 0))),
109+
SubAction(290:300, Translation(O, Point(0.5 * U, -0.5 * U))),
110+
],
111+
),
112+
Action(
113+
1:400,
114+
(args...) -> poly(tangram(:box), :fill, close = true);
115+
subactions = [
116+
SubAction(1:1, Translation(O, SHIFT)),
117+
SubAction(300:310, Translation(O, Point(-U, 0))),
118+
SubAction(310:320, Rotation(0.0, -π / 6)),
119+
],
120+
),
121+
];
122+
pathname = "logo.gif",
123+
)

src/algorithms/cfr/abstract_cfr_policy.jl

Lines changed: 7 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,11 @@
11
abstract type AbstractCFRPolicy <: AbstractPolicy end
22

3-
function Base.run(p::AbstractCFRPolicy, env::AbstractEnv, stop_condition=StopAfterStep(1), hook=EmptyHook())
3+
function Base.run(
4+
p::AbstractCFRPolicy,
5+
env::AbstractEnv,
6+
stop_condition = StopAfterStep(1),
7+
hook = EmptyHook(),
8+
)
49
@assert NumAgentStyle(env) isa MultiAgent
510
@assert DynamicStyle(env) === SEQUENTIAL
611
@assert RewardStyle(env) === TERMINAL_REWARD
@@ -15,4 +20,4 @@ function Base.run(p::AbstractCFRPolicy, env::AbstractEnv, stop_condition=StopAft
1520
stop_condition(p, env) && break
1621
end
1722
update!(p)
18-
end
23+
end

src/algorithms/cfr/best_response_policy.jl

Lines changed: 25 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,9 @@
11
export BestResponsePolicy
22

3-
using Flux:onehot
3+
using Flux: onehot
44

5-
struct BestResponsePolicy{E, S, A, X, P<:AbstractPolicy} <: AbstractCFRPolicy
6-
cfr_reach_prob::Dict{S, Vector{Pair{E, Float64}}}
5+
struct BestResponsePolicy{E,S,A,X,P<:AbstractPolicy} <: AbstractCFRPolicy
6+
cfr_reach_prob::Dict{S,Vector{Pair{E,Float64}}}
77
best_response_action_cache::Dict{S,A}
88
best_response_value_cache::Dict{E,Float64}
99
best_responder::X
@@ -17,23 +17,29 @@ end
1717
- `env`, the environment to handle.
1818
- `best_responder`, the player to choose best response action.
1919
"""
20-
function BestResponsePolicy(policy, env, best_responder; state_type=String, action_type=Int)
20+
function BestResponsePolicy(
21+
policy,
22+
env,
23+
best_responder;
24+
state_type = String,
25+
action_type = Int,
26+
)
2127
# S = typeof(get_state(env)) # TODO: currently it will break the OpenSpielEnv. Can not get information set for chance player
2228
# A = eltype(get_actions(env)) # TODO: for chance players it will return ActionProbPair
2329
S = state_type
2430
A = action_type
2531
E = typeof(env)
2632

2733
p = BestResponsePolicy(
28-
Dict{S, Vector{Pair{E, Float64}}}(),
29-
Dict{S, A}(),
30-
Dict{E, Float64}(),
34+
Dict{S,Vector{Pair{E,Float64}}}(),
35+
Dict{S,A}(),
36+
Dict{E,Float64}(),
3137
best_responder,
32-
policy
38+
policy,
3339
)
3440

3541
e = copy(env)
36-
@assert e == env "The copy method doesn't seem to be implemented for environment: $env"
42+
@assert e == env "The copy method doesn't seem to be implemented for environment: $env"
3743
@assert hash(e) == hash(env) "The hash method doesn't seem to be implemented for environment: $env"
3844
RLBase.reset!(e) # start from the root!
3945
init_cfr_reach_prob!(p, e)
@@ -48,7 +54,7 @@ function (p::BestResponsePolicy)(env::AbstractEnv)
4854
end
4955
end
5056

51-
function init_cfr_reach_prob!(p, env, reach_prob=1.0)
57+
function init_cfr_reach_prob!(p, env, reach_prob = 1.0)
5258
if !get_terminal(env)
5359
if get_current_player(env) == p.best_responder
5460
push!(get!(p.cfr_reach_prob, get_state(env), []), env => reach_prob)
@@ -62,7 +68,11 @@ function init_cfr_reach_prob!(p, env, reach_prob=1.0)
6268
end
6369
else # opponents
6470
for a in get_legal_actions(env)
65-
init_cfr_reach_prob!(p, child(env, a), reach_prob * get_prob(p.policy, env, a))
71+
init_cfr_reach_prob!(
72+
p,
73+
child(env, a),
74+
reach_prob * get_prob(p.policy, env, a),
75+
)
6676
end
6777
end
6878
end
@@ -73,16 +83,16 @@ function best_response_value(p, env)
7383
if get_terminal(env)
7484
get_reward(env, p.best_responder)
7585
elseif get_current_player(env) == p.best_responder
76-
a = best_response_action(p, env)
77-
best_response_value(p, child(env, a))
86+
a = best_response_action(p, env)
87+
best_response_value(p, child(env, a))
7888
elseif get_current_player(env) == get_chance_player(env)
79-
v = 0.
89+
v = 0.0
8090
for a::ActionProbPair in get_actions(env)
8191
v += a.prob * best_response_value(p, child(env, a))
8292
end
8393
v
8494
else
85-
v = 0.
95+
v = 0.0
8696
for a in get_legal_actions(env)
8797
v += get_prob(p.policy, env, a) * best_response_value(p, child(env, a))
8898
end

0 commit comments

Comments
 (0)