-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathfixturelib.py
More file actions
362 lines (309 loc) · 13.3 KB
/
Copy pathfixturelib.py
File metadata and controls
362 lines (309 loc) · 13.3 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
# Fixturing Library, to fixture a league of arbitrary size
# Uses the NetworkX library for maximally weighted matching
import pandas as pd
import networkx as nx
import random
import copy
import json
def parseConfig(configFileName):
    '''
    Load a JSON configuration file and return the parsed contents.

    configFileName: path of the JSON file to read.
    Returns whatever json.load() yields for the file (a dict when the file
    holds a top-level JSON object).

    The file is decoded explicitly as UTF-8 instead of the platform default
    encoding, so the same config parses identically on every OS.
    '''
    with open(configFileName, 'r', encoding='utf-8') as configFile:
        return json.load(configFile)
def getDataFromRemote(URL, table):
    '''
    Read one sheet of a spreadsheet into a DataFrame.

    URL is passed straight to pandas.read_excel() and table is used as the
    sheet name. The only cleaning applied is discarding rows in which every
    cell is empty.
    '''
    sheet = pd.read_excel(URL, sheet_name=table)
    cleaned = sheet.dropna(how='all')
    return cleaned
def getResults(URL, table):
    '''
    Fetch a results-style sheet (results or previous fixtures) through
    getDataFromRemote() and return it with its rows ordered by the 'Round'
    column.
    '''
    rawResults = getDataFromRemote(URL, table)
    return rawResults.sort_values(by='Round')
def getRatings(URL, table, teamNameCol, teamEloCol, teamKCol):
    '''
    Fetch the ratings sheet through getDataFromRemote() and split it into
    per-team lookups.

    teamNameCol, teamEloCol and teamKCol name the columns holding the team
    name, its starting Elo and its K value respectively.
    Returns a tuple (ratingsDict, kValueDict, teamNames) where the two dicts
    are keyed by team name and teamNames is the set of unique team names.
    '''
    sheet = getDataFromRemote(URL, table)
    sheet.index = sheet[teamNameCol]
    # A set collapses any repeated team names down to a single entry
    teamNames = set(sheet[teamNameCol])
    startingElos = {}
    kValues = {}
    for name in teamNames:
        startingElos[name] = sheet.loc[name, teamEloCol]
        kValues[name] = sheet.loc[name, teamKCol]
    return (startingElos, kValues, teamNames)
def getExpectedOutcome(eloA: float, eloB: float) -> (float, float):
    '''
    Compute the expected result of a game between two Elo ratings.

    Returns (expectedA, expectedB); the two values sum to 1 and equal
    ratings give exactly 0.5 each.
    '''
    if eloA == eloB:
        expectedA = 0.5
    else:
        # Logistic Elo curve on a 400-point scale
        exponent = -((eloA - eloB)/400.0)
        expectedA = 1/(1+10**exponent)
    return (expectedA, 1-expectedA)
def getScaledOutcome(outcome: float) -> float:
    '''
    Turn an expected outcome into a closeness score: 1 for an even game
    (outcome 0.5), falling linearly to 0 as the outcome approaches 0 or 1.
    '''
    distanceFromEven = abs(outcome - 0.5)
    return 2*(0.5-distanceFromEven)
def getGameOutcome(scoreA: int, scoreB: int) -> (float, float):
    '''
    Convert a final score into each team's share of the total points.

    Returns (outcomeA, outcomeB), each between 0 and 1 and summing to 1.
    A game in which the combined score is zero is reported as an even
    (0.5, 0.5) split, since there are no points to apportion and dividing
    by the zero total would otherwise fail (or yield NaN for numpy scores).
    '''
    totalScore = float(scoreA + scoreB)
    if totalScore == 0:
        return (0.5, 0.5)
    return (scoreA / totalScore, scoreB / totalScore)
def getDeviation(expected: float, outcome: float, elo: float, kValue: float) -> float:
    '''
    Return the Elo after a game: the current elo shifted by kValue times the
    difference between the actual outcome and the expected one.
    '''
    return elo + kValue * (outcome - expected)
def updateElosFromResults(elos: dict, results: pd.DataFrame, kValues: dict) -> dict:
    '''
    Walk through the results, one game per row, and update each team's Elo.

    elos:    dict of team name -> current Elo. Updated in place.
    results: DataFrame with the columns "Home Team", "Away Team",
             "Home Score" and "Away Score".
    kValues: dict of team name -> K value.
    Returns the same elos dict, now holding the updated ratings.

    Rows are processed in the order they appear in the frame, whatever the
    index labels are. Looking rows up by label 0..n-1 would ignore any
    sorting applied to the frame and would raise KeyError when rows have
    been dropped and the index has gaps.
    '''
    games = zip(results['Home Team'], results['Away Team'],
                results['Home Score'], results['Away Score'])
    for homeTeam, awayTeam, homeScore, awayScore in games:
        homeElo = elos[homeTeam]
        awayElo = elos[awayTeam]
        homeScorePerc, awayScorePerc = getGameOutcome(homeScore, awayScore)
        homeExpected, awayExpected = getExpectedOutcome(homeElo, awayElo)
        homeNewElo = getDeviation(homeExpected, homeScorePerc, homeElo,
                                  kValues[homeTeam])
        awayNewElo = getDeviation(awayExpected, awayScorePerc, awayElo,
                                  kValues[awayTeam])
        # A winner can score a smaller share than expected; never let a
        # winning team lose Elo because of that
        if homeScore > awayScore:
            homeNewElo = max(homeElo, homeNewElo)
        if homeScore < awayScore:
            awayNewElo = max(awayElo, awayNewElo)
        elos[homeTeam] = homeNewElo
        elos[awayTeam] = awayNewElo
    return elos
def checkIfGameInList(teamA: str, teamB: str, gamesList: list) -> (bool, int):
    '''
    Check whether a game between two teams appears in a list of game codes
    (requests, previous games, etc).

    Game codes have the form "<home> vs <away>"; both home/away orderings of
    the pairing are counted.
    Returns (isIn, count): whether the pairing appears at all, and how many
    times it appears.
    '''
    codeA = teamA + " vs " + teamB
    # Reverse fixture: same pairing with home and away swapped
    codeB = teamB + " vs " + teamA
    count = gamesList.count(codeA) + gamesList.count(codeB)
    return (count > 0, count)
def createGameRating(teamA: str, teamB: str, elosDict: dict, fixturedGames:
                     list, requestedGames: list, antiRequestedGames: list) -> float:
    '''
    Score how desirable a game between teamA and teamB is. Higher is better.

    Starting from 100, the rating:
        - gains the closeness of the game (0 to 1, from the teams' Elos)
        - loses 10 for every time the pairing is already in fixturedGames
        - gains 2 if the pairing is in requestedGames
        - loses 10 if the pairing is in antiRequestedGames
    The result is never below 0.
    '''
    # The scaled value is symmetric, so only team A's expectation is needed
    expectedA, _ = getExpectedOutcome(elosDict[teamA], elosDict[teamB])
    closeness = getScaledOutcome(expectedA)

    playedBefore, timesPlayed = checkIfGameInList(teamA, teamB, fixturedGames)
    wanted, _ = checkIfGameInList(teamA, teamB, requestedGames)
    unwanted, _ = checkIfGameInList(teamA, teamB, antiRequestedGames)

    rating = 100 + closeness
    if playedBefore:
        rating = rating - timesPlayed*10
    if wanted:
        rating = rating + 2
    if unwanted:
        rating = rating - 10
    # Negative weights cause issues with maximally weighted matching
    return max(rating, 0)
def createGameRatingsGraph(fixturedGames: list, requestedGames: list,
                           antiRequestedGames: list, elosDict: dict) -> nx.Graph:
    '''
    Build a graph (not in the chart sense) of every possible game.

    Each key of elosDict becomes a node, and every pair of distinct teams is
    joined by one edge whose "weight" attribute is the rating returned by
    createGameRating() for that pairing.
    Returns the populated nx.Graph.
    '''
    teams = list(elosDict)
    fixtureGraph = nx.Graph()
    fixtureGraph.add_nodes_from(teams)
    # Pair each team only with the teams after it, so every pairing is rated
    # exactly once and no has_edge() lookups are needed
    for position, teamA in enumerate(teams):
        for teamB in teams[position + 1:]:
            edgeWeight = createGameRating(teamA, teamB, elosDict,
                                          fixturedGames, requestedGames,
                                          antiRequestedGames)
            fixtureGraph.add_edge(teamA, teamB, weight=edgeWeight)
    return fixtureGraph
def getHomeGameCounts(teams: set, fixturedGames: list) -> dict:
    '''
    Count how many home games each team has had.

    teams:         the team names to count for.
    fixturedGames: game codes of the form "<home> vs <away>".
    Returns a dict of team name -> number of codes in which that team is the
    home side. Teams with no home games map to 0.
    '''
    homeGameCounts = {}
    for team in teams:
        # Match the full "<team> vs " prefix so that a team whose name is a
        # prefix of another's (e.g. "Red" / "Red Devils") is not credited
        # with the other team's home games
        homePrefix = team + " vs "
        homeGameCounts[team] = sum(
            1 for game in fixturedGames if game.startswith(homePrefix))
    return homeGameCounts
def createFixturesFromGraph(gameRatings: nx.Graph, homeGameCounts: dict) -> pd.DataFrame:
    '''
    Turn a graph of game ratings into a fixture.

    gameRatings:    graph whose nodes are teams and whose edge weights rate
                    each possible game.
    homeGameCounts: dict of team name -> home games so far. Within each
                    pairing the team with fewer home games is made the home
                    side (the first team of the pair on a tie), to even out
                    the home/away split over a season. A "Bye Team" entry
                    set to 0 is written into this dict.
    Returns a DataFrame with the columns 'Home Team', 'Away Team' and
    'Game Code' ("<home> vs <away>"), one row per game.
    '''
    # maxcardinality=True pairs up as many teams as possible first and only
    # then maximises weight; otherwise a pairing whose rating was clamped to
    # 0 could simply be left out, leaving two teams without a game
    rawPairings = nx.max_weight_matching(gameRatings, maxcardinality=True)
    homeGameCounts["Bye Team"] = 0
    games = []
    for teamA, teamB in rawPairings:
        if homeGameCounts[teamA] > homeGameCounts[teamB]:
            homeTeam, awayTeam = teamB, teamA
        else:
            homeTeam, awayTeam = teamA, teamB
        games.append({'Home Team': homeTeam,
                      'Away Team': awayTeam,
                      'Game Code': homeTeam + " vs " + awayTeam})
    # Build the frame in one go rather than growing it one cell at a time
    return pd.DataFrame(games, columns=['Home Team', 'Away Team', 'Game Code'])
def fixtureSingleRound(teams: set, elos: dict, fixtured: list, requested: list,
                       antiRequested: list, rematchesAllowed: int) -> pd.DataFrame:
    '''
    Fixture a single round.

    teams:            names of the teams in the league.
    elos:             dict of team name -> Elo. Modified in place when a
                      retry is needed (see below).
    fixtured:         game codes ("<home> vs <away>") already played or
                      fixtured.
    requested:        game codes that have been requested.
    antiRequested:    game codes that have been requested not to happen.
    rematchesAllowed: the most times any pairing in the new round may
                      already appear in fixtured.
    Returns the fixture DataFrame produced by createFixturesFromGraph().

    If any pairing in a candidate round already appears in fixtured more than
    rematchesAllowed times, every team's Elo is nudged by a small random
    amount and the round is recomputed. The loop repeats until a round
    passes, so it does not terminate if no acceptable round exists.
    '''
    while True:
        gameRatingsGraph = createGameRatingsGraph(fixtured, requested,
                                                  antiRequested, elos)
        homeGameCounts = getHomeGameCounts(teams, fixtured)
        fixtures = createFixturesFromGraph(gameRatingsGraph, homeGameCounts)
        # Count how often each proposed game has been fixtured previously.
        # The lookup must be against the history list (fixtured), not the
        # newly built fixtures frame.
        maxRepeats = max(
            (checkIfGameInList(homeT, awayT, fixtured)[1]
             for homeT, awayT in zip(fixtures['Home Team'],
                                     fixtures['Away Team'])),
            default=0)
        if maxRepeats <= rematchesAllowed:
            return fixtures
        print("Error: Could not find fixture within maxRepeats")
        print("Slightly altering Elos to try and get a different solution")
        for team in teams:
            elos[team] = elos[team] + random.uniform(-2.5, 2.5)
def findByeTeam(fixture: pd.DataFrame) -> str:
    '''
    Find which team has been drawn against "Bye Team" in a fixture.

    Locates the first row and first column holding the value "Bye Team" and
    returns the team in the opposite home/away column of that row. Raises
    IndexError when the fixture contains no "Bye Team" entry.
    '''
    isBye = fixture == "Bye Team"
    rowsWithBye = fixture.index[isBye.any(axis=1).to_numpy()]
    colsWithBye = fixture.columns[isBye.any(axis=0).to_numpy()]
    byeRow = rowsWithBye[0]
    byeCol = colsWithBye[0]
    # The real team sits in whichever column the bye does not occupy
    opponentCol = "Home Team" if byeCol == "Away Team" else "Away Team"
    return fixture.loc[byeRow, opponentCol]
def fixtureDoubleRound(teams: set, elos: dict, fixtured: list, requested: list,
                       antiRequested: list, rematchesAllowed: int) -> pd.DataFrame:
    '''
    Fixture two rounds at once. This is used when there are an odd number of
    teams in the league, as we can avoid byes by fixturing two rounds at once.

    A placeholder "Bye Team" is added to elos, two single rounds are drawn,
    and the two teams that drew the bye are fixtured against each other.
    Returns one DataFrame ('Home Team', 'Away Team', 'Game Code') holding the
    real games of both rounds followed by the game between the two bye teams.

    An attempt is rejected, the Elos are nudged randomly and the whole
    process is repeated when either:
        - the same team drew the bye in both rounds, or
        - some game in the result has been fixtured before more than
          rematchesAllowed times.
    The loop repeats until an attempt is accepted.

    Side effects on the arguments:
        - elos gains a "Bye Team" entry and may have its values perturbed.
        - fixtured is extended with the round-one game codes (bye game
          included) of the accepted attempt only; rejected attempts leave
          nothing behind.
    '''
    # Every attempt starts again from the games known before this call
    history = list(fixtured)
    while True:
        elos["Bye Team"] = random.choice(list(elos.values()))
        fixtureRd1 = fixtureSingleRound(teams, elos, history, requested,
                                        antiRequested, rematchesAllowed)
        rd1Codes = list(fixtureRd1['Game Code'])
        # Round two must see round one's games so it avoids repeating them
        fixtureRd2 = fixtureSingleRound(teams, elos, history + rd1Codes,
                                        requested, antiRequested,
                                        rematchesAllowed)
        byeTeam1 = findByeTeam(fixtureRd1)
        byeTeam2 = findByeTeam(fixtureRd2)
        # Remove the bye team games from the fixtures
        fixtureRd1 = fixtureRd1[fixtureRd1['Home Team'] != "Bye Team"]
        fixtureRd1 = fixtureRd1[fixtureRd1['Away Team'] != "Bye Team"]
        fixtureRd2 = fixtureRd2[fixtureRd2['Home Team'] != "Bye Team"]
        fixtureRd2 = fixtureRd2[fixtureRd2['Away Team'] != "Bye Team"]
        playedGames = (history + list(fixtureRd1['Game Code'])
                       + list(fixtureRd2['Game Code']))
        # Fixture the two bye teams against each other, giving the home game
        # to whichever has had fewer home games
        homeCount = getHomeGameCounts(teams, playedGames)
        if homeCount[byeTeam1] > homeCount[byeTeam2]:
            homeByeTeam, awayByeTeam = byeTeam2, byeTeam1
        else:
            homeByeTeam, awayByeTeam = byeTeam1, byeTeam2
        byeGameCode = homeByeTeam + " vs " + awayByeTeam
        byeGame = pd.DataFrame([{'Home Team': homeByeTeam,
                                 'Away Team': awayByeTeam,
                                 'Game Code': byeGameCode}],
                               columns=['Home Team', 'Away Team', 'Game Code'])
        totalFixture = pd.concat([fixtureRd1, fixtureRd2, byeGame],
                                 ignore_index=True)
        # Check if we are within the allowable rematches. The history used
        # here contains every game of the new fixture (the bye pairing
        # included), so subtracting 1 removes the game itself and leaves the
        # number of earlier meetings.
        allGames = playedGames + [byeGameCode]
        maxRepeats = 0
        for homeT, awayT in zip(totalFixture['Home Team'],
                                totalFixture['Away Team']):
            repeats = checkIfGameInList(homeT, awayT, allGames)[1] - 1
            maxRepeats = max(maxRepeats, repeats)
        # One team drawing both byes would be fixtured against itself
        sameByeTeam = byeTeam1 == byeTeam2
        if maxRepeats <= rematchesAllowed and not sameByeTeam:
            fixtured.extend(rd1Codes)
            return totalFixture
        print("Error: Could not find fixture within maxRepeats")
        print("Slightly altering Elos to try and get a different solution")
        for team in teams:
            elos[team] = elos[team] + random.uniform(-10, 10)