BraydenMoore committed on
Commit
3231b63
·
1 Parent(s): 1beb833

Initial commit

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +1 -0
  2. Dockerfile.txt +29 -0
  3. Notebook.ipynb +0 -0
  4. Source/Build/__pycache__/build.cpython-311.pyc +0 -0
  5. Source/Build/build.py +206 -0
  6. Source/Build/nfl_data_py +1 -0
  7. Source/Build/update.py +25 -0
  8. Source/Data/gbg.csv +3 -0
  9. Source/Data/gbg_and_odds.csv +3 -0
  10. Source/Data/gbg_and_odds_this_year.csv +3 -0
  11. Source/Data/gbg_this_year.csv +3 -0
  12. Source/Data/pbp.csv +3 -0
  13. Source/Data/pbp_this_year.csv +3 -0
  14. Source/Models/__init__.py +0 -0
  15. Source/Models/xgboost_ML_75.4%.json +0 -0
  16. Source/Models/xgboost_OU_59.3%.json +0 -0
  17. Source/Pickles/team_abbreviation_to_name.pkl +3 -0
  18. Source/Pickles/team_name_to_abbreviation.pkl +3 -0
  19. Source/Pickles/test_games_ML.pkl +3 -0
  20. Source/Pickles/test_games_OU.pkl +3 -0
  21. Source/Pickles/train_games_ML.pkl +3 -0
  22. Source/Pickles/train_games_OU.pkl +3 -0
  23. Source/Predict/__pycache__/predict.cpython-311.pyc +0 -0
  24. Source/Predict/predict.py +201 -0
  25. Source/Test/__init__.py +0 -0
  26. Source/Test/xgboost_ML.py +59 -0
  27. Source/Test/xgboost_ML_75.4%.png +0 -0
  28. Source/Test/xgboost_ML_75.4%_dark.png +0 -0
  29. Source/Test/xgboost_OU.py +59 -0
  30. Source/Test/xgboost_OU_59.3%.png +0 -0
  31. Source/Test/xgboost_OU_59.3%_dark.png +0 -0
  32. Source/Train/xgboost_ML.py +69 -0
  33. Source/Train/xgboost_OU.py +70 -0
  34. Static/Arizona Cardinals.webp +0 -0
  35. Static/Atlanta Falcons.webp +0 -0
  36. Static/Baltimore Ravens.webp +0 -0
  37. Static/Buffalo Bills.webp +0 -0
  38. Static/Carolina Panthers.webp +0 -0
  39. Static/Chicago Bears.webp +0 -0
  40. Static/Cincinnati Bengals.webp +0 -0
  41. Static/Cleveland Browns.webp +0 -0
  42. Static/Dallas Cowboys.webp +0 -0
  43. Static/Denver Broncos.webp +0 -0
  44. Static/Detroit Lions.webp +0 -0
  45. Static/Green Bay Packers.webp +0 -0
  46. Static/Houston Texans.webp +0 -0
  47. Static/Indianapolis Colts.webp +0 -0
  48. Static/Jacksonville Jaguars.webp +0 -0
  49. Static/Kansas City Chiefs.webp +0 -0
  50. Static/Las Vegas Raiders.webp +0 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ *.csv filter=lfs diff=lfs merge=lfs -text
Dockerfile.txt ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Use the official Python 3.11 image.
FROM python:3.11

# Allow statements and log messages to immediately appear in the logs
ENV PYTHONUNBUFFERED=True

# Copy local code to the container image.
ENV APP_HOME=/app
WORKDIR $APP_HOME
COPY . ./

# Install production dependencies.
RUN pip install --no-cache-dir -r requirements.txt

# Create a non-root user and switch to it
RUN useradd -m -u 1000 user
USER user
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Run from the copy of the code that the non-root user owns. The copy under
# $APP_HOME belongs to root, so the service could not write files there.
WORKDIR $HOME/app

# Copy the app files and give ownership to the new user
COPY --chown=user . $HOME/app

# Run the web service on container startup.
CMD exec gunicorn --bind 0.0.0.0:7860 --workers 9 --threads 16 --timeout 120 main:app
29
+
Notebook.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
Source/Build/__pycache__/build.cpython-311.pyc ADDED
Binary file (20.8 kB). View file
 
Source/Build/build.py ADDED
@@ -0,0 +1,206 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nfl_data_py.nfl_data_py as nfl
2
+ from tqdm import tqdm
3
+ import numpy as np
4
+ import pandas as pd
5
+ pd.set_option('chained_assignment',None)
6
+ pd.set_option('display.max_columns',None)
7
+ import os
8
+ import datetime as dt
9
+
10
# Locate the sibling "Data" folder relative to this file, so the module works
# regardless of the process's working directory.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.split(current_directory)[0]
data_directory = os.path.join(parent_directory, 'Data')
13
+
14
def _possession_to_seconds(clock):
    """Convert an 'M:SS' possession string to seconds; null values count as 0."""
    if pd.isnull(clock):
        return 0
    minutes, seconds = clock.split(':')[:2]
    return int(minutes) * 60 + int(seconds)


def get_pbp_data(get_seasons=None, overwrite_seasons=None):
    """
    Pull play-by-play data for ``get_seasons`` through ``nfl.import_pbp_data``.

    A ``TOP_seconds`` column is derived from ``drive_time_of_possession``.
    If ``overwrite_seasons`` is non-empty, rows for those seasons are dropped
    from the stored pbp.csv, the freshly pulled rows are appended, and pbp.csv
    is rewritten. In every case the rows of the current season are written to
    pbp_this_year.csv.

    Returns the resulting play-by-play DataFrame (merged with the stored data
    when overwriting, otherwise only the pulled seasons).

    """
    # None instead of [] as the default: a list default is shared across calls.
    get_seasons = [] if get_seasons is None else get_seasons
    overwrite_seasons = [] if overwrite_seasons is None else overwrite_seasons

    pbp = nfl.import_pbp_data(get_seasons)
    pbp['TOP_seconds'] = pbp['drive_time_of_possession'].apply(_possession_to_seconds)

    if overwrite_seasons:
        file_path = os.path.join(data_directory, 'pbp.csv')
        old = pd.read_csv(file_path, index_col=0, low_memory=False)
        old = old.loc[~old['season'].isin(overwrite_seasons)]
        pbp = pd.concat([old, pbp])
        pbp.to_csv(file_path)

    # Months August-December count toward that year's season; earlier months
    # count toward the previous year's. Read the clock once so year and month
    # always come from the same instant.
    now = dt.datetime.now()
    season = now.year if now.month in [8, 9, 10, 11, 12] else now.year - 1
    pbp_this_year = pbp.loc[pbp['season'] == season]
    file_path = os.path.join(data_directory, 'pbp_this_year.csv')
    pbp_this_year.to_csv(file_path)

    return pbp
38
+
39
+
40
# How each per-play column collapses to one row per game. The insertion order
# matters: build_gbg_data slices the aggregated frame by column position.
_GAME_AGGREGATIONS = {
    'GP': 'mean',
    'W': 'mean',
    'L': 'mean',
    'W_PCT': 'mean',
    'TOP': 'sum',
    'FGA': 'sum',
    'FGM': 'sum',
    'FG_PCT': 'mean',
    'PassTD': 'sum',
    'RushTD': 'sum',
    'PassTD_Allowed': 'sum',
    'RushTD_Allowed': 'sum',
    'PassYds': 'sum',
    'RushYds': 'sum',
    'PassYds_Allowed': 'sum',
    'RushYds_Allowed': 'sum',
    'Fum': 'sum',
    'Fum_Allowed': 'sum',
    'INT': 'sum',
    'INT_Allowed': 'sum',
    'Sacks': 'sum',
    'Sacks_Allowed': 'sum',
    'Penalties': 'sum',
    'FirstDowns': 'sum',
    '3rdDownConverted': 'sum',
    '3rdDownFailed': 'sum',
    '3rdDownAllowed': 'sum',
    '3rdDownDefended': 'sum',
    'PTS': 'mean',
    'PointDiff': 'mean',
}


def _add_team_features(team, team_name):
    """Add the per-play feature columns for ``team_name`` to ``team`` in place."""
    is_home = team['home_team'] == team_name
    is_away = team['away_team'] == team_name
    on_offense = team['posteam'] == team_name
    on_defense = team['defteam'] == team_name
    result = team['result']
    # 'result' is positive when the home side won and negative when the away
    # side won; ties and missing results count as a loss, as before.
    won = ((result > 0) & is_home) | ((result < 0) & is_away)

    def flag(mask):
        return np.where(mask, 1, 0)

    team['GP'] = team['week']
    team['W'] = flag(won)
    team['L'] = flag(~won)
    team['W_PCT'] = team['W'] / team['GP']
    team['TOP'] = np.where(on_offense, team['TOP_seconds'], 0)
    team['FGA'] = flag(on_offense & (team['field_goal_attempt'] == 1))
    team['FGM'] = flag(on_offense & (team['field_goal_result'] == 'made'))
    team['FG_PCT'] = team['FGM'] / team['FGA']
    team['PassTD'] = flag(on_offense & (team['pass_touchdown'] == 1))
    team['RushTD'] = flag(on_offense & (team['rush_touchdown'] == 1))
    team['PassTD_Allowed'] = flag(on_defense & (team['pass_touchdown'] == 1))
    team['RushTD_Allowed'] = flag(on_defense & (team['rush_touchdown'] == 1))
    team['PassYds'] = np.where(on_offense, team['passing_yards'], 0)
    team['RushYds'] = np.where(on_offense, team['rushing_yards'], 0)
    team['PassYds_Allowed'] = np.where(on_defense, team['passing_yards'], 0)
    team['RushYds_Allowed'] = np.where(on_defense, team['rushing_yards'], 0)
    team['Fum'] = flag(on_defense & (team['fumble_lost'] == 1))
    team['Fum_Allowed'] = flag(on_offense & (team['fumble_lost'] == 1))
    team['INT'] = flag(on_defense & (team['interception'] == 1))
    team['INT_Allowed'] = flag(on_offense & (team['interception'] == 1))
    team['Sacks'] = flag(on_defense & (team['sack'] == 1))
    team['Sacks_Allowed'] = flag(on_offense & (team['sack'] == 1))
    team['Penalties'] = flag(team['penalty_team'] == team_name)
    team['FirstDowns'] = flag(on_offense & (team['first_down'] == 1))
    team['3rdDownConverted'] = flag(on_offense & (team['third_down_converted'] == 1))
    team['3rdDownFailed'] = flag(on_offense & (team['third_down_failed'] == 1))
    team['3rdDownAllowed'] = flag(on_defense & (team['third_down_converted'] == 1))
    team['3rdDownDefended'] = flag(on_defense & (team['third_down_failed'] == 1))
    team['PTS'] = np.where(is_away, team['away_score'],
                           np.where(is_home, team['home_score'], np.nan))
    team['PointDiff'] = np.where(is_home, result, np.where(is_away, -result, 0))
    return team


def build_gbg_data(get_seasons=None, overwrite_seasons=None):
    """
    Build a game-by-game dataset from play-by-play data for prediction models.

    When ``overwrite_seasons`` is non-empty the play-by-play data is pulled
    again through get_pbp_data and those seasons are replaced in gbg.csv while
    the other stored seasons are kept; otherwise the stored pbp.csv is read
    and gbg.csv is rewritten with only ``get_seasons``. Each game row holds
    both teams' season-to-date figures shifted by one game, so a row only
    contains values from games earlier in that team's season. The current
    season's rows are also written to gbg_this_year.csv.

    Returns the game-by-game DataFrame that was written to gbg.csv.

    """
    # None instead of [] as the default: a list default is shared across calls.
    get_seasons = [] if get_seasons is None else get_seasons
    overwrite_seasons = [] if overwrite_seasons is None else overwrite_seasons

    print('Loading play-by-play data.')

    if overwrite_seasons:
        print('Overwriting data for', overwrite_seasons)
        pbp = get_pbp_data(get_seasons, overwrite_seasons)
    else:
        file_path = os.path.join(data_directory, 'pbp.csv')
        pbp = pd.read_csv(file_path, index_col=0)

    pbp = pbp.loc[pbp['season'].isin(get_seasons)]
    game_date_dict = dict(pbp[['game_id', 'game_date']].values)
    teams = list(set(list(pbp['home_team'].unique()) + list(pbp['away_team'].unique())))
    print(teams)
    seasons = pbp['season'].unique()

    print('Building game-by-game data.')
    # Collect the per-team frames and concatenate once; concatenating inside
    # the loop re-copies everything accumulated so far on every iteration.
    frames = []
    for season in seasons:
        print(season)
        in_season = pbp['season'] == season
        for team_name in tqdm(teams):
            # create features (on a copy, so the slice of pbp is never written to)
            involved = (pbp['home_team'] == team_name) | (pbp['away_team'] == team_name)
            team = pbp.loc[involved & in_season].copy()
            _add_team_features(team, team_name)

            # aggregate from play-by-play to game-by-game
            game = team.groupby('game_id').agg(_GAME_AGGREGATIONS).reset_index()
            game[['W', 'L']] = game[['W', 'L']].expanding().sum()
            # columns[4:] starts at W_PCT: running season-to-date averages
            game[game.columns[4:]] = game[game.columns[4:]].expanding().mean()
            # shift every feature down one game so a row never sees its own game
            game[game.columns[1:]] = game[game.columns[1:]].shift()
            game['TEAM'] = team_name
            game['Season'] = season

            frames.append(game)

    data = pd.concat(frames) if frames else pd.DataFrame()

    # separate home and away data and merge
    data = data.merge(pbp[['game_id', 'home_team', 'away_team']].drop_duplicates())
    home = data.loc[data['home_team'] == data['TEAM']]
    away = data.loc[data['away_team'] == data['TEAM']].add_suffix('.Away')
    gbg = home.merge(away, left_on='game_id', right_on='game_id.Away')
    gbg.drop(columns=['TEAM', 'TEAM.Away', 'home_team.Away', 'away_team.Away', 'Season.Away', 'game_id.Away'], inplace=True)
    gbg['game_date'] = gbg['game_id'].map(game_date_dict)

    file_path = os.path.join(data_directory, 'gbg.csv')
    if overwrite_seasons:
        old = pd.read_csv(file_path, index_col=0, low_memory=False)
        old = old.loc[~old['Season'].isin(overwrite_seasons)]
        gbg = pd.concat([old, gbg])
    gbg.to_csv(file_path)

    # Months August-December count toward that year's season.
    now = dt.datetime.now()
    season = now.year if now.month in [8, 9, 10, 11, 12] else now.year - 1
    gbg_this_year = gbg.loc[gbg['Season'] == season]
    file_path = os.path.join(data_directory, 'gbg_this_year.csv')
    gbg_this_year.to_csv(file_path)

    return gbg
168
+
169
+
170
def _decimal_to_american(decimal_odds):
    """Convert decimal odds to a rounded American moneyline."""
    if decimal_odds >= 2:
        return round((decimal_odds - 1) * 100)
    return round(-100 / (decimal_odds - 1))


def add_odds_data(gbg, overwrite=False):
    """
    Get odds from Australian Sports Betting's free online dataset and merge it with game-by-game data.

    Games are matched on a key built from the game date plus the home and away
    team abbreviations, so only games present in both sources are returned.
    Note that a 'Key' column is added to the ``gbg`` frame passed in.

    gbg: game-by-game frame with 'game_date', 'home_team' and 'away_team' columns.
    overwrite: when true, the merged frame is also written to gbg_and_odds.csv.

    Returns the merged frame with 'Home-Team-Win' and 'Over' label columns added.

    """

    # get team abbreviations
    team_descriptions = nfl.import_team_desc()
    team_abbreviation_dict = dict(team_descriptions[['team_name', 'team_abbr']].values)

    # get odds
    odds = pd.read_excel('https://www.aussportsbetting.com/historical_data/nfl.xlsx')
    for side in ('Home Team', 'Away Team'):
        # map Washington's former names onto the current one before abbreviating
        odds[side] = odds[side].str.replace('Washington Redskins', 'Washington Commanders').str.replace('Washington Football Team', 'Washington Commanders')
        odds[f'{side} Abbrev'] = odds[side].map(team_abbreviation_dict)

    # copy, so the columns added below are not written into a slice of the sheet
    odds = odds[['Date', 'Home Score', 'Away Score', 'Home Team Abbrev', 'Away Team Abbrev', 'Home Odds Close', 'Away Odds Close', 'Total Score Close']].copy()
    odds['Key'] = odds['Date'].astype(str) + odds['Home Team Abbrev'] + odds['Away Team Abbrev']
    odds = odds.drop(columns=['Date', 'Home Team Abbrev', 'Away Team Abbrev']).dropna()
    odds['Home Odds'] = [_decimal_to_american(i) for i in odds['Home Odds Close']]
    odds['Away Odds'] = [_decimal_to_american(i) for i in odds['Away Odds Close']]
    # profit on a one-unit bet at the closing decimal price; a tie returns 0
    odds['Home Winnings'] = [ho-1 if h > a else -1 if a > h else 0 for ho, h, a in odds[['Home Odds Close', 'Home Score', 'Away Score']].values]
    odds['Away Winnings'] = [ao-1 if a > h else -1 if h > a else 0 for ao, h, a in odds[['Away Odds Close', 'Home Score', 'Away Score']].values]

    # merge with gbg
    gbg['Key'] = gbg['game_date'].astype(str) + gbg['home_team'] + gbg['away_team']
    gbg_and_odds = gbg.merge(odds, left_on='Key', right_on='Key')
    gbg_and_odds['Home-Team-Win'] = (gbg_and_odds['Home Score'] > gbg_and_odds['Away Score']).astype(int)
    gbg_and_odds['Over'] = ((gbg_and_odds['Home Score'] + gbg_and_odds['Away Score']) > gbg_and_odds['Total Score Close']).astype(int)

    if overwrite:
        file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
        gbg_and_odds.to_csv(file_path)

    return gbg_and_odds
206
+
Source/Build/nfl_data_py ADDED
@@ -0,0 +1 @@
 
 
1
+ Subproject commit e4988dc303bc441108dd11f4ae93a8200aab10e1
Source/Build/update.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import nfl_data_py.nfl_data_py as nfl
2
+ import build
3
+ import datetime as dt
4
+ import numpy as np
5
+ import pandas as pd
6
+ pd.set_option('chained_assignment',None)
7
+ pd.set_option('display.max_columns',None)
8
+ import os
9
+
10
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')

# get current season (months August-December count toward that year's season)
now = dt.datetime.now()
year = now.year
month = now.month
season = year if month in [8,9,10,11,12] else year-1

# update current season
# Rebuild the season computed above rather than a hard-coded year; otherwise
# the filter below selects nothing once the season rolls over.
gbg = build.build_gbg_data(get_seasons=[season], overwrite_seasons=[season])
gbg_and_odds = build.add_odds_data(gbg)
gbg_and_odds_this_year = gbg_and_odds.loc[gbg_and_odds['Season']==season]

file_path = os.path.join(data_directory, 'gbg_and_odds_this_year.csv')
gbg_and_odds_this_year.to_csv(file_path)
Source/Data/gbg.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:518ee58f264900f457b6ab0deed9a664607c16bf399fa2a669fc484244c57a92
3
+ size 1792121
Source/Data/gbg_and_odds.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8ec2d7b26b490e1c28de9f9c40b4b4991f6f1ff7bbad0f3e994a7c5c375affe
3
+ size 1567692
Source/Data/gbg_and_odds_this_year.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b848b812a85a74ad20af51565784382f9a9cd97af3b65d77801dd1d009054f91
3
+ size 886
Source/Data/gbg_this_year.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:61d1340b0f2d8f5d4cad8efa0dfa2246adb0748ded9f3841709bde80a7146c74
3
+ size 844
Source/Data/pbp.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:840929401e41f90255f27bb2002791d75ea1aaeee538d586743044fb5065ca96
3
+ size 247394694
Source/Data/pbp_this_year.csv ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca7b56d8e82fad5c40ee396ec129c95f2f213864b190078d03a8ec665a0532c6
3
+ size 405815
Source/Models/__init__.py ADDED
File without changes
Source/Models/xgboost_ML_75.4%.json ADDED
The diff for this file is too large to render. See raw diff
 
Source/Models/xgboost_OU_59.3%.json ADDED
The diff for this file is too large to render. See raw diff
 
Source/Pickles/team_abbreviation_to_name.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d457e4ca669b5000d270669b963ce286a7b8ff0f7139535c7d0bd6439fddd4f
3
+ size 910
Source/Pickles/team_name_to_abbreviation.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fdd24bc318fde8622b827dfaa76fdbba5849d11cb61fb99bee50adcebb20fdc1
3
+ size 903
Source/Pickles/test_games_ML.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d0b6c58108f994d1f070c6ee85bba812da57d9395646c05e6bf3cb85a16b9f51
3
+ size 7376
Source/Pickles/test_games_OU.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69e2be2359534720fe42752b3e983e327e4e66a0a2bfa5924d4e750db458854e
3
+ size 7354
Source/Pickles/train_games_ML.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d13bfdb558d5753359f56ae4f2450e36ad8b21c10e1cc5e778b786759b83c62
3
+ size 60497
Source/Pickles/train_games_OU.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba28c20549cb0b08e68631bbdce808399dd1ad91f190ba18f6cbfdfeee0a4467
3
+ size 60519
Source/Predict/__pycache__/predict.cpython-311.pyc ADDED
Binary file (18.8 kB). View file
 
Source/Predict/predict.py ADDED
@@ -0,0 +1,201 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import xgboost as xgb
2
+ import numpy as np
3
+ import pandas as pd
4
+ import pickle as pkl
5
+ import os
6
+ import requests
7
+ from bs4 import BeautifulSoup
8
+
9
# Resolve the project folders relative to this file's location.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.split(current_directory)[0]
data_directory, model_directory, pickle_directory = (
    os.path.join(parent_directory, folder) for folder in ('Data', 'Models', 'Pickles')
)

# Play-by-play rows for the current season, read once when the module loads.
file_path = os.path.join(data_directory, 'pbp_this_year.csv')
pbp = pd.read_csv(file_path, index_col=0, low_memory=False)

# get team abbreviations (lookup tables in both directions)
file_path = os.path.join(pickle_directory, 'team_name_to_abbreviation.pkl')
with open(file_path, 'rb') as f:
    team_name_to_abbreviation = pkl.load(f)

file_path = os.path.join(pickle_directory, 'team_abbreviation_to_name.pkl')
with open(file_path, 'rb') as f:
    team_abbreviation_to_name = pkl.load(f)
+
27
+ def get_week():
28
+ headers = {
29
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
30
+ 'Accept-Encoding': 'gzip, deflate',
31
+ 'Accept-Language': 'en-US,en;q=0.9',
32
+ 'Cache-Control': 'max-age=0',
33
+ 'Connection': 'keep-alive',
34
+ 'Dnt': '1',
35
+ 'Upgrade-Insecure-Requests': '1',
36
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'
37
+ }
38
+ url = 'https://www.nfl.com/schedules/'
39
+ resp = requests.get(url,headers=headers)
40
+ soup = BeautifulSoup(resp.text, 'html.parser')
41
+ h2_tags = soup.find_all('h2')
42
+ year = h2_tags[0].getText().split(' ')[0]
43
+ week = h2_tags[0].getText().split(' ')[-1]
44
+ return int(week), int(year)
45
+
46
+
47
+ def get_games():
48
+ # pull from NBC
49
+ url = 'https://www.nbcsports.com/nfl/schedule'
50
+ df = pd.read_html(url)[0]
51
+ df['Away Team'] = [' '.join(i.split('\xa0')[1:]) for i in df['Away TeamAway Team']]
52
+ df['Home Team'] = [' '.join(i.split('\xa0')[1:]) for i in df['Home TeamHome Team']]
53
+ df['Date'] = pd.to_datetime(df['Game TimeGame Time'])
54
+ df['Date'] = df['Date'].dt.strftime('%A %d/%m %I:%M %p')
55
+ df['Date'] = df['Date'].apply(lambda x: f"{x.split()[0]} {int(x.split()[1].split('/')[1])}/{int(x.split()[1].split('/')[0])} {x.split()[2]}".capitalize())
56
+
57
+ return df[['Away Team','Home Team','Date']]
58
+
59
+
60
+ def get_one_week(team_name,season,week):
61
+ # create columns
62
+ team = pbp.loc[((pbp['home_team']==team_name) | (pbp['away_team']==team_name)) & (pbp['season']==season)]
63
+ team['GP'] = team['week']
64
+ team['W'] = [1 if r>0 and team_name==h else 1 if r<0 and team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]
65
+ team['L'] = [0 if r>0 and team_name==h else 0 if r<0 and team_name==a else 1 for r,a,h in team[['result','away_team','home_team']].values]
66
+ team['W_PCT'] = team['W']/team['GP']
67
+ team['TOP'] = [t if team_name==p else 0 for t,p in team[['TOP_seconds','posteam']].values]
68
+ team['FGA'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','field_goal_attempt']].values]
69
+ team['FGM'] = [1 if team_name==p and f=='made' else 0 for p,f in team[['posteam','field_goal_result']].values]
70
+ team['FG_PCT'] = team['FGM']/team['FGA']
71
+ team['PassTD'] = np.where((team['posteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
72
+ team['RushTD'] = np.where((team['posteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
73
+ team['PassTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['pass_touchdown'] == 1), 1, 0)
74
+ team['RushTD_Allowed'] = np.where((team['defteam'] == team_name) & (team['rush_touchdown'] == 1), 1, 0)
75
+ team['PassYds'] = [y if p==team_name else 0 for p,y in team[['posteam','passing_yards']].values]
76
+ team['RushYds'] = [y if p==team_name else 0 for p,y in team[['posteam','rushing_yards']].values]
77
+ team['PassYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','passing_yards']].values]
78
+ team['RushYds_Allowed'] = [y if d==team_name else 0 for d,y in team[['defteam','rushing_yards']].values]
79
+ team['Fum'] = np.where((team['defteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
80
+ team['Fum_Allowed'] = np.where((team['posteam'] == team_name) & (team['fumble_lost'] == 1), 1, 0)
81
+ team['INT'] = np.where((team['defteam'] == team_name) & (team['interception'] == 1), 1, 0)
82
+ team['INT_Allowed'] = np.where((team['posteam'] == team_name) & (team['interception'] == 1), 1, 0)
83
+ team['Sacks'] = np.where((team['defteam'] == team_name) & (team['sack'] == 1), 1, 0)
84
+ team['Sacks_Allowed'] = np.where((team['posteam'] == team_name) & (team['sack'] == 1), 1, 0)
85
+ team['Penalties'] = np.where((team['penalty_team'] == team_name), 1, 0)
86
+ team['FirstDowns'] = [1 if team_name==p and f==1 else 0 for p,f in team[['posteam','first_down']].values]
87
+ team['3rdDownConverted'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_converted']].values]
88
+ team['3rdDownFailed'] = [1 if p==team_name and t==1 else 0 for p,t in team[['posteam','third_down_failed']].values]
89
+ team['3rdDownAllowed'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_converted']].values]
90
+ team['3rdDownDefended'] = [1 if d==team_name and t==1 else 0 for d,t in team[['defteam','third_down_failed']].values]
91
+ team['PTS'] = [ap if at==team_name else hp if ht==team_name else None for ht,at,hp,ap in team[['home_team','away_team','home_score','away_score']].values]
92
+ team['PointDiff'] = [r if team_name==h else -r if team_name==a else 0 for r,a,h in team[['result','away_team','home_team']].values]
93
+
94
+ # aggregate from play-by-play to game-by-game
95
+ features = {
96
+ 'GP':'mean',
97
+ 'W':'mean',
98
+ 'L':'mean',
99
+ 'W_PCT':'mean',
100
+ 'TOP':'sum',
101
+ 'FGA':'sum',
102
+ 'FGM':'sum',
103
+ 'FG_PCT':'mean',
104
+ 'PassTD':'sum',
105
+ 'RushTD':'sum',
106
+ 'PassTD_Allowed':'sum',
107
+ 'RushTD_Allowed':'sum',
108
+ 'PassYds':'sum',
109
+ 'RushYds':'sum',
110
+ 'PassYds_Allowed':'sum',
111
+ 'RushYds_Allowed':'sum',
112
+ 'Fum':'sum',
113
+ 'Fum_Allowed':'sum',
114
+ 'INT':'sum',
115
+ 'INT_Allowed':'sum',
116
+ 'Sacks':'sum',
117
+ 'Sacks_Allowed':'sum',
118
+ 'Penalties':'sum',
119
+ 'FirstDowns':'sum',
120
+ '3rdDownConverted':'sum',
121
+ '3rdDownFailed':'sum',
122
+ '3rdDownAllowed':'sum',
123
+ '3rdDownDefended':'sum',
124
+ 'PTS':'mean',
125
+ 'PointDiff':'mean'
126
+ }
127
+ game = team.groupby('game_id').agg(features).reset_index()
128
+ game[['W','L']] = game[['W','L']].expanding().sum()
129
+ game[game.columns[4:]] = game[game.columns[4:]].expanding().mean()
130
+ game['TEAM'] = team_name
131
+ game['Season'] = season
132
+ return game.loc[game['GP']==week]
133
+
134
+
135
+ def get_one_week_home_and_away(home,away,season,week):
136
+ home = get_one_week(home,season,week)
137
+ away = get_one_week(away,season,week)
138
+ away.columns = [f'{i}.Away' for i in away.columns]
139
+ gbg = home.merge(away,left_index=True,right_index=True)
140
+ gbg.drop(columns=['TEAM','TEAM.Away','Season.Away','game_id.Away'], inplace=True)
141
+ return gbg
142
+
143
+
144
+ def predict(home,away,season,week,total):
145
+ # finish preparing data
146
+ home_abbrev = team_name_to_abbreviation[home]
147
+ away_abbrev = team_name_to_abbreviation[away]
148
+ gbg = get_one_week_home_and_away(home_abbrev,away_abbrev,season,week)
149
+ gbg['Total Score Close'] = total
150
+
151
+ matrix = xgb.DMatrix(gbg.drop(columns=['game_id','Season']).astype(float).values)
152
+
153
+ # moneyline
154
+ model = 'xgboost_ML_75.4%'
155
+ file_path = os.path.join(model_directory, f'{model}.json')
156
+ xgb_ml = xgb.Booster()
157
+ xgb_ml.load_model(file_path)
158
+ try:
159
+ ml_predicted_proba = xgb_ml.predict(matrix)[0][1]
160
+ winner_proba = max([ml_predicted_proba, 1-ml_predicted_proba])
161
+ moneyline = {'Winner': [home if ml_predicted_proba>0.6 else away if ml_predicted_proba<0.4 else 'Toss-Up'],
162
+ 'Probabilities':[winner_proba]}
163
+ except:
164
+ moneyline = {'Winner': 'NA',
165
+ 'Probabilities':['N/A']}
166
+
167
+ # over/under
168
+ model = 'xgboost_OU_59.3%'
169
+ file_path = os.path.join(model_directory, f'{model}.json')
170
+ xgb_ou = xgb.Booster()
171
+ xgb_ou.load_model(file_path)
172
+ try:
173
+ ou_predicted_proba = xgb_ou.predict(matrix)[0][1]
174
+ over_under = {'Over/Under': ['Over' if ou_predicted_proba>0.5 else 'Under'],
175
+ 'Probability': [ou_predicted_proba]}
176
+ except:
177
+ over_under = {'Over/Under': 'N/A',
178
+ 'Probabilities': ['N/A']}
179
+
180
+ return moneyline, over_under
181
+
182
+
183
+ def update_past_predictions():
184
+ file_path = os.path.join(data_directory, 'gbg_and_odds_this_year.csv')
185
+ gbg_and_odds_this_year = pd.read_csv(file_path, index_col=0, low_memory=False)
186
+ total_dict = dict(gbg_and_odds_this_year[['game_id','Total Score Close']])
187
+ games = pbp.drop_duplicates(subset='game_id')
188
+
189
+ predictions = {}
190
+ for _, i in games.iterrows():
191
+ game_id = i['game_id']
192
+ home = i['home_team']
193
+ away = i['away_team']
194
+ week = i['week']
195
+ season = i['season']
196
+ total = total_dict[game_id]
197
+ predictions[game_id] = predict(home,away,season,week,total)
198
+
199
+ predictions_df = pd.DataFrame(predictions)
200
+ file_path = os.path.join(data_directory, 'predictions_this_year.csv')
201
+ predictions_df.to_csv(file_path)
Source/Test/__init__.py ADDED
File without changes
Source/Test/xgboost_ML.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import xgboost as xgb
2
+ import pandas as pd
3
+ import pickle as pkl
4
+ import numpy as np
5
+ import os
6
+
7
# Backtest of the moneyline model on the held-out test games: bets are placed
# only when the predicted home-win probability is outside 40-60%, and the
# cumulative return is plotted.
model = 'xgboost_ML_75.4%'

current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

file_path = os.path.join(model_directory, f'{model}.json')
xgb_ml = xgb.Booster()
xgb_ml.load_model(file_path)

file_path = os.path.join(pickle_directory, 'test_games_ML.pkl')
with open(file_path,'rb') as f:
    test_games = pkl.load(f).tolist()

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
gbg_and_odds = pd.read_csv(file_path, index_col=0)
# copy, so the columns added below are not written into a slice of gbg_and_odds
test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)].copy()
test_data_matrix = xgb.DMatrix(test_data.drop(columns=['game_id','Over','Home-Team-Win','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings']).astype(float).values)

predicted_probas = xgb_ml.predict(test_data_matrix)
test_data['predicted_proba'] = [i[1] for i in predicted_probas]
test_data['prediction'] = (test_data['predicted_proba']>0.5).astype(int)
test_data['correct'] = test_data['Home-Team-Win']==test_data['prediction']

bets = test_data.loc[(test_data['predicted_proba']>0.6) | (test_data['predicted_proba']<0.4)].copy()
# The payout is that of the side actually bet on: the home result when the
# model picked the home team, otherwise the away result. Choosing by 'correct'
# credited wrong home picks as wins and charged correct away picks as losses.
bets['winnings'] = np.where(bets['prediction']==1, bets['Home Winnings'], bets['Away Winnings'])

import matplotlib.pyplot as plt
fig = plt.figure(facecolor='black')
ax = fig.add_subplot(1, 1, 1, facecolor='black')

# Plot data with line color as RGB(0, 128, 0)
ax.plot(bets['winnings'].cumsum().values*100, linewidth=3, color=(0/255, 128/255, 0/255))

# Set title and labels
ax.set_title('MARCI 3.0 - MoneyLine w/ 60% Confidence Threshold', color='white')
ax.set_xlabel('Games Bet On', color='white')
ax.set_ylabel('Return (%)', color='white')

# Change tick colors to white
ax.tick_params(axis='x', colors='white')
ax.tick_params(axis='y', colors='white')

# Change axis edge colors
for side in ('bottom', 'top', 'left', 'right'):
    ax.spines[side].set_color('white')

plt.savefig(f'{model}_dark.png', facecolor='black')
Source/Test/xgboost_ML_75.4%.png ADDED
Source/Test/xgboost_ML_75.4%_dark.png ADDED
Source/Test/xgboost_OU.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Back-test the saved over/under XGBoost model on its held-out games.

Loads the booster named by ``model`` from the ``Models`` directory, scores the
games whose ids are stored in ``Pickles/test_games_OU.pkl`` using the rows of
``Data/gbg_and_odds.csv``, and saves a dark-themed chart of the cumulative
return as ``<model>_dark.png``.
"""
import os
import pickle as pkl

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xgboost as xgb

model = 'xgboost_OU_59.3%'

# All inputs are resolved relative to this file so the script can be launched
# from any working directory.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

file_path = os.path.join(model_directory, f'{model}.json')
xgb_ou = xgb.Booster()
xgb_ou.load_model(file_path)

# NOTE: pickle is only safe for files produced by this project's own training
# script; never point this at untrusted data.
file_path = os.path.join(pickle_directory, 'test_games_OU.pkl')
with open(file_path, 'rb') as f:
    test_games = pkl.load(f).tolist()

# Labels, identifiers and odds/outcome columns that must not be fed to the model.
non_feature_columns = [
    'game_id', 'Over', 'Home-Team-Win', 'Season', 'home_team', 'away_team',
    'game_date', 'Key', 'Home Score', 'Away Score', 'Home Odds Close',
    'Away Odds Close', 'Home Winnings', 'Away Winnings',
]

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
gbg_and_odds = pd.read_csv(file_path, index_col=0)
# Explicit copy: new columns are added below, and assigning onto a .loc slice
# would otherwise raise SettingWithCopyWarning.
test_data = gbg_and_odds.loc[gbg_and_odds['game_id'].isin(test_games)].copy()
features = test_data.drop(columns=non_feature_columns).astype(float).values
test_data_matrix = xgb.DMatrix(features)

# One row of per-class probabilities per game; column 1 is used as the
# probability of the positive ('Over' == 1) class.
predicted_probas = xgb_ou.predict(test_data_matrix)
test_data['predicted_proba'] = predicted_probas[:, 1]
test_data['prediction'] = (test_data['predicted_proba'] > 0.5).astype(int)
test_data['correct'] = test_data['Over'] == test_data['prediction']

# Every test game is bet on; no confidence threshold is applied here.
bets = test_data
# A correct pick returns 0.91 units and a wrong one loses the 1-unit stake
# (presumably standard -110 pricing; confirm against the odds source).
bets['winnings'] = np.where(bets['correct'], 0.91, -1)

fig = plt.figure(facecolor='black')
ax = fig.add_subplot(1, 1, 1, facecolor='black')

# Plot data with line color as RGB(0, 128, 0)
ax.plot(bets['winnings'].cumsum().values*100, linewidth=3, color=(0/255, 128/255, 0/255))

# Set title and labels
ax.set_title('MARCI 3.0 - Over/Under', color='white')
ax.set_xlabel('Games Bet On', color='white')
ax.set_ylabel('Return (%)', color='white')

# Change tick colors to white
ax.tick_params(axis='both', colors='white')

# Change axis edge colors
for side in ('bottom', 'top', 'left', 'right'):
    ax.spines[side].set_color('white')

# The image path is relative to the current working directory.
plt.savefig(f'{model}_dark.png', facecolor='black')
Source/Test/xgboost_OU_59.3%.png ADDED
Source/Test/xgboost_OU_59.3%_dark.png ADDED
Source/Train/xgboost_ML.py ADDED
@@ -0,0 +1,69 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Train the moneyline (home-team-win) XGBoost classifier.

Fits a model on 100 random 90/10 train/test splits of
``Data/gbg_and_odds.csv``. Whenever a split matches or beats the best test
accuracy seen so far, the model is written to ``Models`` and the game ids of
that split are written to ``Pickles``.
"""
import os
import pickle as pkl

import numpy as np
import pandas as pd
import xgboost as xgb
from IPython.display import clear_output
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# All paths are resolved relative to this file so the script can be launched
# from any working directory.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
data = pd.read_csv(file_path, index_col=0).dropna()

# Despite its name, `margin` holds the 'Home-Team-Win' label column.
margin = data['Home-Team-Win']
# Remove labels, identifiers and odds/outcome columns; 'game_id' is kept for
# now so each split can record which games it contains.
data.drop(columns=['Home-Team-Win','Over','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings'], inplace=True)

# Hyper-parameters do not change between splits, so build them once.
param = {
    'max_depth': 2,
    'eta': 0.01,
    'objective': 'multi:softprob',
    'num_class': 2
}
epochs = 500

acc_results = []

for _ in tqdm(range(100)):
    X_train, X_test, y_train, y_test = train_test_split(data, margin, test_size=.1)

    train_games = X_train['game_id']
    test_games = X_test['game_id']

    # Rebind instead of dropping in place: the split outputs are slices of
    # `data`, and mutating them in place can raise SettingWithCopyWarning.
    X_train = X_train.drop(columns=['game_id'])
    X_test = X_test.drop(columns=['game_id'])

    train = xgb.DMatrix(X_train.astype(float).values, label=y_train)
    test = xgb.DMatrix(X_test.astype(float).values, label=y_test)

    model = xgb.train(param, train, epochs)
    predictions = model.predict(test)
    # softprob yields one probability per class; take the most likely class.
    y = np.argmax(predictions, axis=1)

    acc = round(accuracy_score(y_test, y)*100, 1)
    acc_results.append(acc)
    clear_output(wait=True)
    print(f"Best accuracy: {max(acc_results)}%")

    # only save results if they are the best so far
    # NOTE(review): the kept split is chosen by its own test accuracy, so the
    # accuracy in the file name is an optimistic estimate. Model files written
    # for earlier, lower accuracies are not removed.
    if acc == max(acc_results):
        file_path = os.path.join(pickle_directory, 'train_games_ML.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(train_games, f)

        file_path = os.path.join(pickle_directory, 'test_games_ML.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(test_games, f)

        file_path = os.path.join(model_directory, f'xgboost_ML_{acc}%.json')
        model.save_model(file_path)

print('Done')
Source/Train/xgboost_OU.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Train the over/under XGBoost classifier.

Fits a model on 100 random 90/10 train/test splits of
``Data/gbg_and_odds.csv``. Whenever a split matches or beats the best test
accuracy seen so far, the model is written to ``Models`` and the game ids of
that split are written to ``Pickles``.
"""
import os
import pickle as pkl

import numpy as np
import pandas as pd
import xgboost as xgb
from IPython.display import clear_output
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from tqdm import tqdm

# All paths are resolved relative to this file so the script can be launched
# from any working directory.
current_directory = os.path.dirname(os.path.abspath(__file__))
parent_directory = os.path.dirname(current_directory)
data_directory = os.path.join(parent_directory, 'Data')
model_directory = os.path.join(parent_directory, 'Models')
pickle_directory = os.path.join(parent_directory, 'Pickles')

file_path = os.path.join(data_directory, 'gbg_and_odds.csv')
data = pd.read_csv(file_path, index_col=0).dropna()

OU = data['Over']
# Remove labels, identifiers and odds/outcome columns; 'game_id' is kept for
# now so each split can record which games it contains.
data.drop(columns=['Home-Team-Win','Over','Season','home_team','away_team','game_date','Key','Home Score','Away Score','Home Odds Close','Away Odds Close','Home Winnings','Away Winnings'], inplace=True)

# Hyper-parameters do not change between splits, so build them once.
# NOTE(review): num_class is 3 — presumably 'Over' can take a third value
# (e.g. a push); confirm against how the column is built.
param = {
    'max_depth': 6,
    'eta': 0.05,
    'objective': 'multi:softprob',
    'num_class': 3
}
epochs = 300

acc_results = []

for _ in tqdm(range(100)):
    X_train, X_test, y_train, y_test = train_test_split(data, OU, test_size=.1)

    train_games = X_train['game_id']
    test_games = X_test['game_id']

    # Rebind instead of dropping in place: the split outputs are slices of
    # `data`, and mutating them in place can raise SettingWithCopyWarning.
    X_train = X_train.drop(columns=['game_id'])
    X_test = X_test.drop(columns=['game_id'])

    train = xgb.DMatrix(X_train.astype(float).values, label=y_train)
    test = xgb.DMatrix(X_test.astype(float).values, label=y_test)

    model = xgb.train(param, train, epochs)
    predictions = model.predict(test)
    # softprob yields one probability per class; take the most likely class.
    y = np.argmax(predictions, axis=1)

    acc = round(accuracy_score(y_test, y)*100, 1)
    acc_results.append(acc)
    clear_output(wait=True)
    print(f"Best accuracy: {max(acc_results)}%")

    # only save results if they are the best so far
    # NOTE(review): the kept split is chosen by its own test accuracy, so the
    # accuracy in the file name is an optimistic estimate. Model files written
    # for earlier, lower accuracies are not removed.
    if acc == max(acc_results):
        file_path = os.path.join(pickle_directory, 'train_games_OU.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(train_games, f)

        file_path = os.path.join(pickle_directory, 'test_games_OU.pkl')
        with open(file_path, 'wb') as f:
            pkl.dump(test_games, f)

        file_path = os.path.join(model_directory, f'xgboost_OU_{acc}%.json')
        model.save_model(file_path)

print('Done')
Static/Arizona Cardinals.webp ADDED
Static/Atlanta Falcons.webp ADDED
Static/Baltimore Ravens.webp ADDED
Static/Buffalo Bills.webp ADDED
Static/Carolina Panthers.webp ADDED
Static/Chicago Bears.webp ADDED
Static/Cincinnati Bengals.webp ADDED
Static/Cleveland Browns.webp ADDED
Static/Dallas Cowboys.webp ADDED
Static/Denver Broncos.webp ADDED
Static/Detroit Lions.webp ADDED
Static/Green Bay Packers.webp ADDED
Static/Houston Texans.webp ADDED
Static/Indianapolis Colts.webp ADDED
Static/Jacksonville Jaguars.webp ADDED
Static/Kansas City Chiefs.webp ADDED
Static/Las Vegas Raiders.webp ADDED