Best Python code snippet using slash
models.py
Source:models.py
import pandas as pd
import numpy as np
import math
import code.algorithms.config as config
import random
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy.stats import poisson, skellam

logger = config.config_logger(__name__, 10)
random.seed(1111)


def _append_rows(frame, other):
    """
    Return a new DataFrame made of ``frame`` followed by ``other``.

    Stand-in for ``DataFrame.append``, which was removed in pandas 2.0.
    A Series is treated as a single row, as ``append`` used to do.

    Args:
        frame (:obj: `pd.DataFrame`): rows accumulated so far.
        other (:obj: `pd.DataFrame` or `pd.Series`): rows (or one row) to add.
    Returns:
        New DataFrame; neither input is modified.
    """
    if isinstance(other, pd.Series):
        # One Series == one row: transpose it into a 1-row frame first.
        other = other.to_frame().T.infer_objects()
    return pd.concat([frame, other])


class Fixture(object):
    """
    Class for the fixture of a league.

    Wraps a DataFrame of matches in one of two layouts, selected by
    ``local_fixture``: one row per match (local/visitor columns) or one row
    per team per match ('is_home', 'team_id', 'op_team_id' columns).
    """

    def __init__(self, fixture, name, local_fixture=True):
        """
        Notes: There are certain headers that the fixture arg must have. Careful.
        Args:
            fixture (:obj: `pd.Dataframe`): fixture of a league.
            name (str): name of the fixture.
            local_fixture (bool): True if all local team ids are included in a single
                column named 'localteam_id'. False if locality is determined by a
                column named 'is_home' and team ids are under the column 'team_id'.
        """
        # local_fixture must be set first: the fixture setter reads it.
        self.local_fixture = local_fixture
        self.fixture = fixture
        self.name = name

    @property
    def fixture(self):
        """Underlying DataFrame."""
        return self._fixture

    @fixture.setter
    def fixture(self, fixture):
        """
        Store the DataFrame after checking the marker column of its layout.

        Raises:
            ValueError: if 'localTeam.data.name' (local layout) or 'is_home'
                (non-local layout) is missing.
        """
        if self.local_fixture:
            if 'localTeam.data.name' not in list(fixture.columns.values):
                raise ValueError('localTeam.data.name is missing but local_fixture is True')
            else:
                self._fixture = fixture
        else:
            if 'is_home' not in list(fixture.columns.values):
                raise ValueError('is_home is missing but local_fixture is False')
            else:
                self._fixture = fixture

    def __str__(self):
        """
        Returns:
            Print name of the league and dimensions of the fixture DataFrame.
        """
        return 'League: {0} - Shape: {1}'.format(self.name, self.fixture.shape)

    def get_last_match(self):
        """Return the latest 'time.starting_at.date' parsed as a datetime."""
        return max(pd.to_datetime(self.fixture['time.starting_at.date'], format="%Y-%m-%d"))

    def get_match_years(self):
        """Return the first four characters of each match date (the year, as str)."""
        return self.fixture['time.starting_at.date'].apply(lambda x: x[:4])

    def get_last_year(self):
        """
        Return the latest match year, plus one when the last match was played
        in June or later.
        """
        temp_year = max(set(self.get_match_years()))
        last_match = self.get_last_match()
        if last_match.month < 6:
            return int(temp_year)
        else:
            return int(temp_year) + 1

    def get_seasons(self):
        """
        Get set with the seasons in the fixture dataset.
        Returns:
            Set with the seasons in the fixture dataset.
        Raises:
            ValueError: if the fixture has no 'season_id' column.
        """
        if 'season_id' not in list(self.fixture.columns.values):
            raise ValueError('season_id not in fixture')
        else:
            return set(self.fixture['season_id'])

    def get_team_ids(self):
        """Return the set of team ids ('localteam_id' or 'team_id' depending on layout)."""
        if self.local_fixture:
            return set(self.fixture['localteam_id'])
        else:
            return set(self.fixture['team_id'])

    def get_team_names(self):
        """Return the set of team names ('localTeam.data.name' or 'Team.data.name')."""
        if self.local_fixture:
            return set(self.fixture['localTeam.data.name'])
        else:
            return set(self.fixture['Team.data.name'])

    def subset_season(self, season_id):
        """
        Extract matches played in a certain season.
        Args:
            season_id (str): id of the season requested.
        Returns:
            Fixture object with the games played in the season requested.
        """
        df = self.fixture
        local_fixture = self.local_fixture
        output = df.loc[df['season_id'] == season_id]
        name = self.name + ' - season {0}'.format(season_id)
        return Fixture(fixture=output, name=name, local_fixture=local_fixture)

    def get_team_games(self, team_id, home):
        """
        Extract the games of certain team as local, visit or both.
        Args:
            team_id (str): id of the requested team.
            home (int: from 0 to 2): 0 for visit games.
                                     1 for local games.
                                     2 for both.
        Returns:
            Fixture object containing the games of certain team.
        Raises:
            ValueError: if home is not 0, 1 or 2.
        """
        df = self.fixture
        local_fixture = self.local_fixture
        if local_fixture:
            team_name = df.loc[df['localteam_id'] == team_id]['localTeam.data.name'].iloc[0]
            name = self.name + ' - {0}'.format(team_name)
            if home == 0:
                output = df.loc[df['visitorteam_id'] == team_id]
            elif home == 1:
                output = df.loc[df['localteam_id'] == team_id]
            elif home == 2:
                output = df.loc[(df['localteam_id'] == team_id) | (df['visitorteam_id'] == team_id)]
            else:
                raise ValueError('home must be an integer between 0 and 2')
        else:
            if home == 0:
                # Away games: the team's own away rows plus the opponents' home rows.
                output = df.loc[(df['team_id'] == team_id) & (df['is_home'] == 0)]
                output = _append_rows(
                    output, df.loc[(df['op_team_id'] == team_id) & (df['is_home'] == 1)])
            elif home == 1:
                # Home games: the team's own home rows plus the opponents' away rows.
                output = df.loc[(df['team_id'] == team_id) & (df['is_home'] == 1)]
                output = _append_rows(
                    output, df.loc[(df['op_team_id'] == team_id) & (df['is_home'] == 0)])
            elif home == 2:
                output = df.loc[(df['team_id'] == team_id) | (df['op_team_id'] == team_id)]
            else:
                raise ValueError('home must be an integer between 0 and 2')
            team_name = df.loc[df['team_id'] == team_id]['Team.data.name'].iloc[0]
            name = self.name + ' - {0}'.format(team_name)
        return Fixture(fixture=output, name=name, local_fixture=local_fixture)

    def get_team_scores(self, team_id):
        """
        Convert fixture database from a local_fixture to a non local_fixture for a certain team.
        Args:
            team_id (str): id of the requested team.
        Returns:
            Fixture object containing the games of certain team in a non local_fixture structure.
        """
        local = self.get_team_games(team_id=team_id, home=1)
        local_fixture = local.fixture.copy()
        local_fixture['is_home'] = 1
        local_fixture = local_fixture.rename(columns={'scores.localteam_score': 'score',
                                                      'scores.visitorteam_score': 'op_score',
                                                      'localteam_id': 'team_id',
                                                      'localTeam.data.name': 'Team.data.name',
                                                      'visitorteam_id': 'op_team_id',
                                                      'visitorTeam.data.name': 'op_Team.data.name'})
        visitor = self.get_team_games(team_id=team_id, home=0)
        visitor_fixture = visitor.fixture.copy()
        visitor_fixture['is_home'] = 0
        visitor_fixture = visitor_fixture.rename(columns={'scores.visitorteam_score': 'score',
                                                          'scores.localteam_score': 'op_score',
                                                          'visitorteam_id': 'team_id',
                                                          'visitorTeam.data.name': 'Team.data.name',
                                                          'localteam_id': 'op_team_id',
                                                          'localTeam.data.name': 'op_Team.data.name'})
        output_fixture = local_fixture
        output_fixture = _append_rows(output_fixture, visitor_fixture)
        output_fixture = output_fixture.sort_values('time.starting_at.date')
        output = Fixture(name=team_id, fixture=output_fixture, local_fixture=False)
        return output

    def clean_fixture(self):
        """
        Keep only variables selected. Drop matches that have not been played yet. Drop duplicates.
        Drop missing values.
        Returns:
            Fixture object (self, modified in place).
        """
        fixture = self.fixture
        season_dict_inv = invert_dictionary(self.get_seasons_dict())
        if not self.local_fixture:
            fixture = fixture.loc[fixture['is_home'] == 1]
        else:
            # NOTE(review): this branch reads 'team_id', which is not among the
            # local-layout columns listed in variables_to_keep(); a missing column
            # raises KeyError, which is logged and skipped below.
            try:
                condition = (fixture['season_id'] == season_dict_inv[self.get_last_year()]) & \
                            (np.isnan(fixture['team_id']))
                fixture = fixture.loc[~condition]
            except KeyError:
                logger.warning('There was a KeyError omited - clean_fixture method')
        # variables_to_keep() already switches on self.local_fixture.
        vars_to_keep = self.variables_to_keep()
        fixture = fixture[vars_to_keep].drop_duplicates().dropna()
        self.fixture = fixture
        return self

    def drop_x_games_first_last(self, x):
        """
        Drop the first and last x games of a Fixture.

        When the fixture has 15 rows or fewer only the first x are dropped and
        a warning is logged.
        Args:
            x (int): number of games to be dropped at the start/end.
        Returns:
            Fixture object.
        """
        if x == 0:
            return self
        my_fixture = self.fixture
        my_fixture = my_fixture.sort_values('time.starting_at.date')
        if len(my_fixture) > 15:
            output = my_fixture.iloc[x:-x]
        else:
            output = my_fixture.iloc[x:]
            logger.warning('Team has less than 15 matches: {0}'.format(self.name))
        name = self.name + ' - {0} dropped'.format(x)
        return Fixture(fixture=output, name=name, local_fixture=self.local_fixture)

    def remove_x_games(self, n):
        """
        Drop the first and last n matches in each season.
        Args:
            n (int): number of games to be dropped at the start/end.
        Returns:
            Fixture object.
        """
        output = pd.DataFrame([])
        original_name = self.name
        logger.info('Main Fixture original size: {0}'.format(self.fixture.shape))
        for season in self.get_seasons():
            temp_season = self.subset_season(season)
            teams = temp_season.get_team_ids()
            logger.info('Season {1} original size: {0}'.format(temp_season.fixture.shape, season))
            for team_id in teams:
                temp_team = temp_season.get_team_games(team_id, home=1)
                temp_clean = temp_team.drop_x_games_first_last(n)
                output = _append_rows(output, temp_clean.fixture)
        output = output.sort_values('time.starting_at.date')
        name = '{0} - {1} games dropped'.format(original_name, n)
        return Fixture(output, name=name, local_fixture=self.local_fixture)

    def get_score_rolling_mean(self, window_scored, window_conceded):
        """
        Generate the rolling aggregate of goals scored and goals received. For the former,
        the window is window_scored, for the latter, window_conceded. Afterwards, the
        result vector is shifted one position. This way, we do not include the current
        result (score of the match) into the rolling computation.

        Note: despite the method name, the aggregate is a rolling ``sum()``.
        Args:
            window_scored (int): size of the window for goals scored.
            window_conceded (int): size of the window for goals conceded.
        Returns:
            Fixture object (non local_fixture layout) with 'roll_score' and
            'roll_op_score' included.
        """
        original_name = self.name
        team_ids = self.get_team_ids()
        output = pd.DataFrame([])
        for team_id in team_ids:
            team_fixture = self.get_team_scores(team_id=team_id).fixture
            team_fixture['roll_score'] = team_fixture['score'].rolling(
                window=window_scored).sum().shift(1)
            team_fixture['roll_op_score'] = team_fixture['op_score'].rolling(
                window=window_conceded).sum().shift(1)
            output = _append_rows(output, team_fixture)
        only_main_team = output[['team_id', 'Team.data.name', 'time.starting_at.date',
                                 'roll_op_score']]
        only_main_team = only_main_team.sort_values(['time.starting_at.date',
                                                     'team_id']).reset_index(drop=True)
        only_main_team = only_main_team['roll_op_score']
        # Rows sorted by (date, op_team_id) are paired positionally with the values
        # sorted by (date, team_id), so each row receives its opponent's value.
        output = output.sort_values(['time.starting_at.date', 'op_team_id']).reset_index(drop=True)
        output['roll_op_score'] = only_main_team
        output = output.sort_values('time.starting_at.date')
        name = original_name + ' - roll sum'
        return Fixture(name=name, fixture=output, local_fixture=False)

    def generate_dataset(self, win_scored=10, win_conceded=2, games_removed=0):
        """
        Generate rolling aggregates of goals scored and received. Drop the first
        and last games_removed. Drop missing values.
        Args:
            win_scored (int): size of the window for goals scored.
            win_conceded (int): size of the window for goals conceded.
            games_removed (int): number of games to be removed at the start
                and end of the season.
        Returns:
            Fixture object.
        """
        output = self.get_score_rolling_mean(window_scored=win_scored, window_conceded=win_conceded)
        output = output.remove_x_games(games_removed)
        output.fixture = output.fixture.dropna()
        return output

    def get_team_names_and_ids(self):
        """Return a dict mapping team name -> team id built from the fixture."""
        if self.local_fixture:
            names_and_ids = set(zip(self.fixture['localteam_id'], self.fixture['localTeam.data.name']))
        else:
            names_and_ids = set(zip(self.fixture['team_id'], self.fixture['Team.data.name']))
        output = {}
        for team_id, team_name in names_and_ids:
            output[team_name] = team_id
        return output

    def add_champion_dummy(self, champions_df):
        """
        Add a 0/1 'champion' column: 1 when the team appears in champions_df in
        any of the 4 years before the season's year.
        Args:
            champions_df (:obj: `pd.DataFrame`): champions, with one column per
                year named by the year as a string (see get_champions_in_period).
        Returns:
            Fixture object (self, modified in place and renamed).
        """
        origin_name = self.name
        teams_dict = self.get_team_names_and_ids()
        # Replace team names by team ids so they can be matched against the id columns.
        champions_df = champions_df.replace(teams_dict)
        output = pd.DataFrame([])
        for season in self.get_seasons():
            season_dict = self.get_seasons_dict()
            target_year = season_dict[season]
            champions = self.get_champions_in_period(champions_df, target_year, 4)
            temp_fixture = self.subset_season(season).fixture.copy()
            if self.local_fixture:
                temp_fixture['champion'] = temp_fixture['localteam_id'].apply(
                    lambda x: create_dummy_for_champions(x, champions))
            else:
                temp_fixture['champion'] = temp_fixture['team_id'].apply(
                    lambda x: create_dummy_for_champions(x, champions))
            output = _append_rows(output, temp_fixture)
        output = output.sort_values('time.starting_at.date')
        name = origin_name + ' - add champion'
        self.fixture = output
        self.name = name
        return self

    def get_seasons_dict(self):
        """
        Map each season id to a year.

        Side effect: adds an integer 'year' column to the underlying DataFrame.
        The league key is the first space-separated token of the fixture name
        and must be a key of get_last_season().
        Returns:
            dict season_id -> year; the league's last season maps to get_last_year().
        """
        my_fixture = self.fixture
        my_fixture['year'] = self.get_match_years().astype('int')
        output = {}
        league_name = self.name.split(' ')[0]
        last_season = self.get_last_season()[league_name]
        for season in self.get_seasons():
            if season == last_season:
                continue
            temp = my_fixture.loc[my_fixture['season_id'] == season, :]
            year_mean = math.ceil(np.mean(temp['year']))
            output.update({season: year_mean})
        output.update({last_season: self.get_last_year()})
        return output

    def train_model(self):
        """Fit and return a Poisson GLM of 'score' on the columns in variables_in_model()."""
        my_fixture = self.fixture[self.variables_in_model()]
        model = smf.glm(formula='score ~ is_home + roll_score + roll_op_score + champion', data=my_fixture,
                        family=sm.families.Poisson()).fit()
        return model

    def exclude_last_x_seasons(self, n):
        """
        Split the fixture into the last n season-years and everything before.
        Args:
            n (int): number of most recent years to hold out.
        Returns:
            Tuple (keep, drop) of Fixture objects named '... - train' and '... - test'.
        """
        seasons_dict = invert_dictionary(self.get_seasons_dict())
        max_year = max(list(seasons_dict.keys()))
        drop_range = range(max_year+1-n, max_year+1)
        my_fixture = self.fixture.copy()
        keep, drop = my_fixture, pd.DataFrame([])
        for year in drop_range:
            target = seasons_dict[year]
            drop = _append_rows(drop, my_fixture.loc[my_fixture['season_id'] == target, :])
            keep = keep.loc[keep['season_id'] != target, :]
        drop = drop_single_matches(drop)
        name_keep = self.name + ' - train'
        name_drop = self.name + ' - test'
        keep = Fixture(name=name_keep, fixture=keep, local_fixture=self.local_fixture)
        drop = Fixture(name=name_drop, fixture=drop, local_fixture=self.local_fixture)
        return keep, drop

    def add_predictions(self, predictions):
        """Return a new Fixture with predictions stored in 'expected_score'."""
        my_fixture = self.fixture.copy()
        my_fixture['expected_score'] = predictions
        name = self.name + ' - predicted'
        return Fixture(fixture=my_fixture, name=name, local_fixture=self.local_fixture)

    def convert_to_matches(self):
        """
        Collapse the two rows of each match into the home row, adding the away
        row's 'expected_score' as 'op_expected_score'.
        """
        my_fixture = self.fixture.copy()
        my_fixture = my_fixture.sort_values(['time.starting_at.date', 'fixture_id'])
        my_fixture = drop_single_matches(my_fixture)
        output = my_fixture.loc[my_fixture['is_home'] == 1].copy().reset_index(drop=True)
        temp = my_fixture.loc[my_fixture['is_home'] == 0].copy().reset_index(drop=True)
        # Home and away rows are paired by position after the reset_index.
        output['op_expected_score'] = temp['expected_score']
        return Fixture(name='match predictions', fixture=output, local_fixture=self.local_fixture)

    def get_matches_prediction(self, model):
        """
        Predict with model, convert to one row per match and add outcome
        probabilities plus the actual 'winner'.
        """
        prediction = model.predict(self.fixture)
        my_fixture = self.add_predictions(prediction)
        output = my_fixture.convert_to_matches()
        output = output.get_match_probabilities()
        output.fixture['winner'] = output.fixture.apply(get_winner, axis=1)
        return output

    def get_match_probabilities(self):
        """
        Add 'local_prob', 'tie_prob', 'visitor_prob' and 'expected_winner'
        columns computed from 'expected_score' and 'op_expected_score'.
        Returns:
            New Fixture object.
        """
        my_fixture = self.fixture.copy()
        local_score = self.fixture['expected_score']
        visitor_score = self.fixture['op_expected_score']
        local_prob_list, visitor_prob_list, tie_prob_list, winner = [], [], [], []
        for i in range(self.fixture.shape[0]):
            match_prob = simulate_match(local_score.iloc[i], visitor_score.iloc[i], max_goals=10)
            local_prob, tie_prob, visitor_prob = sum_triangle_and_diagonal_from_matrix(match_prob)
            local_prob_list.append(local_prob)
            tie_prob_list.append(tie_prob)
            visitor_prob_list.append(visitor_prob)
            if local_prob == max(local_prob, tie_prob, visitor_prob):
                winner.append(self.fixture['Team.data.name'].iloc[i])
            elif visitor_prob == max(local_prob, tie_prob, visitor_prob):
                winner.append(self.fixture['op_Team.data.name'].iloc[i])
            else:
                winner.append('tie')
        my_fixture['local_prob'] = pd.Series(local_prob_list, index=my_fixture.index)
        my_fixture['tie_prob'] = pd.Series(tie_prob_list, index=my_fixture.index)
        my_fixture['visitor_prob'] = pd.Series(visitor_prob_list, index=my_fixture.index)
        my_fixture['expected_winner'] = pd.Series(winner, index=my_fixture.index)
        return Fixture(fixture=my_fixture, name=self.name, local_fixture=self.local_fixture)

    def clean_results(self):
        """Keep only result_variables() columns, sorted by date and fixture id (in place)."""
        self.fixture = self.fixture[self.result_variables()]\
            .sort_values(['time.starting_at.date', 'fixture_id'])
        return

    def get_accuracy(self):
        """Return the share of rows where 'expected_winner' equals 'winner'."""
        my_fixture = self.fixture.copy()
        total = my_fixture.shape[0]
        good = np.sum([my_fixture['expected_winner'] == my_fixture['winner']])
        return good/total

    def determine_winner(self):
        """Add a 'winner_mod' column ('local', 'visit' or 'tie') in place and return self."""
        output = self
        output.fixture['winner_mod'] = output.fixture.apply(get_winner_mod, axis=1)
        return output

    def variables_to_keep(self):
        """Return the list of columns kept by clean_fixture for the current layout."""
        if self.local_fixture:
            return ['league_id', 'season_id', 'fixture_id', 'localteam_id', 'visitorteam_id',
                    'time.starting_at.date', 'localTeam.data.name', 'visitorTeam.data.name',
                    'scores.localteam_score', 'scores.visitorteam_score']
        else:
            return ['league_id', 'season_id', 'fixture_id', 'team_id', 'op_team_id', 'is_home',
                    'time.starting_at.date', 'Team.data.name', 'op_Team.data.name', 'score', 'op_score']

    def convert_2match_to_1match(self):
        """
        Convert, in place, a two-rows-per-match fixture into one row per match
        by keeping the home rows and renaming columns to the local layout.
        Raises:
            ValueError: if the fixture is already in the local layout.
        """
        if self.local_fixture:
            raise ValueError('local_fixture is True, fixture already in 1match format')
        else:
            my_df = self.fixture
            my_df = my_df[self.variables_to_keep()].sort_values('fixture_id')
            output = my_df.loc[my_df['is_home'] == 1]
            output = output.rename(columns={'Team.data.name': 'localTeam.data.name',
                                            'op_Team.data.name': 'visitorTeam.data.name',
                                            'team_id': 'localteam_id',
                                            'op_team_id': 'visitorteam_id',
                                            'score': 'scores.localteam_score',
                                            'op_score': 'scores.visitorteam_score'})
            del output['is_home']
            # Flip the flag before assigning so the setter validates the new layout.
            self.local_fixture = True
            self.fixture = output
            return self

    @staticmethod
    def get_last_season():
        """Return the hard-coded mapping league key -> id of its last season."""
        last_season = {'82Bundesliga': 8026,
                       '8Premier_League': 6397,
                       '564La_Liga': 8442,
                       '301Ligue_1': 6405,
                       'MLS': 2014,
                       'la_liga': 2014,
                       'premier': 2014,
                       'comebol': 2,
                       'wc2018': 2019}
        return last_season

    @staticmethod
    def get_champions_in_period(champions_df, year, window):
        """
        Return the set of values found in the columns of champions_df named
        str(year - window) ... str(year - 1).
        """
        output = set()
        for i in range(year-window, year):
            output.update(champions_df[str(i)].tolist())
        return output

    @staticmethod
    def variables_in_model():
        """Return the columns used by train_model."""
        output = ['score', 'roll_score', 'roll_op_score', 'champion', 'is_home']
        return output

    @staticmethod
    def result_variables():
        """Return the columns kept by clean_results."""
        output = ['fixture_id', 'time.starting_at.date', 'Team.data.name', 'op_Team.data.name', 'expected_score',
                  'op_expected_score', 'local_prob', 'tie_prob', 'visitor_prob', 'expected_winner', 'winner',
                  'score', 'op_score', 'is_home']
        return output


def convert_format_time(my_series):
    """Return the first ten characters of every element of my_series."""
    return my_series.apply(lambda x: x[:10])


def get_results_frequency(fixture):
    """
    Count, per season, the number of 'winner_mod' outcomes and total matches.
    Args:
        fixture (Fixture): fixture in the local layout (determine_winner reads
            the 'scores.*' columns).
    Returns:
        DataFrame sorted by 'season_id', where season ids have been replaced by years.
    """
    seasons_dict = fixture.get_seasons_dict()
    my_league = fixture.determine_winner().fixture[['season_id', 'winner_mod']]
    my_league = pd.get_dummies(my_league, columns=['winner_mod'])
    my_league['total'] = 1
    output = my_league.groupby('season_id').sum().reset_index().replace({'season_id': seasons_dict})
    output['season_id'] = output['season_id'].astype('int')
    output = output.sort_values('season_id')
    return output


def get_league_dictionary():
    """Return the hard-coded mapping league key -> league id."""
    league_dict = {'82Bundesliga': 82,
                   '8Premier_League': 8,
                   '564La_Liga': 564,
                   '301Ligue_1': 301}
    return league_dict


def create_dummy_for_champions(team_id, champion_set):
    """Return 1 if team_id is in champion_set, else 0."""
    if team_id in champion_set:
        return 1
    else:
        return 0


def invert_dictionary(my_dict):
    """Return a dict with keys and values of my_dict swapped."""
    return {v: k for k, v in my_dict.items()}


def simulate_match(home_goals_avg, away_goals_avg, max_goals=10):
    """
    Return the matrix of score probabilities for 0..max_goals goals per side:
    the outer product of the two Poisson pmfs (rows: home goals, columns: away goals).
    """
    team_pred = [[poisson.pmf(i, team_avg) for i in range(0, max_goals+1)] for
                 team_avg in [home_goals_avg, away_goals_avg]]
    return np.outer(np.array(team_pred[0]), np.array(team_pred[1]))


def drop_single_matches(df):
    """
    Keep only rows whose 'fixture_id' appears on two consecutive rows after
    sorting by 'fixture_id'; rows without a partner are dropped.
    """
    df = df.sort_values('fixture_id')
    i = 0
    output = pd.DataFrame([])
    fixture_id = df['fixture_id']
    while i < df.shape[0]-1:
        if fixture_id.iloc[i] == fixture_id.iloc[i+1]:
            output = _append_rows(output, df.iloc[i])
            output = _append_rows(output, df.iloc[i+1])
            i += 2
        else:
            i += 1
    return output


def convert_2match_to_1match(my_df):
    """
    Inner-join the home rows with the away rows of my_df on 'fixture_id',
    renaming the away 'expected_goals' to 'op_expected_goals'.
    """
    my_df = my_df.sort_values('fixture_id')
    local = my_df.loc[my_df['is_home'] == 1]
    visitor = my_df.loc[my_df['is_home'] == 0]
    visitor = visitor.rename(columns={'expected_goals': 'op_expected_goals'})
    del visitor['is_home']
    output = pd.merge(local, visitor, how='inner', on=['fixture_id'])
    return output


def sum_triangle_and_diagonal_from_matrix(my_matrix):
    """
    Sum the entries of a 2-D matrix by position.
    Returns:
        Tuple (lower, diagonal, upper): sums of entries with row > column,
        row == column and row < column respectively.
    """
    upper, lower, diagonal = 0, 0, 0
    for i in range(my_matrix.shape[0]):
        for j in range(my_matrix.shape[1]):
            if i > j:
                lower += my_matrix[i, j]
            if i < j:
                upper += my_matrix[i, j]
            if i == j:
                diagonal += my_matrix[i, j]
    return lower, diagonal, upper


def get_winner(row):
    """Return the name of the team with the higher score in row, or 'tie'."""
    local = row['score']
    visitor = row['op_score']
    if local > visitor:
        return row['Team.data.name']
    elif local < visitor:
        return row['op_Team.data.name']
    else:
        return 'tie'


def get_winner_mod(row):
    """Return 'local', 'visit' or 'tie' from the local/visitor scores in row."""
    local = row['scores.localteam_score']
    visitor = row['scores.visitorteam_score']
    if local > visitor:
        return 'local'
    elif local < visitor:
        return 'visit'
    else:
        return 'tie'


def predict_model(model, test, ignore_cols):
    """ Runs a simple predictor that will predict if we expect a team to
        win.
    """
    x_test = _splice(_coerce(_clone_and_drop(test, ignore_cols)))
    x_test['intercept'] = 1.0
    predicted = model.predict(x_test)
    result = test.copy()
    result['predicted'] = predicted
    return result


def _clone_and_drop(data, drop_cols):
    """ Returns a copy of a dataframe that doesn't have certain columns. """
    clone = data.copy()
    for col in drop_cols:
        if col in clone.columns:
            del clone[col]
    return clone


def _splice(data):
    """ Splice both rows representing a game into a single one. """
    data = data.copy()
    opp = data.copy()
    opp_cols = ['opp_%s' % (col,) for col in opp.columns]
    opp.columns = opp_cols
    opp = opp.apply(_swap_pairwise)
    del opp['opp_is_home']
    return data.join(opp)


def _swap_pairwise(col):
    """ Swap rows pairwise; i.e. swap row 0 and 1, 2 and 3, etc. """
    # np.array (not the removed pd.np alias) copies the column into a plain array.
    col = np.array(col)
    for index in range(0, len(col), 2):
        val = col[index]
        col[index] = col[index + 1]
        col[index+1] = val
    return col


def _coerce_types(vals):
    """ Makes sure all of the values in a list are floats. """
    return [1.0 * val for val in vals]


def _coerce(data):
    """ Coerces a dataframe to all floats, and standardizes the values. """
    return _standardize(data.apply(_coerce_types))


def non_feature_cols():
    """Return the names of the columns that are metadata rather than model features."""
    return ['league_id', 'season_id', 'matchid', 'time.starting_at.date', 'teamid', 'op_teamid',
            'op_Team.data.name', 'Team.data.name', 'op_team_name', 'score', 'op_score',
            'op_points', 'round_id', 'referee_id', 'formation', 'op_formation', 'points',
            'time.minute']


def train_model(data, ignore_cols):
    """ Trains a logistic regression model over the data. Columns that
        are passed in ignore_cols are considered metadata and not used
        in the model building.
        Returns the fitted model and the held-out test rows.
    """
    # Validate the data
    data = prepare_data(data)
    logger.info('Observations used in the model: {0}'.format(len(data)))
    target_col = 'points'
    (train, test) = split(data)
    # Rows with points == 1 are excluded from training; the target below is points == 3.
    train = train.loc[data['points'] != 1]
    (y_train, x_train) = _extract_target(train, target_col)
    x_train2 = _splice(_coerce(_clone_and_drop(x_train, ignore_cols)))
    y_train2 = [int(yval) == 3 for yval in y_train]
    logger.info('Training model')
    model = build_model_logistic(y_train2, x_train2, alpha=8.0)
    return model, test


def prepare_data(data):
    """ Drops all matches where we don't have data for both teams. """
    data = data.copy()
    data = _drop_unbalanced_matches(data)
    _check_data(data)
    return data


L1_ALPHA = 16.0


def build_model_logistic(target, data, acc=0.00000001, alpha=L1_ALPHA):
    """ Trains a logistic regression model. target is the target.
        data is a dataframe of samples for training. The length of
        target must match the number of rows in data.
    """
    data = data.copy()
    data['intercept'] = 1.0
    logit = sm.Logit(target, data, disp=False)
    return logit.fit_regularized(maxiter=1024, alpha=alpha, acc=acc, disp=False)


def _drop_unbalanced_matches(data):
    """ Because we don't have data on both teams during a match, we
        want to drop any match we don't have info about both teams.
        This can happen if we have fewer than 10 previous games from
        a particular team.
    """
    keep = []
    index = 0
    data = data.dropna()
    while index < len(data) - 1:
        skipped = False
        # NOTE(review): iterating a DataFrame yields its column labels, so this
        # check only fires for a NaN float label; NaN values were already removed
        # by dropna() above. Presumably dead code - confirm before removing.
        for col in data:
            if isinstance(col, float) and math.isnan(col):
                keep.append(False)
                index += 1
                skipped = True
        if skipped:
            pass
        elif data.iloc[index]['matchid'] == data.iloc[index + 1]['matchid']:
            keep.append(True)
            keep.append(True)
            index += 2
        else:
            keep.append(False)
            index += 1
    # Pad the mask so it has one entry per row (a trailing unpaired row is dropped).
    while len(keep) < len(data):
        keep.append(False)
    results = data[keep]
    if len(results) % 2 != 0:
        raise Exception('Unexpected results')
    return results


def _check_data(data):
    """ Walks a dataframe and make sure that all is well:
        rows come in pairs with the same matchid and mirrored team ids.
    """
    i = 0
    if len(data) % 2 != 0:
        raise Exception('Unexpeted length')
    matches = data['matchid']
    teams = data['teamid']
    op_teams = data['op_teamid']
    while i < len(data) - 1:
        if matches.iloc[i] != matches.iloc[i + 1]:
            raise Exception('Match mismatch: %s vs %s ' % (
                matches.iloc[i], matches.iloc[i + 1]))
        if teams.iloc[i] != op_teams.iloc[i + 1]:
            raise Exception('Team mismatch: match %s team %s vs %s' % (
                matches.iloc[i], teams.iloc[i],
                op_teams.iloc[i + 1]))
        if teams.iloc[i + 1] != op_teams.iloc[i]:
            raise Exception('Team mismatch: match %s team %s vs %s' % (
                matches.iloc[i], teams.iloc[i + 1],
                op_teams.iloc[i]))
        i += 2


def split(data, test_proportion=0.2):
    """ Splits a dataframe into a training set and a test set.
        Must be careful because back-to-back rows are expected to
        represent the same game, so they both must go in the
        test set or both in the training set.
    """
    train_vec = []
    if len(data) % 2 != 0:
        raise Exception('Unexpected data length')
    while len(train_vec) < len(data):
        # One draw per pair of rows so both rows of a game land in the same set.
        rnd = random.random()
        train_vec.append(rnd > test_proportion)
        train_vec.append(rnd > test_proportion)
    test_vec = [not val for val in train_vec]
    train = data[train_vec]
    test = data[test_vec]
    if len(train) % 2 != 0:
        raise Exception('Unexpected train length')
    if len(test) % 2 != 0:
        raise Exception('Unexpected test length')
    return (train, test)


def _extract_target(data, target_col):
    """ Removes the target column from a data frame, returns the target
        col and a new data frame minus the target.
    """
    target = data[target_col]
    train_df = data.copy()
    del train_df[target_col]
    return target, train_df


def _standardize_col(col):
    """ Standardizes a single column (subtracts mean and divides by std
        dev). Columns whose std dev is at most 0.001 are returned unchanged.
    """
    std = np.std(col)
    mean = np.mean(col)
    if abs(std) > 0.001:
        return col.apply(lambda val: (val - mean)/std)
    else:
        return col


def _standardize(data):
    """ Standardizes a dataframe. All fields must be numeric. """
    return data.apply(_standardize_col)


def get_expected_winner(row):
    """Return the team name when 'predicted' > 0.5, otherwise the opponent's name."""
    if row['predicted'] > 0.5:
        return row['Team.data.name']
    else:
        return row['op_Team.data.name']


def get_winners(my_df, tie_prob):
    """
    Build one row per match with actual and expected winners and normalized
    predictions.

    Side effect: adds a 'winner' column to the my_df passed in.
    Args:
        my_df (:obj: `pd.DataFrame`): two rows per match with 'is_home' and 'predicted'.
        tie_prob: value stored in 'tie_predicted' (see normalize_predictions).
    Returns:
        DataFrame with the result_variables() columns ('matchid' renamed to
        'fixture_id'), sorted by date and fixture id.
    """
    my_df['winner'] = my_df.apply(get_winner, axis=1)
    output = my_df.loc[my_df['is_home'] == 1].copy().reset_index(drop=True)
    temp = my_df.loc[my_df['is_home'] == 0].copy().reset_index(drop=True)
    # Home and away rows are paired by position after the reset_index.
    output['op_predicted'] = temp['predicted']
    my_df = output.copy()
    my_df['expected_winner'] = my_df.apply(get_expected_winner, axis=1)
    my_df = normalize_predictions(my_df, tie_prob)
    my_df = my_df[result_variables()].rename(columns={'matchid': 'fixture_id'})
    my_df = my_df.sort_values(['time.starting_at.date', 'fixture_id'])
    return my_df


def get_accuracy(my_df, prefix):
    """Return the share of rows where column prefix + 'expected_winner' equals 'winner'."""
    total = my_df.shape[0]
    expected_winner = prefix + 'expected_winner'
    good = np.sum([my_df[expected_winner] == my_df['winner']])
    return good / total


def result_variables():
    """Return the columns kept by get_winners."""
    output = ['matchid', 'time.starting_at.date', 'Team.data.name', 'op_Team.data.name',
              'expected_winner', 'winner', 'predicted', 'op_predicted', 'tie_predicted']
    return output


def normalize_predictions(my_df, tie_prob):
    """
    Rescale 'predicted' and 'op_predicted' so that, together with
    'tie_predicted' (set to tie_prob), the three values sum to 1.
    Returns:
        Modified copy of my_df.
    """
    my_df = my_df.copy()
    my_df['tie_predicted'] = tie_prob
    my_df['a'] = my_df['predicted'] * (1-my_df['tie_predicted']) / (my_df['predicted'] + my_df['op_predicted'])
    my_df['b'] = my_df['op_predicted'] * (1-my_df['tie_predicted']) / (my_df['predicted'] + my_df['op_predicted'])
    my_df['predicted'] = my_df['a']
    my_df['op_predicted'] = my_df['b']
    del my_df['a']
    del my_df['b']
    return my_df


def squared_error(a, b):
    """
    Return the sum of squared differences between a and b.
    Raises:
        ValueError: if a and b have different lengths.
    """
    if len(a) != len(b):
        raise ValueError('Squared error fuction: vectors do not have same lenght')
    dif = a-b
    return dif.dot(dif)


def get_squared_error(my_df):
    """
    Compute the mean squared error of the 'gs_*' and the 'expected_goals'
    score predictions against the actual scores.
    """
    my_df = my_df.copy()
    gs_local = squared_error(my_df['gs_expected_score'], my_df['score'])
    gs_visit = squared_error(my_df['gs_op_expected_score'], my_df['op_score'])
    google_local = squared_error(my_df['expected_goals'], my_df['score'])
    google_visit = squared_error(my_df['op_expected_goals'], my_df['op_score'])
    gs_output = (gs_local + gs_visit)/my_df.shape[0]
    google_output = (google_local + google_visit)/my_df.shape[0]
    # ... (the source listing is truncated here; the rest of this function is not shown)
simple_fixture.py
Source:simple_fixture.py
1# example 4 :2# same role as setup and teardown method of unittest.TestCase3import pytest4def test_with_local_fixture(local_fixture): # local_fixture define later this code5 '''6 fixtures can be invoked simply by having a positional arg7 with the same name as fixture8 '''9 print('running test_with_local_fixture')10 assert True11# local fixture : local to module12@pytest.fixture13def local_fixture():14 '''fixtures are callables(functions) decoratred with @fixture'''15 print('doing local fixture setup stuff')16# globla fixture : other module can access17@pytest.fixture(scope='module') # important18def global_fixture():19 '''fixtures are callables(functions) decoratred with @fixture'''20 print('doing global fixture setup stuff')21########################22# in another file , to use global_fixture23# import pytest24# from simple_fixture import global_fixture25# def test_with_global_fixture1(global_fixture): # local_fixture define later this code26# print('running test_with_global_fixture')27# assert True...
03_simple_fixture_test.py
Source:03_simple_fixture_test.py
import pytest


def test_with_local_fixture(local_fixture):
    """
    Fixtures can be invoked simply by having a positional arg
    with the same name as the fixture.

    :param local_fixture: value provided by the local_fixture fixture below
    :return: None
    """
    print("Running test_with_local_fixture...")
    assert True


@pytest.fixture
def local_fixture():
    """
    Fixtures are callables decorated with @pytest.fixture.

    :return: None
    """
    print("\n(Doing local fixture setup stuff!)")


def test_with_global_fixture(global_fixture):
    """
    Fixtures can also be shared across test files (see conftest.py).
    """
    # ... (the source listing is truncated here; the rest of this test is not shown)
Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!