Best Python code snippet using pytest-bdd_python
rand_forr2.py
Source: rand_forr2.py
# Joseph Day
# 3/5/17
# Cleaning the data and building a classifier

import random
import csv
import pandas as pd
import pickle
import numpy as np
import sklearn
from sklearn.metrics import roc_auc_score
from sklearn.metrics import accuracy_score
from sklearn.ensemble import GradientBoostingClassifier as gbc
from sklearn.ensemble import RandomForestClassifier as rfc

df_tour = pd.read_csv('TourneyCompactResults.csv')
df_tour.drop(labels=['Daynum', 'Wscore', 'Lscore', 'Wloc', 'Numot'], inplace=True, axis=1)
df_teams = pd.read_csv('season_averages.csv')
df_seeds = pd.read_csv('TourneySeeds.csv')

def seed_to_int(seed):
    """Get just the digits from the seeding. Return as int."""
    s_int = int(seed[1:3])
    return s_int

df_seeds['n_seed'] = df_seeds.Seed.apply(seed_to_int)
df_seeds.drop(labels=['Seed'], inplace=True, axis=1)

# Shooting percentages from made/attempted totals
df_teams['fgp'] = df_teams['fgm'] / df_teams['fga']
df_teams['3p'] = df_teams['fgm3'] / df_teams['fga3']
df_teams['ftp'] = df_teams['ftm'] / df_teams['fta']

df_winseeds = df_seeds.rename(columns={'Team': 'Wteam', 'n_seed': 'win_seed'})
df_lossseeds = df_seeds.rename(columns={'Team': 'Lteam', 'n_seed': 'loss_seed'})
df_dummy = pd.merge(left=df_tour, right=df_winseeds, how='left', on=['Season', 'Wteam'])
df_concat1 = pd.merge(left=df_dummy, right=df_lossseeds, on=['Season', 'Lteam'])
df_concat1['seed_diff'] = df_concat1.win_seed - df_concat1.loss_seed

df_winteams = df_teams.rename(columns={'Team_Id': 'Wteam', 'score': 'w_score', 'fgp': 'w_fgp', '3p': 'w_3p',
    'ftp': 'w_ftp', 'or': 'w_or', 'dr': 'w_dr', 'ast': 'w_ast', 'to': 'w_to', 'stl': 'w_stl', 'blk': 'w_blk',
    'pf': 'w_pf'})
df_lossteams = df_teams.rename(columns={'Team_Id': 'Lteam', 'score': 'l_score', 'fgp': 'l_fgp', '3p': 'l_3p',
    'ftp': 'l_ftp', 'or': 'l_or', 'dr': 'l_dr', 'ast': 'l_ast', 'to': 'l_to', 'stl': 'l_stl', 'blk': 'l_blk',
    'pf': 'l_pf'})
df_dummy = pd.merge(left=df_tour, right=df_winteams, how='left', on=['Season', 'Wteam'])
df_concat = pd.merge(left=df_dummy, right=df_lossteams, on=['Season', 'Lteam'])
df_concat.drop(labels=['Unnamed: 0_x', 'Unnamed: 0_y', 'fgm_x', 'fgm_y', 'fga_x', 'fga_y',
    'fgm3_x', 'fgm3_y', 'fga3_x', 'fga3_y', 'ftm_x', 'ftm_y', 'fta_y', 'fta_x',
    'Team_Name_x', 'Team_Name_y'], inplace=True, axis=1)  # Drop raw totals and the string labels
df_concat = pd.merge(left=df_concat, right=df_concat1, how='left', on=['Season', 'Wteam', 'Lteam'])
df_concat.drop(labels=['win_seed', 'loss_seed'], inplace=True, axis=1)

df_concat['pt_diff'] = df_concat.w_score - df_concat.l_score
df_concat['ast_diff'] = df_concat.w_ast - df_concat.l_ast
df_concat['or_diff'] = df_concat.w_or - df_concat.l_or
df_concat['dr_diff'] = df_concat.w_dr - df_concat.l_dr
df_concat['to_diff'] = df_concat.w_to - df_concat.l_to
df_concat['stl_diff'] = df_concat.w_stl - df_concat.l_stl
df_concat['blk_diff'] = df_concat.w_blk - df_concat.l_blk
df_concat['pf_diff'] = df_concat.w_pf - df_concat.l_pf
df_concat['fgp_diff'] = df_concat.w_fgp - df_concat.l_fgp
df_concat['3p_diff'] = df_concat.w_3p - df_concat.l_3p
df_concat['ftp_diff'] = df_concat.w_ftp - df_concat.l_ftp

feature_cols = ['pt_diff', 'ast_diff', 'or_diff', 'dr_diff', 'to_diff', 'stl_diff', 'blk_diff',
                'pf_diff', 'fgp_diff', '3p_diff', 'ftp_diff', 'seed_diff', 'Season', 'Wteam', 'Lteam']
df_wins = df_concat[feature_cols].copy()  # .copy() avoids SettingWithCopyWarning below
df_wins['result'] = 1
# Negation flips the diff features to represent the losing side;
# the negated ID columns (Season, Wteam, Lteam) are dropped before training anyway.
df_losses = -df_concat[feature_cols]
df_losses['result'] = 0
df_for_predictions = pd.concat((df_wins, df_losses))
columns = df_for_predictions.columns.tolist()
columns = columns[:-4]  # Keep the 12 diff features; drop Season, Wteam, Lteam, result
given_data = df_for_predictions[columns].values  # DataFrame.as_matrix() was removed; use .values
target_data = np.array(df_for_predictions['result'].tolist())
clf = gbc(n_estimators=10)
clf = clf.fit(given_data, target_data)

df_sample_sub = pd.read_csv('sample_submission.csv')
n_test_games = len(df_sample_sub)

def get_year_t1_t2(id):
    """Return a tuple with ints `year`, `team1` and `team2`."""
    return (int(x) for x in id.split('_'))

X_test = np.zeros(shape=(n_test_games, 12))
for ii, row in df_sample_sub.iterrows():
    year, t1, t2 = get_year_t1_t2(row.id)
    # There absolutely must be a better way of doing this!
    t1_score = df_teams[(df_teams.Team_Id == t1) & (df_teams.Season == year)][['score', 'ast', 'or', 'dr', 'to', 'stl', 'blk', 'pf', 'fgp', '3p', 'ftp']].values[0]
    t2_score = df_teams[(df_teams.Team_Id == t2) & (df_teams.Season == year)][['score', 'ast', 'or', 'dr', 'to', 'stl', 'blk', 'pf', 'fgp', '3p', 'ftp']].values[0]
    t1_seed = df_seeds[(df_seeds.Team == t1) & (df_seeds.Season == year)].n_seed.values[0]
    t2_seed = df_seeds[(df_seeds.Team == t2) & (df_seeds.Season == year)].n_seed.values[0]
    for i in range(0, 11):
        X_test[ii, i] = t1_score[i] - t2_score[i]
    X_test[ii, 11] = t1_seed - t2_seed

preds = clf.predict_proba(X_test)[:, 1]  # probability of the positive class, not the full (n, 2) array
clipped_preds = np.clip(preds, 0.05, 0.95)
df_sample_sub['pred'] = clipped_preds  # column assignment; writing the raw preds here discarded the clipping
df_sample_sub.head()
df_sample_sub.to_csv('rand_forr5.csv', index=False)

def calc_errors(forest, test_file):
    '''Calculate type I and type II error rates,
    useful for ensuring there are not too many of either.
    '''
    clf = forest
    test_given, test_target = process(test_file, 'responded')  # `process` is assumed to be defined elsewhere
    predictions = clf.predict(test_given)
    typeI = 0
    typeII = 0
    total = 0
    for i in range(1, len(test_target)):  # note: starts at 1, skipping the first sample
        if predictions[i] == test_target[i]:
            pass
        else:
            if predictions[i] == 'yes':
                typeI += 1
            else:
                typeII += 1
        total += 1
    typeI = typeI / total
    typeII = typeII / total
    return typeI, typeII

def roc_auc(forest, test_file):
    '''Return the area under the ROC curve for the
    classifier provided, evaluated on the test data provided.
    '''
    clf = forest
    test_given, test_target = process(test_file, 'responded')
    predictions = clf.predict_proba(test_given)
    test_target = pd.DataFrame(test_target)
    test_target.replace(['yes', 'no'], [1, 0], inplace=True)
    test_target = np.array(test_target[0].tolist())
    predictions = predictions[:, 1]
    return roc_auc_score(test_target, predictions)

def score(forest, test_file):
    '''Take a forest/gbc and a test file and
    print various metrics for evaluation.
    '''
    roc_score = roc_auc(forest, test_file)
    test_given, test_target = process(test_file, 'responded')
    predictions = forest.predict(test_given)

    typeI, typeII = calc_errors(forest, test_file)
    accuracy = accuracy_score(test_target, predictions)

    print("Roc_Auc_Score is ", roc_score)
    print("Type I Error Rate is ", typeI)
    print("Type II Error Rate is ", typeII)
...
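The script clips its submission probabilities to [0.05, 0.95] before writing them out. A minimal sketch of why that helps, using toy labels and probabilities rather than the tournament data above: a Kaggle-style log loss punishes confident wrong answers without bound, so clipping caps the per-game penalty at -log(0.05).

import numpy as np
from sklearn.metrics import log_loss

y_true = np.array([1, 0, 1, 1])
raw = np.array([0.99, 0.01, 0.001, 0.6])   # one confident miss: 0.001 on an actual win
clipped = np.clip(raw, 0.05, 0.95)

print(log_loss(y_true, raw))      # dominated by -log(0.001) ~ 6.9 from the miss
print(log_loss(y_true, clipped))  # the miss now costs at most -log(0.05) ~ 3.0

The trade-off is that clipping also caps the reward for correct confident picks, so 0.05/0.95 is a tuning choice rather than a universal constant.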
analyze.py
Source: analyze.py
import numpy as np

ratio = 0.9
train_ratio = int(ratio * 50)  # 50 rows total: 45 train, 5 test
feature_name = ['annual police funding in $/resident',
                '% of people 25 years+ with 4 yrs. of high school',
                '% of 16 to 19 year-olds not in highschool and not highschool graduates',
                '% of 18 to 24 year-olds in college',
                '% of people 25 years+ with at least 4 years of college']
train_x = []; test_x = []; train_total = []; test_total = []; train_violent = []; test_violent = []

with open('data.txt', 'r') as to_read:
    did_read = to_read.readlines()
    for line in did_read[:train_ratio]:
        # list comprehension, not map(): np.matrix cannot consume a Python 3 map object
        train_x.append([np.float64(x) for x in line.split()[2:]])
        train_total.append(np.float64(line.split()[0]))
        train_violent.append(np.float64(line.split()[1]))
    for line in did_read[train_ratio:]:
        test_x.append([np.float64(x) for x in line.split()[2:]])
        test_total.append(np.float64(line.split()[0]))
        test_violent.append(np.float64(line.split()[1]))

train_x = np.matrix(train_x, dtype=float)
test_x = np.matrix(test_x, dtype=float)
train_total = np.matrix(train_total, dtype=float)
train_violent = np.matrix(train_violent, dtype=float)
test_total = np.matrix(test_total, dtype=float)
test_violent = np.matrix(test_violent, dtype=float)

# Normalize features (zero mean, unit L2 norm per column) and center targets
train_x -= np.mean(train_x, 0)
train_x /= np.sqrt(np.sum(np.square(train_x), 0))
test_x -= np.mean(test_x, 0)
test_x /= np.sqrt(np.sum(np.square(test_x), 0))
train_total -= np.mean(train_total)
train_violent -= np.mean(train_violent)
test_total -= np.mean(test_total)
test_violent -= np.mean(test_violent)

# -------------------------------------------------------------------------------------------------------------
def calc_beta(test_given, train_given):
    '''Ridge regression over a grid of lambdas; returns the coefficient
    vector with the lowest test MSE. Uses the global train_x/test_x.'''
    beta = np.zeros((1000, 5, 1), dtype=float)
    y = []
    mse = np.zeros((1000, 1), dtype=float)
    lambdas = np.logspace(-10, 1, 1000)
    for i in range(len(lambdas)):
        # Closed-form ridge solution: (X'X + lambda*I)^-1 X'y
        beta[i] = (train_x.T * train_x + lambdas[i] * np.identity(5)).I * train_x.T * train_given.T
        to_app = []
        for j in range(5):  # 5 test samples
            tmp = 0
            for k in range(5):
                tmp += float(test_x.item(j, k)) * float(beta[i][k])
            to_app.append(tmp)
        y.append(to_app)
        acc = []
        for j in range(5):
            acc.append(y[i][j] - test_given.item(j))
        mse[i] = sum(np.square(acc))
    idx = np.argmin(mse)
    return beta[idx]
# -------------------------------------------------------------------------------------------------------------
if __name__ == "__main__":
    beta = calc_beta(test_total, train_total)
    err = 0; abs_err = 0
    print('Total overall reported crime rate per 1 million residents predictions:')
    for i in range(len(np.array(test_total)[0])):
        predicted = 0
        for j in range(5):
            predicted += beta[j] * test_x.item(i, j)
        err += np.array(test_total)[0][i] - predicted[0]
        abs_err += abs(np.array(test_total)[0][i] - predicted[0])
        print('actual value: {}, predicted value: {}'.format(np.array(test_total)[0][i], predicted[0]))
    print('Total signed error: {}, absolute error per prediction: {}'.format(err, abs_err / 5))
    print('Prediction coefficients: ')  # print() call; the bare Python 2 print statement is a syntax error in Python 3
    for i in range(5):
        print('{}) {}: {}'.format(i + 1, feature_name[i], beta[i][0]))
    print('----------------------------------------------------------------------------------------------------')
    beta = calc_beta(test_violent, train_violent)
    err = 0; abs_err = 0
    print('Reported violent crime rate per 100,000 residents predictions:')
    for i in range(len(np.array(test_total)[0])):
        predicted = 0
        for j in range(5):
            predicted += beta[j] * test_x.item(i, j)
        err += np.array(test_violent)[0][i] - predicted[0]
        abs_err += abs(np.array(test_violent)[0][i] - predicted[0])
        print('actual value: {}, predicted value: {}'.format(np.array(test_violent)[0][i], predicted[0]))
    print('Total signed error: {}, absolute error per prediction: {}'.format(err, abs_err / 5))
    print('Prediction coefficients: ')
    for i in range(5):
...
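calc_beta implements the closed-form ridge solution beta = (X'X + lambda*I)^-1 X'y over a grid of lambdas. A minimal sketch on synthetic data (the shapes, seed, and coefficients below are illustrative) showing that, with centered data and no intercept, this formula agrees with sklearn's Ridge:

import numpy as np
from sklearn.linear_model import Ridge

rng = np.random.default_rng(0)
X = rng.standard_normal((45, 5))                                  # 45 samples, 5 features
y = X @ np.array([1.0, -2.0, 0.5, 0.0, 3.0]) + 0.1 * rng.standard_normal(45)
lam = 0.1

# Closed form: solve (X'X + lambda*I) beta = X'y rather than inverting explicitly
beta_closed = np.linalg.solve(X.T @ X + lam * np.identity(5), X.T @ y)
beta_sklearn = Ridge(alpha=lam, fit_intercept=False).fit(X, y).coef_
print(np.allclose(beta_closed, beta_sklearn))  # True

Using np.linalg.solve on plain ndarrays is numerically safer than the np.matrix .I inverse the script uses, and np.matrix itself is deprecated in modern NumPy.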
main.py
Source: main.py
import argparse
import reader
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.utils.data_utils import get_file
from keras.layers import Embedding
import json
import numpy as np
import random
import sys
import os
import shutil
import requests

parser = argparse.ArgumentParser()
parser.add_argument("--test", help="path of test.txt file")
args = parser.parse_args()

with open('word-ix.txt', 'r') as f:
    word_to_ix = json.loads(f.read())

model = Sequential()
model.add(Embedding(10000, output_dim=64))
model.add(LSTM(100, return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(10000))
model.add(Activation('softmax'))
model.compile(loss='categorical_crossentropy', optimizer='rmsprop')

if args.test:
    test_given = reader._file_to_word_ids(args.test, word_to_ix)
    maxlen = 15
    step = 5
    sentences = []
    next_words = []
    for i in range(0, len(test_given) - maxlen, 1):
        sentences2 = test_given[i: i + maxlen]
        sentences.append(sentences2)
        next_words.append(test_given[i + maxlen])
    X_test = np.zeros((len(sentences), maxlen))
    y_test = np.zeros((len(sentences)), dtype=np.int32)
    for i, sentence in enumerate(sentences):
        for t, word in enumerate(sentence):
            X_test[i, t] = word
        y_test[i] = next_words[i]
    if os.path.isfile('weights'):
        model.load_weights('weights')
    else:
        print("Downloading Model ......")
        url = 'https://raw.githubusercontent.com/shivam207/deep_learning/master/prog_asg3/weights/model_cnn.h5'
        response = requests.get(url, stream=True)
        with open('model_cnn.h5', 'wb') as out_file:  # binary mode for the raw byte stream
            shutil.copyfileobj(response.raw, out_file)
        del response
        model.load_weights('model_cnn.h5')  # load the downloaded weights; the original never loaded them
    preds = model.predict(X_test, verbose=0)
    loss = 0
    for i, pred in enumerate(preds):
        # accumulate a loss term from the predicted probability of the true next word
        loss += -pred[y_test[i]] * np.log(pred[y_test[i]])
    print(loss)
else:
    train_data, valid_data, test_data, vocabulary, word_to_ix = reader.ptb_raw_data("data")
    words = set(train_data)
    words |= set(valid_data)  # set.union returns a new set; update in place so the vocab actually grows
    words |= set(test_data)
    maxlen = 15
    step = 5
    print("maxlen:", maxlen, "step:", step)
    sentences = []
    next_words = []
    for i in range(0, len(train_data) - maxlen, step):
        sentences2 = train_data[i: i + maxlen]
        sentences.append(sentences2)
        next_words.append(train_data[i + maxlen])
    print('nb sequences(length of sentences):', len(sentences))
    print("length of next_word", len(next_words))
    print('Vectorization...')
    X = np.zeros((len(sentences), maxlen))
    y = np.zeros((len(sentences), len(words)), dtype=bool)  # np.bool is removed in recent NumPy
    for i, sentence in enumerate(sentences):
        for t, word in enumerate(sentence):
            X[i, t] = word
        y[i, next_words[i]] = 1  # one-hot target, set once per sentence
    for iteration in range(1, 300):
        print()
        print('-' * 50)
        print('Iteration', iteration)
        model.fit(X, y, batch_size=200, nb_epoch=2)
...
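Both branches of main.py vectorize text the same way: slide a window of maxlen word ids over the sequence, use each window as one input row, and use the word immediately after the window as its target. A minimal standalone sketch with a toy id sequence (the values of maxlen and step here are illustrative):

import numpy as np

data = list(range(20))  # stand-in for a list of word ids
maxlen, step = 5, 2

windows = [data[i: i + maxlen] for i in range(0, len(data) - maxlen, step)]
targets = [data[i + maxlen] for i in range(0, len(data) - maxlen, step)]

X = np.array(windows)    # shape (n_windows, maxlen): one row per window
y = np.array(targets)    # shape (n_windows,): the next word id for each window
print(X.shape, y.shape)  # (8, 5) (8,)

The training branch uses step=5 to thin out overlapping windows, while the test branch uses step=1 so every position in the test text is scored.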