Source: cross_val.py
1"""2Utilities for cross validation.3taken from scikits.learn4# Author: Alexandre Gramfort <alexandre.gramfort@inria.fr>,5# Gael Varoquaux <gael.varoquaux@normalesup.org>6# License: BSD Style.7# $Id$8changes to code by josef-pktd:9 - docstring formatting: underlines of headers10"""11from statsmodels.compat.python import range, lrange12import numpy as np13from itertools import combinations14################################################################################15class LeaveOneOut(object):16 """17 Leave-One-Out cross validation iterator:18 Provides train/test indexes to split data in train test sets19 """20 def __init__(self, n):21 """22 Leave-One-Out cross validation iterator:23 Provides train/test indexes to split data in train test sets24 Parameters25 ----------26 n: int27 Total number of elements28 Examples29 --------30 >>> from scikits.learn import cross_val31 >>> X = [[1, 2], [3, 4]]32 >>> y = [1, 2]33 >>> loo = cross_val.LeaveOneOut(2)34 >>> for train_index, test_index in loo:35 ... print "TRAIN:", train_index, "TEST:", test_index36 ... X_train, X_test, y_train, y_test = cross_val.split(train_index, test_index, X, y)37 ... print X_train, X_test, y_train, y_test38 TRAIN: [False True] TEST: [ True False]39 [[3 4]] [[1 2]] [2] [1]40 TRAIN: [ True False] TEST: [False True]41 [[1 2]] [[3 4]] [1] [2]42 """43 self.n = n44 def __iter__(self):45 n = self.n46 for i in range(n):47 test_index = np.zeros(n, dtype=np.bool)48 test_index[i] = True49 train_index = np.logical_not(test_index)50 yield train_index, test_index51 def __repr__(self):52 return '%s.%s(n=%i)' % (self.__class__.__module__,53 self.__class__.__name__,54 self.n,55 )56################################################################################57class LeavePOut(object):58 """59 Leave-P-Out cross validation iterator:60 Provides train/test indexes to split data in train test sets61 """62 def __init__(self, n, p):63 """64 Leave-P-Out cross validation iterator:65 Provides train/test indexes to split data in train test sets66 Parameters67 ----------68 n: int69 Total number of elements70 p: int71 Size test sets72 Examples73 --------74 >>> from scikits.learn import cross_val75 >>> X = [[1, 2], [3, 4], [5, 6], [7, 8]]76 >>> y = [1, 2, 3, 4]77 >>> lpo = cross_val.LeavePOut(4, 2)78 >>> for train_index, test_index in lpo:79 ... print "TRAIN:", train_index, "TEST:", test_index80 ... 
X_train, X_test, y_train, y_test = cross_val.split(train_index, test_index, X, y)81 TRAIN: [False False True True] TEST: [ True True False False]82 TRAIN: [False True False True] TEST: [ True False True False]83 TRAIN: [False True True False] TEST: [ True False False True]84 TRAIN: [ True False False True] TEST: [False True True False]85 TRAIN: [ True False True False] TEST: [False True False True]86 TRAIN: [ True True False False] TEST: [False False True True]87 """88 self.n = n89 self.p = p90 def __iter__(self):91 n = self.n92 p = self.p93 comb = combinations(lrange(n), p)94 for idx in comb:95 test_index = np.zeros(n, dtype=np.bool)96 test_index[np.array(idx)] = True97 train_index = np.logical_not(test_index)98 yield train_index, test_index99 def __repr__(self):100 return '%s.%s(n=%i, p=%i)' % (101 self.__class__.__module__,102 self.__class__.__name__,103 self.n,104 self.p,105 )106################################################################################107class KFold(object):108 """109 K-Folds cross validation iterator:110 Provides train/test indexes to split data in train test sets111 """112 def __init__(self, n, k):113 """114 K-Folds cross validation iterator:115 Provides train/test indexes to split data in train test sets116 Parameters117 ----------118 n: int119 Total number of elements120 k: int121 number of folds122 Examples123 --------124 >>> from scikits.learn import cross_val125 >>> X = [[1, 2], [3, 4], [1, 2], [3, 4]]126 >>> y = [1, 2, 3, 4]127 >>> kf = cross_val.KFold(4, k=2)128 >>> for train_index, test_index in kf:129 ... print "TRAIN:", train_index, "TEST:", test_index130 ... X_train, X_test, y_train, y_test = cross_val.split(train_index, test_index, X, y)131 TRAIN: [False False True True] TEST: [ True True False False]132 TRAIN: [ True True False False] TEST: [False False True True]133 Notes134 -----135 All the folds have size trunc(n/k), the last one has the complementary136 """137 assert k>0, ValueError('cannot have k below 1')138 assert k<n, ValueError('cannot have k=%d greater than %d'% (k, n))139 self.n = n140 self.k = k141 def __iter__(self):142 n = self.n143 k = self.k144 j = int(np.ceil(n/k))145 for i in range(k):146 test_index = np.zeros(n, dtype=np.bool)147 if i<k-1:148 test_index[i*j:(i+1)*j] = True149 else:150 test_index[i*j:] = True151 train_index = np.logical_not(test_index)152 yield train_index, test_index153 def __repr__(self):154 return '%s.%s(n=%i, k=%i)' % (155 self.__class__.__module__,156 self.__class__.__name__,157 self.n,158 self.k,159 )160################################################################################161class LeaveOneLabelOut(object):162 """163 Leave-One-Label_Out cross-validation iterator:164 Provides train/test indexes to split data in train test sets165 """166 def __init__(self, labels):167 """168 Leave-One-Label_Out cross validation:169 Provides train/test indexes to split data in train test sets170 Parameters171 ----------172 labels : list173 List of labels174 Examples175 --------176 >>> from scikits.learn import cross_val177 >>> X = [[1, 2], [3, 4], [5, 6], [7, 8]]178 >>> y = [1, 2, 1, 2]179 >>> labels = [1, 1, 2, 2]180 >>> lol = cross_val.LeaveOneLabelOut(labels)181 >>> for train_index, test_index in lol:182 ... print "TRAIN:", train_index, "TEST:", test_index183 ... X_train, X_test, y_train, y_test = cross_val.split(train_index, \184 test_index, X, y)185 ... 
print X_train, X_test, y_train, y_test186 TRAIN: [False False True True] TEST: [ True True False False]187 [[5 6]188 [7 8]] [[1 2]189 [3 4]] [1 2] [1 2]190 TRAIN: [ True True False False] TEST: [False False True True]191 [[1 2]192 [3 4]] [[5 6]193 [7 8]] [1 2] [1 2]194 """195 self.labels = labels196 def __iter__(self):197 # We make a copy here to avoid side-effects during iteration198 labels = np.array(self.labels, copy=True)199 for i in np.unique(labels):200 test_index = np.zeros(len(labels), dtype=np.bool)201 test_index[labels==i] = True202 train_index = np.logical_not(test_index)203 yield train_index, test_index204 def __repr__(self):205 return '%s.%s(labels=%s)' % (206 self.__class__.__module__,207 self.__class__.__name__,208 self.labels,209 )210def split(train_indexes, test_indexes, *args):211 """212 For each arg return a train and test subsets defined by indexes provided213 in train_indexes and test_indexes214 """215 ret = []216 for arg in args:217 arg = np.asanyarray(arg)218 arg_train = arg[train_indexes]219 arg_test = arg[test_indexes]220 ret.append(arg_train)221 ret.append(arg_test)222 return ret223'''224 >>> cv = cross_val.LeaveOneLabelOut(X, y) # y making y optional and225possible to add other arrays of the same shape[0] too226 >>> for X_train, y_train, X_test, y_test in cv:227 ... print np.sqrt((model.fit(X_train, y_train).predict(X_test)228- y_test) ** 2).mean())229'''230################################################################################231#below: Author: josef-pktd232class KStepAhead(object):233 """234 KStepAhead cross validation iterator:235 Provides fit/test indexes to split data in sequential sets236 """237 def __init__(self, n, k=1, start=None, kall=True, return_slice=True):238 """239 KStepAhead cross validation iterator:240 Provides train/test indexes to split data in train test sets241 Parameters242 ----------243 n: int244 Total number of elements245 k : int246 number of steps ahead247 start : int248 initial size of data for fitting249 kall : boolean250 if true. all values for up to k-step ahead are included in the test index.251 If false, then only the k-th step ahead value is returnd252 Notes253 -----254 I don't think this is really useful, because it can be done with255 a very simple loop instead.256 Useful as a plugin, but it could return slices instead for faster array access.257 Examples258 --------259 >>> from scikits.learn import cross_val260 >>> X = [[1, 2], [3, 4]]261 >>> y = [1, 2]262 >>> loo = cross_val.LeaveOneOut(2)263 >>> for train_index, test_index in loo:264 ... print "TRAIN:", train_index, "TEST:", test_index265 ... X_train, X_test, y_train, y_test = cross_val.split(train_index, test_index, X, y)266 ... 
print X_train, X_test, y_train, y_test267 TRAIN: [False True] TEST: [ True False]268 [[3 4]] [[1 2]] [2] [1]269 TRAIN: [ True False] TEST: [False True]270 [[1 2]] [[3 4]] [1] [2]271 """272 self.n = n273 self.k = k274 if start is None:275 start = int(np.trunc(n*0.25)) # pick something arbitrary276 self.start = start277 self.kall = kall278 self.return_slice = return_slice279 def __iter__(self):280 n = self.n281 k = self.k282 start = self.start283 if self.return_slice:284 for i in range(start, n-k):285 train_slice = slice(None, i, None)286 if self.kall:287 test_slice = slice(i, i+k)288 else:289 test_slice = slice(i+k-1, i+k)290 yield train_slice, test_slice291 else: #for compatibility with other iterators292 for i in range(start, n-k):293 train_index = np.zeros(n, dtype=np.bool)294 train_index[:i] = True295 test_index = np.zeros(n, dtype=np.bool)296 if self.kall:297 test_index[i:i+k] = True # np.logical_not(test_index)298 else:299 test_index[i+k-1:i+k] = True300 #or faster to return np.arange(i,i+k) ?301 #returning slice should be faster in this case302 yield train_index, test_index303 def __repr__(self):304 return '%s.%s(n=%i)' % (self.__class__.__module__,305 self.__class__.__name__,306 self.n,...
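Two caveats on the snippet above: the Examples block in the KStepAhead docstring is copied from LeaveOneOut and does not show what this iterator actually yields, and np.bool has since been removed from NumPy (plain bool works). As a minimal standalone sketch of the expanding-window logic in the return_slice branch (the helper name k_step_ahead_slices is hypothetical, not part of the module):

import numpy as np

def k_step_ahead_slices(n, k=1, start=None):
    # Same arbitrary default starting size as KStepAhead.__init__ above.
    if start is None:
        start = int(np.trunc(n * 0.25))
    for i in range(start, n - k):
        # Train on everything before i; test on the next k observations.
        yield slice(None, i), slice(i, i + k)

y = np.arange(10.0)
for train_slice, test_slice in k_step_ahead_slices(10, k=2, start=6):
    print(y[train_slice], "->", y[test_slice])
# The training window grows by one point per step while the 2-step test
# window slides forward, e.g. [0..5] -> [6, 7], then [0..6] -> [7, 8].

Yielding slices rather than boolean masks avoids allocating a length-n array on every step, which is presumably why return_slice=True is the default.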
Source: entrenar.py
from utilities import *
from scipy.sparse import coo_matrix, hstack
from sklearn.tree import DecisionTreeClassifier
from sklearn.cross_validation import cross_val_score, KFold
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm
from sklearn import ensemble
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import fbeta_score, make_scorer
import pickle

# Read the mails (set the correct paths).
ham_txt = json.load(open('../../dataset_json/ham_dev.json'))
spam_txt = json.load(open('../../dataset_json/spam_dev.json'))

# Lowercase every mail
ham_txt = map(lambda x: x.lower(), ham_txt)
spam_txt = map(lambda x: x.lower(), spam_txt)

# Build the data frame
df = pd.DataFrame(ham_txt + spam_txt, columns=['text'])
# 0 = ham, 1 = spam (binarizing is required to use the f_0.5 score)
df['class'] = [0 for _ in range(len(ham_txt))] + [1 for _ in range(len(spam_txt))]
print "Read json and built the data frame"

# Extract simple attributes
# Feature marking whether the mail contains html
HTML = coo_matrix(map(hasHTML, df.text)).transpose()
# Feature marking whether the mail has a subject
SUBJ = coo_matrix(map(hasSubject, df.text)).transpose()
# Length of the mail.
LEN = coo_matrix(map(len, df.text)).transpose()
# Number of spaces in the mail.
SPACES = coo_matrix(map(count_spaces, df.text)).transpose()
print "Classified by simple attributes"

vectorizer = obtenerVectorizer()
word_freq_matrix = vectorizer.transform(df.text)
print "Built the matrix"

X = hstack([HTML, SUBJ, LEN, SPACES, word_freq_matrix]).toarray()
y = df['class']

# Define the F-beta score with beta=0.5
# (we favor precision over recall)
f05_scorer = make_scorer(fbeta_score, beta=0.5)
print "Defining classifiers"

# Decision Tree
'''
print "Decision Tree"
clf = DecisionTreeClassifier(max_features=None, max_leaf_nodes=100, min_samples_split=2, criterion='gini')
kf = KFold(72000, n_folds=10, shuffle=True)
best_score = 0
best_clf = 0
for train_index, test_index in kf:
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    score = f05_scorer(clf, X_test, y_test)
    if score > best_score:
        best_clf = clf
        best_score = score
fout = open('dectree.pickle', 'w')
pickle.dump(best_clf, fout)
fout.close()

# Random Forest
print "Random Forest"
clf = ensemble.RandomForestClassifier(max_features=0.5, max_leaf_nodes=None, min_samples_split=4, criterion='gini', n_estimators=20, n_jobs=4)
kf = KFold(72000, n_folds=10, shuffle=True)
best_score = 0
best_clf = 0
for train_index, test_index in kf:
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    score = f05_scorer(clf, X_test, y_test)
    if score > best_score:
        best_clf = clf
        best_score = score
fout = open('ranfor.pickle', 'w')
pickle.dump(clf, fout)
fout.close()
'''

# SVM
print "SVM"
clf = svm.SVC(kernel='rbf', C=1, gamma=1.0)
kf = KFold(72000, n_folds=3, shuffle=True)
best_score = 0
best_clf = 0
for train_index, test_index in kf:
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    score = f05_scorer(clf, X_test, y_test)
    if score > best_score:
        best_clf = clf
        best_score = score
fout = open('svm.pickle', 'w')
pickle.dump(clf, fout)
fout.close()

'''
# Naive Bayes
print "Gaussian NB"
clf = GaussianNB()
kf = KFold(72000, n_folds=10, shuffle=True)
best_score = 0
best_clf = 0
for train_index, test_index in kf:
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    score = f05_scorer(clf, X_test, y_test)
    if score > best_score:
        best_clf = clf
        best_score = score
fout = open('gaussianNB.pickle', 'w')
pickle.dump(clf, fout)
fout.close()

#########################
print "Multinomial NB"
clf = MultinomialNB(alpha=0.25, fit_prior=False)
kf = KFold(72000, n_folds=10, shuffle=True)
best_score = 0
best_clf = 0
for train_index, test_index in kf:
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    score = f05_scorer(clf, X_test, y_test)
    if score > best_score:
        best_clf = clf
        best_score = score
fout = open('multinomialNB.pickle', 'w')
pickle.dump(clf, fout)
fout.close()

#######################
print "Bernoulli NB"
clf = BernoulliNB(binarize=0.0, alpha=0.25, fit_prior=False)
kf = KFold(72000, n_folds=10, shuffle=True)
best_score = 0
best_clf = 0
for train_index, test_index in kf:
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    score = f05_scorer(clf, X_test, y_test)
    if score > best_score:
        best_clf = clf
        best_score = score
fout = open('bernoulliNB.pickle', 'w')
pickle.dump(clf, fout)
fout.close()

# KNN
print "KNN"
clf = KNeighborsClassifier(n_neighbors=1, weights='uniform', leaf_size=15, algorithm='kd_tree')
kf = KFold(72000, n_folds=10, shuffle=True)
best_score = 0
best_clf = 0
for train_index, test_index in kf:
    print("TRAIN:", train_index, "TEST:", test_index)
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    score = f05_scorer(clf, X_test, y_test)
    if score > best_score:
        best_clf = clf
        best_score = score
fout = open('knn.pickle', 'w')
pickle.dump(clf, fout)
fout.close()
...
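entrenar.py targets Python 2 and a pre-0.18 scikit-learn: sklearn.cross_validation and sklearn.grid_search were removed in scikit-learn 0.20, and KFold no longer takes the number of samples as a constructor argument. A minimal sketch of the same SVM fold-and-score loop against the current sklearn.model_selection API, with hypothetical random stand-in data replacing the mail feature matrix built above:

import pickle
import numpy as np
from sklearn.model_selection import KFold  # replaces sklearn.cross_validation.KFold
from sklearn.metrics import fbeta_score, make_scorer
from sklearn.svm import SVC

# Hypothetical toy data standing in for the mail feature matrix X and labels y.
rng = np.random.RandomState(0)
X = rng.rand(200, 5)
y = rng.randint(0, 2, size=200)

# Same F-beta(0.5) scorer as in entrenar.py: favors precision over recall.
f05_scorer = make_scorer(fbeta_score, beta=0.5)

clf = SVC(kernel='rbf', C=1, gamma=1.0)
kf = KFold(n_splits=3, shuffle=True, random_state=0)  # n_splits replaces n_folds

best_score, best_clf = 0.0, None
for train_index, test_index in kf.split(X):  # split() now yields the index arrays
    X_train, X_test = X[train_index], X[test_index]
    y_train, y_test = y[train_index], y[test_index]
    clf.fit(X_train, y_train)
    score = f05_scorer(clf, X_test, y_test)
    if score > best_score:
        best_score, best_clf = score, clf

# Pickle in binary mode ('wb'), unlike the text-mode open() in the original.
with open('svm.pickle', 'wb') as fout:
    pickle.dump(best_clf, fout)

One behavioral detail worth noting: the original SVM loop pickles clf, i.e. whichever model was fitted on the last fold, even though it tracks best_clf; the sketch saves the best-scoring fit, which appears to be the intent.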