How to use test_inner method in hypothesis

Best Python code snippet using hypothesis

regression.py

Source:regression.py Github

copy

Full Screen

import numpy as np
import sklearn
from sklearn.linear_model import ARDRegression, LinearRegression
from sklearn.metrics import roc_curve, auc
import sklearn.linear_model
import util
import metrics as ranking_metrics
import predict


def ARDRegression_on_fold(feature_sets, train, test, y, y_all, X, dim, dimsum, learn_options):
    '''
    Fit an ARDRegression on X[train] / y[train][:, 0] and predict X[test].

    Only X, y, train and test are used; the remaining parameters are accepted
    but ignored by this function.

    Returns (y_pred, clf), where y_pred carries an extra trailing axis.
    '''
    clf = ARDRegression()
    clf.fit(X[train], y[train][:, 0])
    y_pred = clf.predict(X[test])[:, None]
    return y_pred, clf


def train_linreg_model(alpha, l1r, learn_options, fold, X, y, y_all):
    '''
    Fit one penalized linear model on the rows selected by fold.

    fold is something like train_inner (boolean array specifying what is in the fold)

    learn_options["penalty"] selects the model:
      "L2"         -> Ridge, fitted with the sample weights from get_weights
      "EN" / "L1"  -> ElasticNet (loss "squared") or SGDRegressor (loss "huber")

    Raises ValueError for any other penalty/loss combination (previously this
    surfaced as an unbound-variable error on clf).
    '''
    penalty = learn_options["penalty"]
    if penalty == "L2":
        clf = sklearn.linear_model.Ridge(alpha=alpha, fit_intercept=True, normalize=True,
                                         copy_X=True, max_iter=None, tol=0.001, solver='auto')
        weights = get_weights(learn_options, fold, y, y_all)
        clf.fit(X[fold], y[fold], sample_weight=weights)
    elif penalty == 'EN' or penalty == 'L1':
        loss = learn_options["loss"]
        if loss == "squared":
            clf = sklearn.linear_model.ElasticNet(alpha=alpha, l1_ratio=l1r, fit_intercept=True,
                                                  normalize=True, max_iter=3000)
        elif loss == "huber":
            clf = sklearn.linear_model.SGDRegressor('huber', epsilon=0.7, alpha=alpha,
                                                    l1_ratio=l1r, fit_intercept=True, n_iter=10,
                                                    penalty='elasticnet', shuffle=True)
        else:
            raise ValueError("unsupported loss for penalty %s: %s" % (penalty, loss))
        clf.fit(X[fold], y[fold])
    else:
        raise ValueError("unsupported penalty: %s" % penalty)
    return clf


def logreg_on_fold(feature_sets, train, test, y, y_all, X, dim, dimsum, learn_options):
    '''
    (L1/L2 penalized) logistic regression using scikit-learn.

    An inner cross-validation over learn_options["alpha"] (folds come from
    set_up_folds) picks the alpha with the best mean AUC; the model is then
    refitted on all of X[train] with C = 1/alpha and used to predict X[test].

    Returns (y_pred, clf), where y_pred carries an extra trailing axis.
    Raises Exception if the training metric is not "AUC", if no finite
    performance value was obtained, or if the best performance is negative.
    '''

    assert len(np.unique(y)) <= 2, "if using logreg need binary targets"
    assert learn_options["weighted"] is None, "cannot do weighted Log reg"
    assert learn_options['feature_select'] is False, "cannot do feature selection yet in logistic regression--see linreg_on_fold to implement"

    cv, n_folds = set_up_folds(learn_options, y_all.iloc[train])

    assert learn_options['penalty'] == "L1" or learn_options['penalty'] == "L2", "can only use L1 or L2 with logistic regression"

    performance = np.zeros((len(learn_options["alpha"]), 1))
    for train_inner, test_inner in cv:
        for i, alpha in enumerate(learn_options["alpha"]):
            clf = sklearn.linear_model.LogisticRegression(penalty=learn_options['penalty'], dual=False,
                                                          fit_intercept=True, class_weight='auto',
                                                          tol=0.001, C=1.0/alpha)
            clf.fit(X[train][train_inner], y[train][train_inner].flatten())
            tmp_pred = clf.predict(X[train][test_inner])

            if learn_options["training_metric"] == "AUC":
                fpr, tpr, _ = roc_curve(y_all[learn_options["ground_truth_label"]][train][test_inner], tmp_pred)
                assert not np.any(np.isnan(fpr)), "found nan fpr"
                assert not np.any(np.isnan(tpr)), "found nan tpr"
                tmp_auc = auc(fpr, tpr)
                performance[i] += tmp_auc
            else:
                raise Exception("can only use AUC metric for cv with classification")

    performance /= n_folds

    max_perf = np.nanmax(performance)
    best_rows = np.where(performance == max_perf)[0]
    if len(best_rows) == 0:
        raise Exception("no finite cross-validation performance, cannot select alpha")
    # NOTE(review): the original asserted `max_score_ind != len(performance)`
    # ("enlarge alpha range as hitting max boundary"), but that compared a tuple
    # with an int and could never fail, so no boundary check is enforced here.

    # in the unlikely event of tied scores, take the first one.  Indexing with
    # a scalar keeps best_alpha a scalar rather than a 1-element array.
    best_alpha = learn_options["alpha"][best_rows[0]]

    print("\t\tbest alpha is %f from range=%s" % (best_alpha, learn_options["alpha"][[0, -1]]))

    if max_perf < 0.0:
        raise Exception("performance is negative")

    print("\t\tbest performance is %f" % max_perf)

    clf = sklearn.linear_model.LogisticRegression(penalty=learn_options['penalty'],
                                                  dual=False, fit_intercept=True, class_weight='auto',
                                                  tol=0.001, C=1.0/best_alpha)
    clf.fit(X[train], y[train].flatten())
    y_pred = clf.predict(X[test])
    y_pred = y_pred[:, None]
    return y_pred, clf


def linreg_on_fold(feature_sets, train, test, y, y_all, X, dim, dimsum, learn_options):
    '''
    linreg using scikit-learn, using more standard regression models with
    penalization requiring nested-cross-validation

    The inner cross-validation (folds from set_up_folds) scores every
    (alpha, l1_ratio) pair with learn_options["training_metric"]; the best pair
    is used to refit on X[train] and predict X[test].

    Returns (y_pred, clf).  For penalties other than "L2" an extra trailing
    axis is added to y_pred.
    Raises NotImplementedError for weighted fits other than L2 linreg,
    ValueError for an unknown penalty, and Exception if no finite performance
    was obtained, if the best performance is negative, or if feature selection
    is requested for the final fit.
    '''

    if learn_options["weighted"] is not None and (learn_options["penalty"] != "L2" or learn_options["method"] != "linreg"):
        raise NotImplementedError("weighted prediction not implemented for any methods by L2 at the moment")

    cv, n_folds = set_up_folds(learn_options, y_all.iloc[train])

    if learn_options['penalty'] == "L1":
        l1_ratio = [1.0]
    elif learn_options['penalty'] == "L2":
        l1_ratio = [0.0]
    elif learn_options['penalty'] == "EN":  # elastic net
        l1_ratio = np.linspace(0.0, 1.0, 20)
    else:
        raise ValueError("unsupported penalty: %s" % learn_options['penalty'])

    ground_truth_label = learn_options["ground_truth_label"]
    performance = np.zeros((len(learn_options["alpha"]), len(l1_ratio)))
    degenerate_pred = np.zeros((len(learn_options["alpha"])))
    for train_inner, test_inner in cv:
        for i, alpha in enumerate(learn_options["alpha"]):
            for j, l1r in enumerate(l1_ratio):
                clf = train_linreg_model(alpha, l1r, learn_options, train_inner, X[train], y[train], y_all.iloc[train])
                if learn_options["feature_select"]:
                    clf, tmp_pred = feature_select(clf, learn_options, test_inner, train_inner, X[train], y[train])
                else:
                    tmp_pred = clf.predict(X[train][test_inner])

                if learn_options["training_metric"] == "AUC":
                    fpr, tpr, _ = roc_curve(y_all[ground_truth_label][train][test_inner], tmp_pred)
                    assert not np.any(np.isnan(fpr)), "found nan fpr"
                    assert not np.any(np.isnan(tpr)), "found nan tpr"
                    tmp_auc = auc(fpr, tpr)
                    performance[i, j] += tmp_auc

                elif learn_options['training_metric'] == 'spearmanr':
                    spearman = util.spearmanr_nonan(y_all[ground_truth_label][train][test_inner], tmp_pred.flatten())[0]
                    performance[i, j] += spearman

                elif learn_options['training_metric'] == 'score':
                    # test_inner indexes into the training subset, like every
                    # other branch (the original scored on X[test_inner]).
                    performance[i, j] += clf.score(X[train][test_inner], y_all[ground_truth_label][train][test_inner])

                elif learn_options["training_metric"] == "NDCG":
                    assert "thresh" not in ground_truth_label, "for NDCG must not use thresholded ranks, but pure ranks"

                    tmp_truth = y_all[ground_truth_label].values[train][test_inner].flatten()
                    tmp_perf = ranking_metrics.ndcg_at_k_ties(tmp_truth, tmp_pred.flatten(), learn_options["NDGC_k"])
                    performance[i, j] += tmp_perf

                    # counts folds in which fewer than half of the predictions are distinct
                    degenerate_pred_tmp = len(np.unique(tmp_pred)) < len(tmp_pred)/2.0
                    degenerate_pred[i] += degenerate_pred_tmp

                    # sanity checking metric wrt ties, etc. (results currently unused)
                    tmp_pred_r, tmp_truth_r = ranking_metrics.rank_data(tmp_pred, tmp_truth)

    performance /= n_folds

    max_perf = np.nanmax(performance)
    best_rows, best_cols = np.where(performance == max_perf)
    if len(best_rows) == 0:
        raise Exception("no finite cross-validation performance, cannot select alpha")
    # NOTE(review): the original asserted `max_score_ind != len(performance)`
    # ("enlarge alpha range as hitting max boundary"), but that compared a tuple
    # with an int and could never fail, so no boundary check is enforced here.

    # in the unlikely event of tied scores, take the first one.  Scalar indices
    # also work when l1_ratio is a plain list (the L1 / L2 cases).
    best_alpha = learn_options["alpha"][best_rows[0]]
    best_l1r = l1_ratio[best_cols[0]]

    print("\t\tbest alpha is %f from range=%s" % (best_alpha, learn_options["alpha"][[0, -1]]))
    if learn_options['penalty'] == "EN":
        print("\t\tbest l1_ratio is %f from range=%s" % (best_l1r, l1_ratio[[0, -1]]))

    if max_perf < 0.0:
        raise Exception("performance is negative")

    print("\t\tbest performance is %f" % max_perf)

    # refit with the selected l1_ratio (the original passed the stale loop
    # variable l1r, i.e. always the last ratio tried)
    clf = train_linreg_model(best_alpha, best_l1r, learn_options, train, X, y, y_all)
    if learn_options["feature_select"]:
        raise Exception("untested in a long time, should double check")
    else:
        y_pred = clf.predict(X[test])

    if learn_options["penalty"] != "L2":
        y_pred = y_pred[:, None]

    return y_pred, clf


def feature_select(clf, learn_options, test_inner, train_inner, X, y):
    '''
    Refit an unpenalized LinearRegression on the features whose coefficient in
    clf is non-zero, and predict the test_inner rows with it.

    If clf has no non-zero coefficient, clf is returned unchanged and the
    prediction is np.ones_like(test_inner).

    Returns (clf, tmp_pred).
    '''
    assert learn_options["weighted"] is None, "cannot currently do feature selection with weighted regression"
    # compare by value: `is not "huber"` tested object identity, not equality
    assert learn_options["loss"] != "huber", "won't use huber loss function with feature selection"
    non_zero_coeff = (clf.coef_ != 0.0)
    if non_zero_coeff.sum() > 0:
        selected = non_zero_coeff.flatten()
        clf = LinearRegression()
        clf.fit(X[train_inner][:, selected], y[train_inner])
        tmp_pred = clf.predict(X[test_inner][:, selected])
    else:
        # NOTE(review): this has the shape and dtype of test_inner itself, not
        # of the selected rows -- confirm that is what callers expect.
        tmp_pred = np.ones_like(test_inner)
    return clf, tmp_pred


def get_weights(learn_options, fold, y, y_all):
    '''
    fold is an object like train_inner which is boolean for which indexes are in the fold

    Returns the per-sample weights for the rows in fold according to
    learn_options["weighted"] ("variance", "ndcg", "rank", "score", "random"),
    or None when it is None.  Raises Exception for any other value.
    '''
    weights = None
    if learn_options["weighted"] == "variance":
        weights = 1.0/y_all["variance"].values[fold]
    elif learn_options["weighted"] == "ndcg":
        # DCG: r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
        # Size by the selected rows, as the other branches do; len(fold) is the
        # full length when fold is a boolean mask.
        N = len(y[fold])
        r = np.ones(N)
        # NOTE(review): the original applied a no-op `[::1]` slice here;
        # possibly `[::-1]` was intended -- confirm before changing the order.
        discount = np.concatenate((np.array([r[0]]), r[1:] / np.log2(np.arange(2, r.size + 1))))
        ind = np.argsort(y[fold], axis=0).flatten()
        weights = np.ones(len(ind))
        weights[ind] = discount
    elif learn_options["weighted"] == "rank":
        N = len(y[fold])
        inverse_ranks = (np.arange(N) + 1.0)[::-1]
        ind = np.argsort(y[fold], axis=0).flatten()
        weights = np.ones(len(ind))
        weights[ind] = inverse_ranks
    elif learn_options["weighted"] == "score":
        N = len(y[fold])
        score = y[fold] + np.abs(np.min(y[fold]))
        ind = np.argsort(y[fold], axis=0).flatten()
        weights = np.ones(len(ind))
        # NOTE(review): score is in original row order but is assigned through
        # the sort permutation -- verify this is intended.
        weights[ind] = score
    elif learn_options["weighted"] == "random":
        N = len(y[fold])
        weights = np.random.rand(N)
    elif learn_options["weighted"] is not None:
        raise Exception("invalid weighted type, %s" % learn_options["weighted"])
    # plt.plot(weights, y[train_inner],'.')
    return weights


def set_up_folds(learn_options, y):
    if learn_options["cv"] == "stratified":
        label_encoder = sklearn.preprocessing.LabelEncoder()
        label_encoder.fit(y['Target gene'].values)
        gene_classes = label_encoder.transform(y['Target gene'].values)
        n_folds = len(np.unique(gene_classes))
        cv = sklearn.cross_validation.StratifiedKFold(gene_classes, n_folds=n_folds, shuffle=True)
    elif learn_options["cv"] == "gene":
        gene_list = np.unique(y['Target gene'].values)
        cv = []
        for gene in gene_list:
            cv.append(predict.get_train_test(gene, y))
        n_folds = len(cv)
    # ... (the snippet is truncated here; the remainder of set_up_folds is not shown)

Full Screen

Full Screen

Automation Testing Tutorials

Learn to execute automation testing from scratch with the LambdaTest Learning Hub: from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run hypothesis automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 minutes of automation testing FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful