Best Python code snippet using lettuce_webdriver_python
CRScope.py
Source: CRScope.py
#!/usr/bin/python
import os
import sys
import time
import pandas as pd
import numpy as np
from multiprocessing import Process, Manager
from multiprocessing.managers import BaseManager
from sklearn.model_selection import TimeSeriesSplit, train_test_split
from imblearn.over_sampling import *
from imblearn.under_sampling import *
sys.path.append('./src')
import arg
import util
from model import *
from case import Case
from log import Logger
from data import Dataset
from docs import Docx, Xlsx
from joblib import dump, load

def load_dataset(file_name):
    dataset = Dataset(logger, file_name)
    columns = ['date', 'security', 'feature']
    dataset.drop_na(columns)
    dataset.factorize_label()
    dataset.sort_date()
    return dataset

def preprocess_data(df, flag_preprocess):
    # Strip argument lists and template parameters from the crashing function name.
    df['crash_function_full'] = df.crash_function.str.split('(').str[0].str.split('<').str[0]

    def preprocess_crash_function(df):
        df['crash_function_namespace'], df['crash_function_class'], df['crash_function_func'] = \
            zip(*df.crash_function.apply(lambda x: util.parse_function(x)))
        return df

    split_backtrace = df.backtrace.str.split(' - ')
    full_bt = []
    for backtrace in split_backtrace:
        full_bt.append(' - '.join([bt.split('(')[0].split('<')[0] for bt in backtrace]))
    df['backtrace_full'] = full_bt

    def preprocess_backtrace(df):
        name_bt = []
        for backtrace in split_backtrace:
            name_bt.append(' - '.join([util.parse_function(bt)[2] for bt in backtrace]))
        df['backtrace_func'] = name_bt
        return df

    if flag_preprocess:
        df = preprocess_crash_function(df)
        df = preprocess_backtrace(df)
    return df

def extract_features(df_train, df_test, flag_preprocess, flag_tfidvector, flag_countvector):
    from sklearn.preprocessing import LabelEncoder
    from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer

    df_train = preprocess_data(df_train, flag_preprocess)
    df_test = preprocess_data(df_test, flag_preprocess)
    train_features = pd.DataFrame()
    test_features = pd.DataFrame()
    label = LabelEncoder()
    tfidf = TfidfVectorizer(sublinear_tf=True, norm='l2', ngram_range=(1, 5), stop_words='english')
    count = CountVectorizer(ngram_range=(1, 5), stop_words='english')

    def update(new, label_encoder):
        # Extend the fitted encoder with a class unseen during training.
        label_encoder.classes_ = np.append(label_encoder.classes_, new)
        return len(label_encoder.classes_) - 1

    def process_categorical(train_features, test_features, column):
        # Label-encode a categorical column; test values unseen in training get a fresh id.
        train_features[column] = label.fit_transform(df_train[column])
        test_features[column] = df_test[column].map(
            lambda s: update(s, label) if s not in label.classes_
            else np.where(label.classes_ == s)[0][0])
        return train_features, test_features

    def process_text(train_features, test_features, column, prefix):
        # Expand a text column into tf-idf and/or n-gram count features.
        if flag_tfidvector:
            train_vec = tfidf.fit_transform(df_train[column]).toarray()
            test_vec = tfidf.transform(df_test[column]).toarray()
            names = tfidf.get_feature_names()
            for i in range(len(names)):
                train_features['%s_tfid-%s' % (prefix, names[i])] = [row[i] for row in train_vec]
                test_features['%s_tfid-%s' % (prefix, names[i])] = [row[i] for row in test_vec]
        if flag_countvector:
            train_vec = count.fit_transform(df_train[column]).toarray()
            test_vec = count.transform(df_test[column]).toarray()
            names = count.get_feature_names()
            for i in range(len(names)):
                train_features['%s_count-%s' % (prefix, names[i])] = [row[i] for row in train_vec]
                test_features['%s_count-%s' % (prefix, names[i])] = [row[i] for row in test_vec]
        return train_features, test_features

    def process_exniffer(train_features, test_features):
        import ast
        # 'feature' holds a stringified list of Exniffer feature ids (1..44); one-hot encode them.
        for i in range(1, 45):
            train_features['feature%d' % i] = df_train['feature'].apply(lambda x: i in ast.literal_eval(x))
            train_features['feature%d' % i] = train_features['feature%d' % i].factorize(sort=True)[0]
            test_features['feature%d' % i] = df_test['feature'].apply(lambda x: i in ast.literal_eval(x))
            test_features['feature%d' % i] = test_features['feature%d' % i].factorize(sort=True)[0]
        return train_features, test_features

    train_features, test_features = process_categorical(train_features, test_features, 'engine')
    train_features.index = df_train.index
    test_features.index = df_test.index
    for column in ['arch', 'mode', 'signal', 'crash_type']:
        train_features, test_features = process_categorical(train_features, test_features, column)
    train_features, test_features = process_text(train_features, test_features, 'crash_instruction', 'inst')
    train_features, test_features = process_text(train_features, test_features, 'crash_function_full', 'func_full')
    if flag_preprocess:
        train_features, test_features = process_text(train_features, test_features, 'crash_function_func', 'func')
    train_features, test_features = process_text(train_features, test_features, 'backtrace_full', 'bt_full')
    if flag_preprocess:
        train_features, test_features = process_text(train_features, test_features, 'backtrace_func', 'bt')
    train_features, test_features = process_exniffer(train_features, test_features)
    train_labels = df_train.security_id
    test_labels = df_test.security_id
    names = train_features.columns.values
    return train_features, test_features, train_labels, test_labels, names

def select_features(features, labels, names):
    # Drop the later column of every pair with correlation >= 0.9.
    corr = features.corr()
    columns = np.full((corr.shape[0],), True, dtype=bool)
    for i in range(corr.shape[0]):
        for j in range(i + 1, corr.shape[0]):
            if corr.iloc[i, j] >= 0.9 and columns[j]:
                columns[j] = False
    names = features.columns[columns]
    features = features[names]
    # Keep at most 100 of the survivors, ranked by chi-squared score.
    from sklearn.feature_selection import SelectKBest, chi2
    k = 100 if features.shape[1] > 100 else 'all'
    selected_features = SelectKBest(chi2, k=k).fit(features, labels).get_support()
    names = [names[i] for i in range(len(selected_features)) if selected_features[i]]
    features = features[names]
    return features.values, names

def sampling(X, y, over, option):
    # Rebalance classes with the requested imblearn over-/under-sampler.
    samplers = {
        'over': {1: RandomOverSampler, 2: ADASYN, 3: SMOTE},
        'under': {1: RandomUnderSampler, 2: TomekLinks, 3: CondensedNearestNeighbour,
                  4: OneSidedSelection, 5: EditedNearestNeighbours,
                  6: NeighbourhoodCleaningRule},
    }
    return samplers[over][option]().fit_sample(X, y)

def drop_features(names, choice):
    # Keep the Exniffer one-hot columns and/or the CRScope text columns, per `choice`.
    s_names = []
    for name in names:
        if 'feature' in name:
            if choice == 'exniffer' or choice == 'combi':
                s_names.append(name)
        elif choice == 'crscope' or choice == 'combi':
            s_names.append(name)
    return s_names

def run(case, model, set_list, names, xlsx):
    model.learn(logger, set_list, names, 4, args.engine)
    model.log_data()
    case.add_accuracy(model.name, model.accuracy_score)
    case.add_aucs(model.name, model.roc_auc_score)
    case.add_tprs(model.name, model.tprs)
    xlsx.write(case.get_name(), model)
    print(model.name)
    print(model.accuracy_score)
    print(model.roc_auc_score)

if __name__ == "__main__":
    start_time = time.time()
    BaseManager.register('Logger', Logger)
    BaseManager.register('Case', Case)
    BaseManager.register('Xlsx', Xlsx)
    manager = BaseManager()
    manager.start()
    # parse arguments
    args = arg.parse(sys.argv[1:])
    # create logger
    logger = manager.Logger('%s.v%s' % (args.engine, args.version))
    # load dataset
    dataset = load_dataset(args.datafile.name)
    # create docx, xlsx for report
    docx = Docx(dataset, args.engine, args.version)
    xlsx = manager.Xlsx(args.engine, args.version)
    # create cases and models
    cases = [
        # manager.Case(False, True, False),
        # manager.Case(False, False, True),
        # manager.Case(False, True, True),
        # manager.Case(True, True, False),
        # manager.Case(True, False, True),
        manager.Case(True, True, True)
    ]
    models = [
        MyLogisticRegression(),
        MyRandomForestClassifier(),
        MyMultinomialNB(),
        MyDecisionTreeClassifier(),
        MyLinearSVC(),
        MyMLPClassifier(),
    ]
    label_list = [model.name for model in models]
    if not os.path.exists('./dump/%s' % (args.engine)):
        os.makedirs('./dump/%s' % (args.engine))
    n = 4
    tscv = TimeSeriesSplit(n_splits=n)
    procs = []
    for case in cases:
        case.init_array(label_list)
        set_list = []
        names_list = []
        dump_file = './dump/%s/%s_%s_%s.dataset' % (args.engine, case.get_flag_preprocess(),
                                                    case.get_flag_tfidvector(), case.get_flag_countvector())
        if os.path.isfile(dump_file):
            set_list, names_list = load(dump_file)
        else:
            for i, [train_index, test_index] in enumerate(tscv.split(dataset.df)):
                train_features, test_features, train_labels, test_labels, names = \
                    extract_features(dataset.df.iloc[train_index], dataset.df.iloc[test_index],
                                     case.get_flag_preprocess(), case.get_flag_tfidvector(),
                                     case.get_flag_countvector())
                X_sample1, y_sample1 = sampling(train_features, train_labels, args.sampling, args.option)
                X_sample2, y_sample2 = sampling(test_features, test_labels, args.sampling, args.option)
                new_X = np.vstack([X_sample1, X_sample2])
                new_y = np.append(y_sample1, y_sample2)
                X_train, X_test, y_train, y_test = train_test_split(new_X, new_y,
                                                                    test_size=len(test_index), stratify=new_y)
                set_list.append([X_train, X_test, y_train, y_test])
                names_list.append(names)
            dump([set_list, names_list], dump_file)
        dump_file = './dump/%s/%s_%s_%s.%s_dataset' % (args.engine, case.get_flag_preprocess(),
                                                       case.get_flag_tfidvector(), case.get_flag_countvector(),
                                                       args.choice)
        if os.path.isfile(dump_file):
            new_set_list, new_names_list, info = load(dump_file)
        else:
            new_set_list = []
            new_names_list = []
            info = ['', '']
            for [X_train, X_test, y_train, y_test], names in zip(set_list, names_list):
                s_names = drop_features(names, args.choice)
                train_df = pd.DataFrame(X_train, columns=names)
                test_df = pd.DataFrame(X_test, columns=names)
                if args.choice == 'exniffer':
                    ss_names = s_names
                    new_X_train = train_df[s_names].values
                    new_X_test = test_df[s_names].values
                else:
                    new_X_train, ss_names = select_features(train_df[s_names], y_train, s_names)
                    new_X_test = test_df[ss_names].values
                new_set_list.append([new_X_train, new_X_test, y_train, y_test])
                new_names_list.append(ss_names)
                info[0] += '%s / %s\n' % (str(X_train.shape), str(X_test.shape))
                info[1] += '%s / %s\n' % (str(new_X_train.shape), str(new_X_test.shape))
            dump([new_set_list, new_names_list, info], dump_file)
        # Train every model on this case in its own process.
        for model in models:
            proc = Process(target=run, name=model.name,
                           args=(case, model, new_set_list, new_names_list, xlsx,))
            procs.append(proc)
            proc.start()
        for proc in procs:
            proc.join()
        case.draw(docx.image_dir)
        docx.write(case.get_name(), case.get_figname(), info)
        xlsx.reset_col()
    docx.close()
    xlsx.close()
...
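The interesting part of this pipeline is select_features(): it first prunes one column out of every highly correlated pair, then keeps the top-k survivors by chi-squared score. Below is a minimal standalone sketch of the same two-stage filter, assuming current scikit-learn; the column names and synthetic data are invented purely for illustration.

# Sketch of the two-stage filter used in select_features(); synthetic data for illustration.
import numpy as np
import pandas as pd
from sklearn.feature_selection import SelectKBest, chi2

rng = np.random.default_rng(0)
features = pd.DataFrame(rng.random((200, 10)), columns=['f%d' % i for i in range(10)])
features['f1'] = features['f0'] * 0.99 + 0.01   # make f1 nearly a copy of f0
labels = rng.integers(0, 2, 200)

# Stage 1: drop the later column of any pair with correlation >= 0.9 (here, f1).
corr = features.corr()
keep = np.full(corr.shape[0], True)
for i in range(corr.shape[0]):
    for j in range(i + 1, corr.shape[0]):
        if corr.iloc[i, j] >= 0.9 and keep[j]:
            keep[j] = False
features = features.loc[:, features.columns[keep]]

# Stage 2: rank the survivors by chi-squared score (features must be non-negative).
selector = SelectKBest(chi2, k=min(5, features.shape[1])).fit(features, labels)
print(features.columns[selector.get_support()].tolist())

Correlation pruning before the chi-squared test keeps SelectKBest from spending its budget on near-duplicate columns.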
lgbm_model.py
Source: lgbm_model.py
import numpy as np
import pandas as pd
from src.data import preprocess as pp
from sklearn.model_selection import KFold
import lightgbm as lgb
import gc

def lightgbm_model(training_features, test_features, n_folds=3):
    """Light gradient boosting model with cross validation.
    Input parameters
        training_features (pd.DataFrame):
            df containing the training set with TARGET values.
        test_features (pd.DataFrame):
            df containing the test features.
        n_folds (int):
            number of folds for the cross validation.
    Return
        submit (pd.DataFrame):
            df with `SK_ID_CURR` and `TARGET` probabilities of the model prediction.
    """
    # Extract the ID and target
    test_id = test_features['SK_ID_CURR']
    training_labels = training_features['TARGET']
    # Delete the ID and target columns
    training_features = training_features.drop(columns=['SK_ID_CURR', 'TARGET'])
    test_features = test_features.drop(columns=['SK_ID_CURR'])
    training_features, test_features = training_features.align(test_features, join='inner', axis=1)
    # Encode categorical values, impute and scale the dataframes
    training_features, test_features = pp.encode_categorical(training_features, test_features)
    training_features, test_features = pp.impute(training_features, test_features)
    training_features, test_features = pp.scale(training_features, test_features)
    # Create the k-fold object
    k_fold = KFold(n_splits=n_folds, shuffle=True, random_state=100)
    # Empty array for test predictions
    prediction_test = np.zeros(test_features.shape[0])
    # Iterate through the defined folds
    for t_index, v_index in k_fold.split(training_features):
        # Training data and validation data for the fold
        train_features, train_labels = training_features[t_index], training_labels[t_index]
        valid_features, valid_labels = training_features[v_index], training_labels[v_index]
        # Build the model - parameters were found with Bayesian optimization
        classifier = lgb.LGBMClassifier(n_estimators=10309, objective='binary',
                                        class_weight='balanced', learning_rate=0.0192,
                                        max_depth=7,
                                        min_child_weight=49,
                                        min_split_gain=0.0803,
                                        num_leaves=33, random_state=50,
                                        reg_alpha=0.1, reg_lambda=0.1,
                                        subsample=0.8, n_jobs=-1)
        # Fit the model with early stopping on the validation fold
        classifier.fit(train_features, train_labels, eval_metric='auc',
                       eval_set=[(valid_features, valid_labels), (train_features, train_labels)],
                       eval_names=['valid', 'train'],
                       early_stopping_rounds=100, verbose=200)
        best_iteration = classifier.best_iteration_
        # Average this fold's test predictions into the ensemble
        prediction_test += classifier.predict_proba(test_features,
                                                    num_iteration=best_iteration)[:, 1] / k_fold.n_splits
        # Save the model
        classifier.booster_.save_model('./model/lgbm_classifier.txt', num_iteration=best_iteration)
        # Clean up memory
        gc.enable()
        del classifier, train_features, valid_features
        gc.collect()
    # Create the result dataframe for the submission
    submit = pd.DataFrame({'SK_ID_CURR': test_id, 'TARGET': prediction_test})
...
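The snippet breaks off right after `submit` is assembled; per its docstring the function presumably finishes with `return submit`. Assuming that completion, a hypothetical driver could look like the sketch below; the CSV paths are placeholders, and the inputs must carry the `SK_ID_CURR` id column plus, for training, the `TARGET` label.

# Hypothetical driver for lightgbm_model(); the paths and the assumed
# trailing `return submit` are not in the snippet, the column layout
# follows the docstring above.
import pandas as pd

train_df = pd.read_csv('data/application_train.csv')  # placeholder path
test_df = pd.read_csv('data/application_test.csv')    # placeholder path

submission = lightgbm_model(train_df, test_df, n_folds=5)
submission.to_csv('submit.csv', index=False)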
d2v_test.py
Source: d2v_test.py
#!/usr/bin/env python3
from src.libsvm.python.svmutil import *
import random
import json
import glob
import sys
import pdb
import pylab as plt

# Label random 2-D points by the sign of x^2 - y^2, then fit an SVM to the boundary.
train_features = []
train_labels = []
for inp in range(1000):
    train_features.append([random.random(), random.random()])
    if train_features[-1][0] * train_features[-1][0] - train_features[-1][1] * train_features[-1][1] < 0:
        train_labels.append(0)
    else:
        train_labels.append(1)
model = svm_train(train_labels, train_features, "-s 0 -t 2")  # C-SVC with an RBF kernel

# Generate a test set from the same distribution.
test_features = []
test_labels = []
for inp in range(1000):
    test_features.append([random.random(), random.random()])
    if test_features[-1][0] * test_features[-1][0] - test_features[-1][1] * test_features[-1][1] < 0:
        test_labels.append(0)
    else:
        test_labels.append(1)

predictions, [acc, mse, cor], oth = svm_predict(test_labels, test_features, model)

# Tally the correct predictions by hand.
all = 0
cor = 0
for i in range(len(test_features)):
    all += 1
    if test_features[i][0] * test_features[i][0] - test_features[i][1] * test_features[i][1] < 0:
        if predictions[i] == 0:
            cor += 1
    else:
        if predictions[i] == 1:
            cor += 1
...
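This script is also truncated. Assuming the final step was to report the hand-tallied accuracy (svm_predict already prints its own), a one-line continuation might be:

# Cross-check the accuracy reported by svm_predict against the manual tally.
print('manual accuracy: %.2f%%' % (100.0 * cor / all))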