Best Python code snippet using autotest_python
_1_encode_cat_features.py
Source: _1_encode_cat_features.py
# -*- coding: utf-8 -*-
import pandas as pd
import numpy as np
import scipy as sc
import scipy.sparse as sp
from sklearn.utils import check_random_state
import pylab
import sys
import time
# sys.path.append('/home/zzhang/Downloads/xgboost/wrapper')
import xgboost as xgb
from joblib import dump, load, Parallel, delayed
import utils
from utils import *

raw_data_path = utils.raw_data_path
tmp_data_path = utils.tmp_data_path

train_data = pd.read_csv(raw_data_path + "train.csv", nrows=100000)
test_data = pd.read_csv(raw_data_path + "test.csv", nrows=100000)
# train_data = test_data.copy()

# randomly draw utils.sample_pct of the training rows; 1.0 means use them all
if utils.sample_pct < 1.0:
    np.random.seed(999)
    r1 = np.random.uniform(0, 1, train_data.shape[0])
    train_data = train_data.loc[r1 < utils.sample_pct, :]
    print("testing with small sample of training data, ", train_data.shape)

# the test set lacks the label column of the training set, so add a placeholder
test_data['click'] = 0

# concatenate the training and test samples so feature engineering is applied uniformly
all_data = pd.concat([train_data, test_data])
print("finished loading raw data, ", all_data.shape)

print("to add some basic features ...")
# expand the hour feature into hour1, day_hour, day_hour_prev and day_hour_next
all_data['day'] = np.round(all_data.hour % 10000 / 100)
all_data['hour1'] = np.round(all_data.hour % 100)
all_data['day_hour'] = (all_data.day.values - 21) * 24 + all_data.hour1.values
all_data['day_hour_prev'] = all_data['day_hour'] - 1
all_data['day_hour_next'] = all_data['day_hour'] + 1
all_data['app_or_web'] = 0
all_data.loc[all_data.app_id.values == 'ecad2386', 'app_or_web'] = 1

copy_data = all_data
copy_data['app_site_id'] = np.add(copy_data.app_id.values, copy_data.site_id.values)

print("to encode categorical features using mean responses from earlier days -- univariate")
sys.stdout.flush()
calc_exptv(copy_data, ['app_or_web'])
exptv_vn_list = ['app_site_id', 'as_domain', 'C14', 'C17', 'C21', 'device_model', 'device_ip', 'device_id', 'dev_ip_aw',
                 'app_site_model', 'site_model', 'app_model', 'dev_id_ip', 'C14_aw', 'C17_aw', 'C21_aw']
calc_exptv(copy_data, exptv_vn_list)
calc_exptv(copy_data, ['app_site_id'], add_count=True)

print("to encode categorical features using mean responses from earlier days -- multivariate")
vns = ['app_or_web', 'device_ip', 'app_site_id', 'device_model', 'app_site_model', 'C1', 'C14', 'C17', 'C21',
       'device_type', 'device_conn_type', 'app_site_model_aw', 'dev_ip_app_site']
dftv = copy_data.loc[np.logical_and(copy_data.day.values >= 21, copy_data.day.values < 32), ['click', 'day', 'id'] + vns].copy()
dftv['app_site_model'] = np.add(dftv.device_model.values, dftv.app_site_id.values)
dftv['app_site_model_aw'] = np.add(dftv.app_site_model.values, dftv.app_or_web.astype(str).values)
dftv['dev_ip_app_site'] = np.add(dftv.device_ip.values, dftv.app_site_id.values)
for vn in vns:
    dftv[vn] = dftv[vn].astype('category')
    print(vn)

n_ks = {'app_or_web': 100, 'app_site_id': 100, 'device_ip': 10, 'C14': 50, 'app_site_model': 50, 'device_id': 50,
        'C17': 100, 'C21': 100, 'C1': 100, 'device_type': 100, 'device_conn_type': 100, 'banner_pos': 100,
        'app_site_model_aw': 100, 'dev_ip_app_site': 10, 'device_model': 500}
exp2_dict = {}
for vn in vns:
    exp2_dict[vn] = np.zeros(dftv.shape[0])
days_npa = dftv.day.values

for day_v in range(22, 32):
    # fit on all earlier days, validate on day_v
    df1 = dftv.loc[np.logical_and(dftv.day.values < day_v, dftv.day.values < 31), :].copy()
    df2 = dftv.loc[dftv.day.values == day_v, :]
    print("Validation day:", day_v, ", train data shape:", df1.shape, ", validation data shape:", df2.shape)
    pred_prev = df1.click.values.mean() * np.ones(df1.shape[0])
    for vn in vns:
        if 'exp2_' + vn in df1.columns:
            df1.drop('exp2_' + vn, inplace=True, axis=1)
    for i in range(3):
        for vn in vns:
            p1 = calcLeaveOneOut2(df1, vn, 'click', n_ks[vn], 0, 0.25, mean0=pred_prev)
            pred = pred_prev * p1
            print(day_v, i, vn, "change = ", ((pred - pred_prev) ** 2).mean())
            pred_prev = pred

    pred1 = df1.click.values.mean()
    for vn in vns:
        print("=" * 20, "merge", day_v, vn)
        diff1 = mergeLeaveOneOut2(df1, df2, vn)
        pred1 *= diff1
        exp2_dict[vn][days_npa == day_v] = diff1

    pred1 *= df1.click.values.mean() / pred1.mean()
    print("logloss = ", logloss(pred1, df2.click.values))
    # print(my_lift(pred1, None, df2.click.values, None, 20, fig_size=(10, 5)))
    # plt.show()

for vn in vns:
    copy_data['exp2_' + vn] = exp2_dict[vn]

print("to count prev/current/next hour by ip ...")
cntDualKey(copy_data, 'device_ip', None, 'day_hour', 'day_hour_prev', fill_na=0)
cntDualKey(copy_data, 'device_ip', None, 'day_hour', 'day_hour', fill_na=0)
cntDualKey(copy_data, 'device_ip', None, 'day_hour', 'day_hour_next', fill_na=0)

print("to create day diffs")
copy_data['pday'] = copy_data.day - 1
calcDualKey(copy_data, 'device_ip', None, 'day', 'pday', 'click', 10, None, True, True)
copy_data['cnt_diff_device_ip_day_pday'] = copy_data.cnt_device_ip_day.values - copy_data.cnt_device_ip_pday.values
copy_data['hour1_web'] = copy_data.hour1.values
copy_data.loc[copy_data.app_or_web.values == 0, 'hour1_web'] = -1
copy_data['app_cnt_by_dev_ip'] = my_grp_cnt(copy_data.device_ip.values.astype(str), copy_data.app_id.values.astype(str))
copy_data['hour1'] = np.round(copy_data.hour.values % 100)
copy_data['rank_dev_ip'] = my_grp_idx(copy_data.device_ip.values.astype(str), copy_data.id.values.astype(str))
copy_data['rank_day_dev_ip'] = my_grp_idx(np.add(copy_data.device_ip.values, copy_data.day.astype(str).values).astype(str), copy_data.id.values.astype(str))
copy_data['rank_app_dev_ip'] = my_grp_idx(np.add(copy_data.device_ip.values, copy_data.app_id.values).astype(str), copy_data.id.values.astype(str))
copy_data['cnt_dev_ip'] = get_agg(copy_data.device_ip.values, copy_data.id, np.size)
copy_data['cnt_dev_id'] = get_agg(copy_data.device_id.values, copy_data.id, np.size)
copy_data['dev_id_cnt2'] = np.minimum(copy_data.cnt_dev_id.astype('int32').values, 300)
copy_data['dev_ip_cnt2'] = np.minimum(copy_data.cnt_dev_ip.astype('int32').values, 300)
copy_data['dev_id2plus'] = copy_data.device_id.values
copy_data.loc[copy_data.cnt_dev_id.values == 1, 'dev_id2plus'] = '___only1'
copy_data['dev_ip2plus'] = copy_data.device_ip.values
copy_data.loc[copy_data.cnt_dev_ip.values == 1, 'dev_ip2plus'] = '___only1'
copy_data['diff_cnt_dev_ip_hour_phour_aw2_prev'] = (copy_data.cnt_device_ip_day_hour.values - copy_data.cnt_device_ip_day_hour_prev.values) * (copy_data.app_or_web * 2 - 1)
copy_data['diff_cnt_dev_ip_hour_phour_aw2_next'] = (copy_data.cnt_device_ip_day_hour.values - copy_data.cnt_device_ip_day_hour_next.values) * (copy_data.app_or_web * 2 - 1)

print("to save copy_data ...")
dump(copy_data, tmp_data_path + 'copy_data.joblib_dat')

print("to generate copy_datatv_mx ...")
app_or_web = None
_start_day = 22
list_param = ['C1', 'C14', 'C15', 'C16', 'C17', 'C18', 'C19', 'C20', 'C21', 'banner_pos', 'device_type', 'device_conn_type']
feature_list_dict = {}
feature_list_name = 'tvexp3'
feature_list_dict[feature_list_name] = list_param + \
    ['exptv_' + vn for vn in ['app_site_id', 'as_domain',
                              'C14', 'C17', 'C21', 'device_model', 'device_ip', 'device_id', 'dev_ip_aw',
                              'dev_id_ip', 'C14_aw', 'C17_aw', 'C21_aw']] + \
    ['cnt_diff_device_ip_day_pday',
     'app_cnt_by_dev_ip', 'cnt_device_ip_day_hour', 'app_or_web',
     'rank_dev_ip', 'rank_day_dev_ip', 'rank_app_dev_ip',
     'diff_cnt_dev_ip_hour_phour_aw2_prev', 'diff_cnt_dev_ip_hour_phour_aw2_next',
     'exp2_device_ip', 'exp2_app_site_id', 'exp2_device_model', 'exp2_app_site_model',
     'exp2_app_site_model_aw', 'exp2_dev_ip_app_site',
     'cnt_dev_ip', 'cnt_dev_id', 'hour1_web']

filter_tv = np.logical_and(copy_data.day.values >= _start_day, copy_data.day.values < 31)
filter_t1 = np.logical_and(copy_data.day.values < 30, filter_tv)
filter_v1 = np.logical_and(~filter_t1, filter_tv)

print(filter_tv.sum())
for vn in feature_list_dict[feature_list_name]:
    if vn not in copy_data.columns:
        print("=" * 60 + vn)

yv = copy_data.click.values[filter_v1]
copy_datatv_mx = copy_data[feature_list_dict[feature_list_name]].values
print(copy_datatv_mx.shape)

print("to save copy_datatv_mx ...")
copy_datatv_mx_save = {}
copy_datatv_mx_save['copy_datatv_mx'] = copy_datatv_mx
copy_datatv_mx_save['click'] = copy_data.click.values
copy_datatv_mx_save['day'] = copy_data.day.values
copy_datatv_mx_save['site_id'] = copy_data.site_id.values
...
feuture_get.py
Source: feuture_get.py
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook


def lag(data, index_cols, lag_cols, mode="day"):
    # "date" must be among index_cols
    shift_range = [1, 2, 3, 4, 5, 12]

    for shift in tqdm_notebook(shift_range):
        shifted_data = data[index_cols + lag_cols].copy()

        # pd.DateOffset(seconds=shift) for second-level data
        # "-" pulls data from the future
        shifted_data['date'] -= pd.DateOffset(days=shift)

        foo = lambda x: '{}_{}lag_{}'.format(x, mode, shift) if x in lag_cols else x
        shifted_data = shifted_data.rename(columns=foo)

        data = pd.merge(data, shifted_data, on=index_cols, how='left').fillna(0)  # or other NaN value

    return data


def first_extremum(data, delta_list, value_column, mode='max'):
    # data must have "date"
    copy_data = data.copy()

    for delta in delta_list:
        for value_label in value_column:

            # a row is an extremum if it equals the rolling max/min of its centered window
            if mode == 'max':
                max_mask = (copy_data[value_label].rolling(1 + 2 * delta, center=True).max() ==
                            copy_data[value_label])
            else:
                max_mask = (copy_data[value_label].rolling(1 + 2 * delta, center=True).min() ==
                            copy_data[value_label])
            indexes = np.where(max_mask)[0]

            indexes_with_nan = np.concatenate([indexes, [None]])

            # fmxi is first max index (index of the first maximum)
            copy_data['{}_f{}i{}'.format(value_label, mode, 1 + 2 * delta)] = \
                indexes_with_nan[np.searchsorted(indexes, data.index, side='right')]

            # fmxr is first max range (range to the first maximum)
            copy_data['{}_f{}r{}'.format(value_label, mode, 1 + 2 * delta)] = \
                copy_data['{}_f{}i{}'.format(value_label, mode, 1 + 2 * delta)] - copy_data.index

            max_val = copy_data[max_mask][[value_label]]
            max_val = max_val.rename(columns=lambda x: "{}_{}{}".format(x, mode, 1 + 2 * delta))

            copy_data = copy_data.join(max_val, how='left')

            # print(copy_data[value_label] == copy_data[value_label + "_max"])
            copy_data.loc[copy_data[value_label] == copy_data['{}_{}{}'.format(value_label, mode, 1 + 2 * delta)],
                          '{}_f{}r{}'.format(value_label, mode, 1 + 2 * delta)] = 0

            copy_data.drop(['{}_f{}i{}'.format(value_label, mode, 1 + 2 * delta)], inplace=True, axis=1)
            copy_data = copy_data[::-1]
            copy_data['{}_{}{}'.format(value_label, mode, 1 + 2 * delta)] = copy_data[
                '{}_{}{}'.format(value_label, mode, 1 + 2 * delta)].ffill()
            copy_data = copy_data[::-1]

    return copy_data


# lag(data, ["date"], ["brent_close", "brent_open"])
#   date        brent_close  brent_open  brent_close_daylag_1  brent_open_daylag_1  ...  brent_close_daylag_12  brent_open_daylag_12
# 0 2002-07-01  25.64        25.50       25.75                 25.61                ...
# 1 2002-07-02  25.75        25.61       25.84                 25.73                ...

# first_extremum(data, [2, 3], ['brent_close', 'brent_open'], 'min')
#   date        brent_close  brent_open  brent_close_fminr5  brent_close_min5  brent_open_fminr5  brent_open_min5  brent_close_fminr7  brent_close_min7  brent_open_fminr7  brent_open_min7
# 0 2002-07-01  25.64        25.50       5                   25.08             6                  25.1             5                   25.08             6                  25.1
...
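Assuming the two functions above are saved as feuture_get.py, a minimal usage sketch could look like the following; the toy Brent price frame and its random values are invented for illustration.

import numpy as np
import pandas as pd
from feuture_get import lag, first_extremum  # the module shown above

# toy daily price frame; values are made up
df = pd.DataFrame({
    'date': pd.date_range('2002-07-01', periods=30, freq='D'),
    'brent_close': 25 + np.random.rand(30).round(2),
    'brent_open': 25 + np.random.rand(30).round(2),
})

lagged = lag(df, ['date'], ['brent_close', 'brent_open'])          # adds *_daylag_{1,2,3,4,5,12} columns
extrema = first_extremum(df, [2, 3], ['brent_close'], mode='min')  # adds *_fminr{5,7} and *_min{5,7} columns
print(lagged.columns.tolist())
print(extrema.head())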
modify.py
Source: modify.py
import sys


def modify_instance(name):
    with open(name, "r") as f:
        data = f.readlines()
    copy_data = data.copy()
    for i, line in enumerate(data):
        for j, c in enumerate(line):
            if c == "<":
                copy_data[i] = copy_data[i][:j] + ' ' + copy_data[i][j+1:]
            if c == ">":
                copy_data[i] = copy_data[i][:j] + ' ' + copy_data[i][j+1:]
            if c == ",":
                copy_data[i] = copy_data[i][:j] + ' ' + copy_data[i][j+1:]

    with open("modify.dat", "w") as out:
        out.writelines(copy_data)


if __name__ == "__main__":
    ...
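modify_instance blanks out every '<', '>', and ',' so the instance file can later be read as whitespace-separated data. Because each replacement swaps a single character in place, the same scrub can be written in one pass with str.translate; a sketch of that equivalent is below (the function name is hypothetical, and the character set is the one used in the loops above).

def modify_instance_translate(name, out_name="modify.dat"):
    # map each delimiter to a space; equivalent to the nested loops above
    table = str.maketrans({'<': ' ', '>': ' ', ',': ' '})
    with open(name, "r") as f:
        cleaned = [line.translate(table) for line in f]
    with open(out_name, "w") as out:
        out.writelines(cleaned)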