Best Python code snippet using autotest_python
results.py
Source: results.py
import os
import gym
import json
import random
import pickle
import numpy as np
import matplotlib.pyplot as plt
from os.path import join
from collections import OrderedDict
from scipy.stats import wasserstein_distance as wd
from scipy.spatial.distance import euclidean as ed
import sys

tools_path = join(os.getcwd(), 'customer_behaviour/tools')
sys.path.insert(1, tools_path)
import policy_evaluation as pe

############################
dir_path = 'n_historical_events'
param = dir_path
n_new_customers = 10
sample_length = 2000
resample = False
plot_only = True
update_classification = False
compare_features = False
k = 2  # The number of closest experts
N = 1  # The number of reinitializations when predicting a new customer
############################

def main():
    if resample:
        input("Are you sure you want to resample all data?\nPress 'Enter' to continue...")
    data = load_data()

    fig1, ax1 = plt.subplots()
    ax1.set_xlabel(get_label_from_param(param))
    ax1.set_ylabel('Classification error (%)')

    fig2, ax2 = plt.subplots()
    ax2.set_xlabel(get_label_from_param(param))
    ax2.set_ylabel('Absolute difference (ED)' if compare_features else 'Absolute difference (EMD)')
    ax2.set_title('Comparison with %s new customers' % n_new_customers)
    # It would also be interesting to compare with the absolute distance to the
    # expert whose dummy variable is set to 1

    fig3, ax3 = plt.subplots()
    ax3.set_xlabel('Training steps')
    ax3.set_ylabel('Classification error (%)')

    fig4, ax4 = plt.subplots()
    ax4.set_xlabel('Training steps')
    ax4.set_ylabel('Absolute difference (ED)' if compare_features else 'Absolute difference (EMD)')
    ax4.set_title('Comparison with experts in training data')
    # Include the variance in the agent's behavior:
    # the agent behaves well at the population level after, say, 2000 episodes,
    # but show that it has no variance at the individual level.
    # For example, sum the distance from the agent to all experts and show that
    # it decreases over time; one can also sum the pairwise distances between
    # the agent's different behaviors and show that they increase over time.

    param_values = list(data.keys())
    n_params = len(param_values)
    assert param_values
    step_values = list(data[param_values[0]][0].models.keys())[:-1]  # assumes the last key is 'final'
    n_step_values = len(step_values)
    colors = plt.rcParams['axes.prop_cycle'].by_key()['color']

    error_vs_param = [[] for i in range(n_params)]
    avg_dist_vs_param = [[] for i in range(n_params)]
    error_vs_step = [[[] for j in range(n_step_values)] for i in range(n_params)]
    avg_dist_vs_step = [[[] for j in range(n_step_values)] for i in range(n_params)]

    for param_value, results in data.items():
        i = param_values.index(param_value)
        for result in results:
            for n_train_steps, t in result.models.items():
                if n_train_steps == 'final':
                    # Only present if the commented-out 'final' blocks below are re-enabled
                    _, abs_diffs, errors = t
                    error_vs_param[i].append(100 * np.mean(errors))
                    avg_dist_vs_param[i].append(np.mean(abs_diffs))
                else:
                    j = step_values.index(n_train_steps)
                    _, _, errors, avg_dist = t
                    error_vs_step[i][j].append(100 * np.mean(errors))
                    avg_dist_vs_step[i][j].append(avg_dist)

    error_vs_param = list(map(lambda x: np.mean(x), error_vs_param))
    avg_dist_vs_param = list(map(lambda x: np.mean(x), avg_dist_vs_param))
    ax1.plot(param_values, error_vs_param)
    ax2.plot(param_values, avg_dist_vs_param)

    for i in range(n_params):
        temp1 = list(map(lambda x: np.mean(x), error_vs_step[i]))
        temp2 = list(map(lambda x: np.mean(x), avg_dist_vs_step[i]))
        ax3.plot(step_values, temp1, label=str(param_values[i]), color=colors[i])
        ax4.plot(step_values, temp2, label=str(param_values[i]), color=colors[i])

    for ax in (ax3, ax4):
        handles, labels = ax.get_legend_handles_labels()
        by_label = OrderedDict(zip(labels, handles))
        ax.legend(by_label.values(), by_label.keys())

    plt.show()

def save_data(path, sample_length, n_new_customers, compare_features):
    args = json.loads(open(join(path, 'args.txt'), 'r').read())
    n_experts = args['n_experts']

    # Sample customer data
    if args['state_rep'] == 71:
        env = pe.get_env_and_model(args, '', sample_length, only_env=True,
                                   n_experts_in_adam_basket=n_experts + n_new_customers)
    else:
        env = pe.get_env_and_model(args, '', sample_length, only_env=True)

    customer_trajectories = env.generate_expert_trajectories(
        out_dir=None,
        n_demos_per_expert=1,
        n_experts=n_experts + n_new_customers,
        n_expert_time_steps=sample_length
    )
    expert_states = np.array(customer_trajectories['states'][:n_experts])
    expert_actions = np.array(customer_trajectories['actions'][:n_experts])
    new_states = np.array(customer_trajectories['states'][n_experts:])
    new_actions = np.array(customer_trajectories['actions'][n_experts:])

    if compare_features:
        avg_expert = get_features(expert_actions, average=True)
        experts = get_features(expert_actions)
        new_customers = get_features(new_actions)
    else:
        avg_expert = pe.get_distrib(expert_states, expert_actions)
        experts = get_distribs(expert_states, expert_actions)
        new_customers = get_distribs(new_states, new_actions)

    models = {}
    model_paths = [d for d in [x[0] for x in os.walk(path)] if d.endswith('checkpoint')]
    model_paths.sort(key=get_key_from_path)

    for mp in model_paths:
        n_train_steps = get_key_from_path(mp)
        if n_train_steps <= 1000000: continue  # skip early checkpoints
        print('Collecting data from model saved after %s steps' % n_train_steps)
        if args['state_rep'] == 71:
            agent, abs_diffs, errors = evaluate_on_new_customers(args, mp, experts, new_customers, compare_features, n_new_customers)
        else:
            agent, abs_diffs, errors = evaluate_on_new_customers(args, mp, experts, new_customers, compare_features)
        avg_dist = evaluate_on_pop_level(args, mp, avg_expert, compare_features)
        models[n_train_steps] = (agent, abs_diffs, errors, avg_dist)

    # final_model_path = next((d for d in [x[0] for x in os.walk(path)] if d.endswith('finish')), None)
    # agent, abs_diffs, errors = evaluate_on_new_customers(args, final_model_path, experts, new_customers, compare_features)
    # models['final'] = (agent, abs_diffs, errors)

    result = Result(models)
    save_result(result, path)

def load_data():
    data = {}

    # Load data
    data_paths = [join(dir_path, x) for x in os.listdir(dir_path) if x.startswith('2020')]
    data_paths.sort()

    for i, path in enumerate(data_paths):
        print('Processing folder {} of {}'.format(i + 1, len(data_paths)))
        content = os.listdir(path)
        args = json.loads(open(join(path, 'args.txt'), 'r').read())

        if not (plot_only and not update_classification) and i == 0:
            env = pe.get_env_and_model(args, '', sample_length, only_env=True)
            # Sample customer data
            n_experts = args['n_experts']
            customer_trajectories = env.generate_expert_trajectories(
                out_dir=None,
                n_demos_per_expert=1,
                n_experts=n_experts + n_new_customers,
                n_expert_time_steps=sample_length
            )
            expert_states = np.array(customer_trajectories['states'][:n_experts])
            expert_actions = np.array(customer_trajectories['actions'][:n_experts])
            new_states = np.array(customer_trajectories['states'][n_experts:])
            new_actions = np.array(customer_trajectories['actions'][n_experts:])
            if compare_features:
                avg_expert = get_features(expert_actions, average=True)
                experts = get_features(expert_actions)
                new_customers = get_features(new_actions)
            else:
                avg_expert = pe.get_distrib(expert_states, expert_actions)
                experts = get_distribs(expert_states, expert_actions)
                new_customers = get_distribs(new_states, new_actions)

        if 'result.pkl' in content and not resample:
            print('Loading saved result')
            result = load_result(path)
            if update_classification:
                print('Updating classification with k = %d' % k)
                for n_train_steps, t in result.models.items():
                    agent = t[0]
                    errors = compare_with_new_customers(agent, experts, new_customers, compare_features)
                    l = list(t)
                    l[2] = errors
                    t = tuple(l)
                    result.models.update({n_train_steps: t})
        else:
            if plot_only:
                print('Ignoring')
                continue
            print('Collecting result by sampling from model')

            models = {}
            model_paths = [d for d in [x[0] for x in os.walk(path)] if d.endswith('checkpoint')]
            model_paths.sort(key=get_key_from_path)
            for mp in model_paths:
                n_train_steps = get_key_from_path(mp)
                if n_train_steps < 1000000: continue
                agent, abs_diffs, errors = evaluate_on_new_customers(args, mp, experts, new_customers, compare_features)
                avg_dist = evaluate_on_pop_level(args, mp, avg_expert, compare_features)
                models[n_train_steps] = (agent, abs_diffs, errors, avg_dist)

            # final_model_path = next((d for d in [x[0] for x in os.walk(path)] if d.endswith('finish')), None)
            # agent, abs_diffs, errors = evaluate_on_new_customers(args, final_model_path, experts, new_customers, compare_features)
            # models['final'] = (agent, abs_diffs, errors)

            result = Result(models)
            save_result(result, path)

        if args[param] in data:
            data[args[param]].append(result)
        else:
            data[args[param]] = [result]

    return data

def evaluate_on_pop_level(args, model_path, avg_expert, compare_features):
    n_experts = args['n_experts']
    env, model, obs_normalizer = pe.get_env_and_model(args, model_path, sample_length, only_env=False)
    metric = ed if compare_features else wd

    agent_states = []
    agent_actions = []
    for i in range(n_experts):
        # Initialize agent with data from the ith expert
        env.model.spawn_new_customer(i)
        sample = env.case.get_sample(
            n_demos_per_expert=1,
            n_historical_events=args['n_historical_events'],
            n_time_steps=1000
        )
        all_data = np.hstack(sample[0])  # history, data = sample[0]
        j = np.random.randint(0, all_data.shape[1] - args['n_historical_events'])
        history = all_data[:, j:j + args['n_historical_events']]
        if args['state_rep'] == 71:
            adam_basket = np.random.permutation(env.case.adam_baskets[i])
            env.case.i_expert = i
            initial_state = env.case.get_initial_state(history, adam_basket[0])
        else:
            initial_state = env.case.get_initial_state(history, i)
        states, actions = pe.sample_from_policy(env, model, obs_normalizer, initial_state=initial_state)
        agent_states.append(states)
        agent_actions.append(actions)

    agent_states = np.array(agent_states)
    agent_actions = np.array(agent_actions)
    avg_agent = get_features(agent_actions, average=True) if compare_features else pe.get_distrib(agent_states, agent_actions)
    distance = metric(avg_agent, avg_expert)
    return distance

def compare_with_new_customers(agents, experts, new_customers, compare_features):
    errors = []
    metric = ed if compare_features else wd
    for i, nc in enumerate(new_customers):
        distances = [metric(nc, e) for e in experts]
        closest_experts = np.argsort(distances)[:k]
        n_errors = 0
        for a in agents[i]:
            distances = [metric(a, e) for e in experts]
            if np.argmin(distances) not in closest_experts:
                n_errors += 1
        errors.append(n_errors / N)
    return errors

def evaluate_on_new_customers(args, model_path, experts, new_customers, compare_features, n_new_customers=None):
    global k, N
    n_experts = args['n_experts']

    if n_new_customers is not None:
        env, model, obs_normalizer = pe.get_env_and_model(args, model_path, sample_length, only_env=False,
                                                          n_experts_in_adam_basket=n_experts + n_new_customers)
    else:
        env, model, obs_normalizer = pe.get_env_and_model(args, model_path, sample_length, only_env=False)

    agents = []
    abs_diffs = []
    errors = []
    metric = ed if compare_features else wd

    for i, nc in enumerate(new_customers):
        distances = [metric(nc, e) for e in experts]
        closest_experts = np.argsort(distances)[:k]
        dummy = closest_experts[0]

        temp_agents = []
        temp_abs_diffs = []
        n_errors = 0

        seed = n_experts + i
        if args['state_rep'] == 71:
            adam_basket = np.random.permutation(env.case.adam_baskets[seed])
            env.case.i_expert = seed
        env.model.spawn_new_customer(seed)
        sample = env.case.get_sample(
            n_demos_per_expert=1,
            n_historical_events=args['n_historical_events'],
            n_time_steps=1000
        )
        all_data = np.hstack(sample[0])  # history, data = sample[0]

        for l in range(N):
            j = np.random.randint(0, all_data.shape[1] - args['n_historical_events'])
            history = all_data[:, j:j + args['n_historical_events']]
            if args['state_rep'] == 71:
                initial_state = env.case.get_initial_state(history, adam_basket[l])
            else:
                initial_state = env.case.get_initial_state(history, dummy)  # We set dummy to closest expert
            states, actions = pe.sample_from_policy(env, model, obs_normalizer, initial_state=initial_state)
            states = np.array(states)
            actions = np.array(actions)

            a = get_features([actions]) if compare_features else pe.get_distrib(states, actions)
            temp_agents.append(a)
            temp_abs_diffs.append(metric(a, nc))

            distances = [metric(a, e) for e in experts]
            if np.argmin(distances) not in closest_experts:
                n_errors += 1

        agents.append(temp_agents)
        abs_diffs.append(temp_abs_diffs)
        errors.append(n_errors / N)

    return agents, abs_diffs, errors

############################
class Result:
    def __init__(self, models):
        self.models = models

def get_features(all_actions, average=False):
    features = []
    for actions in all_actions:
        avg, std = get_mean_std_purchase_freq(actions)
        # purchase_ratio = np.count_nonzero(actions) / len(actions)
        features.append([avg, std])
    if average:
        return np.mean(features, axis=0)
    else:
        return features

def get_mean_std_purchase_freq(actions):
    indices = np.argwhere(actions).reshape((-1,))
    assert indices.size > 0
    temp = np.diff(indices)
    return np.mean(temp), np.std(temp)

def get_key_from_path(path):
    temp = path.split('/')[-1]
    steps = int(temp.split('_')[0])  # training steps
    return steps

def get_distribs(all_states, all_actions):
    N = len(all_states)
    distribs = []
    for states, actions in zip(np.split(all_states, N), np.split(all_actions, N)):  # Loop over individuals
        distribs.append(pe.get_distrib(states, actions))
    return distribs

def save_result(result_obj, folder):
    with open(join(folder, 'result.pkl'), 'wb') as f:
        pickle.dump(result_obj, f, pickle.HIGHEST_PROTOCOL)

def load_result(folder):
    with open(join(folder, 'result.pkl'), 'rb') as f:
        result = pickle.load(f)
    return result

def get_label_from_param(param):
    if param == 'n_historical_events':
        return 'Number of historical events'
    else:
        raise NotImplementedError

############################
if __name__ == '__main__':
    if len(sys.argv) > 1:
        path = sys.argv[1]
        save_data(path, sample_length=10000, n_new_customers=50, compare_features=False)
    else:
        ...
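For quick orientation, here is a minimal, self-contained sketch of the per-customer feature pair that get_features builds via get_mean_std_purchase_freq; the toy action sequence is invented for illustration and only numpy is required:

import numpy as np

# Toy action sequence: 1 = purchase event, 0 = no purchase (made-up data).
actions = np.array([0, 1, 0, 0, 1, 0, 1, 1, 0])

indices = np.argwhere(actions).reshape((-1,))  # purchase positions: [1, 4, 6, 7]
gaps = np.diff(indices)                        # steps between purchases: [3, 2, 1]

print([np.mean(gaps), np.std(gaps)])           # the [avg, std] feature pair per customer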
compare_efel.py
Source: compare_efel.py
from allensdk.core.nwb_data_set import NwbDataSet
import numpy as np
import matplotlib.pyplot as plt
import sys
import h5py
import os
import efel
import pickle as pkl

os.chdir('neuron_genetic_alg')
from config import *  # provides model_num, among others
os.chdir('../')

rank = int(os.environ['SLURM_PROCID'])
all_features = efel.api.getFeatureNames()
np.set_printoptions(threshold=sys.maxsize)
plt.rcParams['agg.path.chunksize'] = 10000

parsed_stim_response_path = f'./allen_model_sota_model_parsed_cell_{model_num}.hdf5'
data_file = h5py.File(parsed_stim_response_path, 'r')
sweep_keys = [e.decode('ascii') for e in data_file['sweep_keys']]

def show_efel(efel_feature_dict, feature_names):
    def normalize_efel_score(cell_features, allen_features, compare_features):
        cell_feature_mean = np.mean(cell_features)
        if cell_feature_mean != 0:
            return np.array(cell_features) / cell_feature_mean, \
                   np.array(allen_features) / cell_feature_mean, \
                   np.array(compare_features) / cell_feature_mean
        else:
            return np.array(cell_features), np.array(allen_features), np.array(compare_features)

    width = 0.4
    fig, ax = plt.subplots(figsize=(15, 5))
    for i, l in enumerate(feature_names):
        efel_data = efel_feature_dict[l]
        cell_features = efel_data["cell_features"]
        allen_features = efel_data["allen_features"]
        compare_features = efel_data["compare_features"]
        # Skip features for which any of the three responses has no values
        if cell_features is None or len(cell_features) == 0:
            continue
        if allen_features is None or len(allen_features) == 0:
            continue
        if compare_features is None or len(compare_features) == 0:
            continue
        # Jitter the x positions so the three groups sit side by side
        x_cell = np.ones(len(cell_features)) * i + (np.random.rand(len(cell_features)) * width / 3. - width / 3.)
        x_allen = np.ones(len(allen_features)) * i + (np.random.rand(len(allen_features)) * width / 3.)
        x_compare = np.ones(len(compare_features)) * i + (np.random.rand(len(compare_features)) * width / 3. + width / 3.)
        cell_features, allen_features, compare_features = normalize_efel_score(cell_features, allen_features, compare_features)
        ax.scatter(x_cell, cell_features, color="black", s=25)
        ax.scatter(x_allen, allen_features, color="blue", s=25)
        ax.scatter(x_compare, compare_features, color="crimson", s=25)
    ax.set_xticks(range(len(feature_names)))
    ax.set_xticklabels(feature_names, rotation=90)
    ax.scatter([], [], color="black", label="Cell response")
    ax.scatter([], [], color="blue", label="Allen's model response")
    ax.scatter([], [], color="crimson", label="CoMPare model response")
    ax.legend(bbox_to_anchor=(1.5, 1))
    plt.show()

def eval_efel(feature_name, target, data, dt):
    def diff_lists(lis1, lis2):
        def safe_mean(lis):
            if np.size(lis) == 0:
                return 0
            return np.mean(lis)
        if lis1 is None and lis2 is None:
            return 0
        if lis1 is None:
            lis1 = [0]
        if lis2 is None:
            lis2 = [0]
        len1, len2 = len(lis1), len(lis2)
        # Zero-pad the shorter list so the RMS difference is well defined
        if len1 > len2:
            lis2 = np.concatenate((lis2, np.zeros(len1 - len2)), axis=0)
        if len2 > len1:
            lis1 = np.concatenate((lis1, np.zeros(len2 - len1)), axis=0)
        return np.sqrt(safe_mean((lis1 - lis2) ** 2))

    time_stamps = len(target)
    time = np.cumsum([dt for i in range(time_stamps)])
    curr_trace_target, curr_trace_data = {}, {}
    stim_start, stim_end = dt, time_stamps * dt
    curr_trace_target['T'], curr_trace_data['T'] = time, time
    curr_trace_target['V'], curr_trace_data['V'] = target, data
    curr_trace_target['stim_start'], curr_trace_data['stim_start'] = [stim_start], [stim_start]
    curr_trace_target['stim_end'], curr_trace_data['stim_end'] = [stim_end], [stim_end]
    traces = [curr_trace_target, curr_trace_data]
    traces_results = efel.getFeatureValues(traces, [feature_name], raise_warnings=False)
    diff_feature = diff_lists(traces_results[0][feature_name], traces_results[1][feature_name])
    return diff_feature, traces_results

efel_data = {}
# for sweep_key in sweep_keys:
try:
    sweep_key = sweep_keys[rank]
except IndexError:  # more SLURM ranks than sweeps
    exit()

if int(sweep_key) > 78:
    exit()

print(f"processing sweep {sweep_key} for {len(all_features)} features")
stim_val = data_file[sweep_key + '_stimulus'][:]
cell_response = data_file[sweep_key + '_cell_response'][:]
allen_response = data_file[sweep_key + '_allen_model_response'][:]
compare_response = data_file[sweep_key + '_compare_model_response'][:]
dt_val = data_file[sweep_key + '_dt'][0]
# plot_sampled(sweep_key, stim_val, cell_response, allen_response, compare_response)

efel_feature_dict = {}
for efel_name in all_features:
    # print(f'efel name : {efel_name}')
    l2_val_allen, efel_values_allen = eval_efel(efel_name, cell_response, allen_response, dt_val)
    l2_val_compare, efel_values_compare = eval_efel(efel_name, cell_response, compare_response, dt_val)
    efel_feature_dict[efel_name] = {"cell_features": efel_values_allen[0][efel_name],
                                    "allen_features": efel_values_allen[1][efel_name],
                                    "compare_features": efel_values_compare[1][efel_name]}
    # print('eFEL name:', efel_name)
    # print(efel_name + " value for target:", efel_values[0][efel_name])
    # print(efel_name + " value for best model response:", efel_values[1][efel_name])
    # print("Euclidean Distance for Allen's Model:", l2_val_allen)
    # print("Euclidean Distance for CoMParE Model:", l2_val_compare)
    # print('\n')

efel_data[sweep_key] = efel_feature_dict
# show_efel(efel_feature_dict, all_features)
os.makedirs('./efel_data/subsets/', exist_ok=True)
...
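As context for eval_efel, this is roughly how an eFEL trace dictionary is assembled and scored; the voltage trace below is synthetic (two square blips standing in for spikes), and 'Spikecount' is just one of the feature names the script iterates over:

import numpy as np
import efel

dt = 0.1                             # assumed sampling interval in ms
n = 10000
time = np.arange(1, n + 1) * dt      # same time convention as eval_efel above
voltage = np.full(n, -70.0)          # made-up resting trace
voltage[2000:2020] = 20.0            # crude square "spike"
voltage[6000:6020] = 20.0            # second one

trace = {
    'T': time,
    'V': voltage,
    'stim_start': [dt],              # eval_efel treats the whole sweep as stimulus
    'stim_end': [n * dt],
}
results = efel.getFeatureValues([trace], ['Spikecount'], raise_warnings=False)
print(results[0]['Spikecount'])      # should report 2 for this toy trace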
similarity_filter.py
Source: similarity_filter.py
#!/usr/bin/env python
# David Prihoda
# Filter a Domain CSV file by comparing its feature representation to the feature representation of a different set of samples.
# Produces the original Domain CSV file with samples whose distance < threshold removed.
import argparse
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS

def get_similar_index(input_features, compare_features, metric, threshold):
    """
    Get the index of input samples more distant from a second set of samples than a given threshold
    :param input_features: The input samples' vector representation DataFrame, one sample vector per row
    :param compare_features: The compared samples' vector representation DataFrame, one sample vector per row
    :param metric: Metric to use (euclidean, cosine, ...)
    :param threshold: Return the index of samples whose distance is greater than this threshold
    :return: index of input samples more distant from the second set of samples than the given threshold
    """
    cols = np.intersect1d(input_features.columns, compare_features.columns)
    input_features = input_features[cols]
    compare_features = compare_features[cols]
    fn = PAIRWISE_DISTANCE_FUNCTIONS[metric]
    distances = pd.DataFrame(fn(input_features.values, compare_features.values),
                             index=input_features.index, columns=compare_features.index)
    min_distances = distances.min(axis=1)
    print('Distance stats:')
    print(min_distances.describe())
    filtered = min_distances[min_distances >= threshold]
    print('Removed {} contigs with distance < {}'.format(len(input_features) - len(filtered), threshold))
    return filtered.index

if __name__ == "__main__":
    # Parse command line
    parser = argparse.ArgumentParser()
    parser.add_argument("-i", "--input", dest="input", required=True,
                        help="Input domain CSV file.", metavar="FILE")
    parser.add_argument("-f", "--feature", dest="features", required=True,
                        help="Input feature matrix pickle file.", metavar="FILE")
    parser.add_argument("-c", "--compare", dest="compare", required=True,
                        help="Feature matrix pickle file to compare against.", metavar="FILE")
    parser.add_argument("--metric", dest="metric", default='euclidean',
                        help="Metric used to compute similarity.", metavar="STRING")
    parser.add_argument("--threshold", dest="threshold", required=True, type=float,
                        help="Remove samples with distance < threshold.", metavar="FLOAT")
    parser.add_argument("-o", "--output", dest="output", required=True,
                        help="Output file path.", metavar="FILE")
    options = parser.parse_args()

    domains = pd.read_csv(options.input).set_index('contig_id')
    input_features = pd.read_pickle(options.features)
    compare_features = pd.read_pickle(options.compare)
    print('Filtering {} contigs by similarity to {} contigs...'.format(len(input_features), len(compare_features)))
    index = get_similar_index(input_features, compare_features, metric=options.metric, threshold=options.threshold)
    orig_len = len(input_features)
    new_len = len(index)
    print('Remaining contigs: {}/{} = {:.1f}%'.format(new_len, orig_len, (new_len / orig_len) * 100))
    filtered_domains = domains.loc[index]
    print('Saving {} domains to: {}'.format(len(filtered_domains), options.output))
    ...
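A quick way to sanity-check get_similar_index is to run its core steps on toy DataFrames; the contig names and feature values below are invented:

import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import PAIRWISE_DISTANCE_FUNCTIONS

input_features = pd.DataFrame([[0.0, 0.0], [5.0, 5.0]],
                              index=['contig_a', 'contig_b'], columns=['f1', 'f2'])
compare_features = pd.DataFrame([[0.1, 0.1]],
                                index=['ref_1'], columns=['f1', 'f2'])

fn = PAIRWISE_DISTANCE_FUNCTIONS['euclidean']
distances = pd.DataFrame(fn(input_features.values, compare_features.values),
                         index=input_features.index, columns=compare_features.index)
min_distances = distances.min(axis=1)  # distance to the nearest reference sample

# Keep only contigs at least 1.0 away from every reference contig
print(min_distances[min_distances >= 1.0].index.tolist())  # ['contig_b']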