Best Python code snippet using hypothesis
experiment_association_only.py
Source:experiment_association_only.py
import matplotlib
matplotlib.use('Agg')  # headless backend: figures are only saved, never shown
import matplotlib.pyplot as plt
from matplotlib.ticker import MaxNLocator
import numpy as np
import os
import ast
import operator
import pickle
import pandas as pd
from scipy import stats
from matplotlib.ticker import FormatStrFormatter
import matplotlib.patches as patches


# Render all plot text with Times New Roman through the serif family.
matplotlib.rcParams['font.sans-serif'] = "Times New Roman"
matplotlib.rcParams['font.family'] = "serif"
matplotlib.rcParams.update({'font.size': 8})


def GetCategory(class_label):
    """Map a class label to its feature category, or None if uncategorized."""
    category = None
    if class_label in ['display_orientation[cat]']:
        category = 'APP'
    elif class_label in ['class']:
        category = 'Availability'
    return category


def Entropy(values):
    """Return (consistency, value_counts) for a list of categorical values.

    Consistency is 1 minus the normalized Shannon entropy of the value
    distribution; a single distinct value yields 1.0 (perfectly consistent).
    """
    value_cnt_dict = dict()
    for value in values:
        if value not in value_cnt_dict:
            value_cnt_dict[value] = 0
        value_cnt_dict[value] += 1
    total = float(sum(value_cnt_dict.values()))
    value_ratio_list = [cnt / total for cnt in value_cnt_dict.values()]
    if len(value_ratio_list) == 1:
        # Only one distinct value: normalized entropy is undefined (log 1 == 0),
        # so treat it as maximally consistent.
        entropy = 1.0
    else:
        # Normalize by log(k) so the result is in [0, 1], then invert.
        entropy = stats.entropy(value_ratio_list) / np.log(len(value_ratio_list))
        entropy = 1 - entropy
    return entropy, value_cnt_dict


def GetEntropyEverage(entropy_info_list, granularity_min):
    """Bucket (timestamp, value) pairs into daily time slots and average consistency.

    Each timestamp is rounded to the nearest ``granularity_min``-minute boundary
    and keyed by its time of day; returns (mean per-slot consistency,
    list of per-slot instance counts).
    """
    time_key_class_values_dict = dict()
    for class_info in entropy_info_list:
        class_timestamp = class_info[0]
        class_value = class_info[1]
        # Round the timestamp's epoch nanoseconds to the granularity boundary.
        granularity_timestamp = pd.DatetimeIndex(
            ((np.round(pd.DatetimeIndex([class_timestamp]).asi8
                       / (1e9 * 60 * granularity_min)))
             * 1e9 * 60 * granularity_min).astype(np.int64))[0]
        daily_time = granularity_timestamp.strftime("%H:%M:%S")
        time_key = "time_daily:" + str(daily_time)
        if time_key not in time_key_class_values_dict:
            time_key_class_values_dict[time_key] = list()
        time_key_class_values_dict[time_key].append(class_value)
    entropy_list = list()
    instance_num_list = list()
    for time_key in time_key_class_values_dict:
        entropy_list.append(Entropy(time_key_class_values_dict[time_key])[0])
        instance_num_list.append(len(time_key_class_values_dict[time_key]))
    return np.mean(entropy_list), instance_num_list


def GetTimesFromPattern(patterns):
    """Collect every 'time_daily' item from a nested pattern as a sorted tuple."""
    time_list = [item
                 for pattern in patterns
                 for item in pattern
                 if "time_daily" in item]
    return tuple(sorted(time_list))


def PlotEntropyForPatternPair(entropy_result_path, output_dir, granularity_min):
    """Analyze fully/unfully pattern pairs per user and save consistency plots.

    Reads per-user pickle results from ``entropy_result_path``, aggregates
    consistency (inverted normalized entropy) for fully-sequential ("PAS") vs.
    association-only ("A-Only") patterns, and writes three artifacts into
    ``output_dir``: feature_distribution.pdf, entropy_distribution.pdf and
    entropy_distribution_all_users.pdf, plus two cache pickles.
    """
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)

    # NOTE(review): the `True or` deliberately disables the cache and forces
    # recomputation; drop it to reuse the pickled intermediate results.
    if True or not os.path.exists(output_dir + '/sequence_only_result_class.pickle'):
        # Process result files smallest-first.
        getall = [[files, os.path.getsize(entropy_result_path + "/" + files)]
                  for files in os.listdir(entropy_result_path)]
        file_info_list = list()
        for file_name, file_size in sorted(getall, key=operator.itemgetter(1)):
            if file_name[-7:] == '.pickle' and 'U8' not in file_name:
                file_info_list.append(file_name)
        print("# All Processed Users: %d" % len(file_info_list))
        print(file_info_list)

        class_label_user_pattern_pair_info = dict()
        for i, user_result_path in enumerate(file_info_list):
            user_result_path = "%s/%s" % (entropy_result_path, user_result_path)
            print("%d/%d - %s" % (i, len(file_info_list), user_result_path))
            with open(user_result_path, 'rb') as fin:  # binary mode for pickle
                user_result = pickle.load(fin)
            print("Loaded")
            for line_info in user_result:
                user = line_info[0]
                class_label = line_info[1]
                if class_label != 'class':
                    continue
                mcpp_len = line_info[2]
                condition_len = line_info[3]
                fully_pattern = str(line_info[4])
                unfully_pattern = str(line_info[5])
                fully_class_list = line_info[6]
                unfully_class_list = line_info[7]
                fully_class_cnt = len(fully_class_list)
                unfully_class_cnt = len(unfully_class_list)
                fully_entropy, fully_instance_num_list = GetEntropyEverage(fully_class_list, granularity_min)
                unfully_entropy, unfully_instance_num_list = GetEntropyEverage(unfully_class_list, granularity_min)

                print((user, class_label, mcpp_len, condition_len,
                       str(fully_instance_num_list), str(unfully_instance_num_list),
                       fully_entropy, unfully_entropy))

                if mcpp_len != len(fully_instance_num_list) or mcpp_len != len(unfully_instance_num_list):
                    continue

                # BUG FIX: the original `for n in ...: if n < 5: continue` was a
                # no-op (the continue only advanced the inner loop). The intent
                # is to skip pattern pairs that have any sparse time slot.
                if any(instance_num < 5
                       for instance_num in fully_instance_num_list + unfully_instance_num_list):
                    continue

                if class_label not in class_label_user_pattern_pair_info:
                    class_label_user_pattern_pair_info[class_label] = dict()
                if user not in class_label_user_pattern_pair_info[class_label]:
                    class_label_user_pattern_pair_info[class_label][user] = list()
                class_label_user_pattern_pair_info[class_label][user].append(
                    (user, class_label, mcpp_len, condition_len, fully_pattern,
                     unfully_pattern, fully_class_cnt, unfully_class_cnt,
                     fully_entropy, unfully_entropy))

        user_feature_pattern_pair_info_list_dict = dict()
        user_fully_unfully_time_class_cnt = dict()
        for class_label in class_label_user_pattern_pair_info:
            user_plot_data = dict()
            print(list(class_label_user_pattern_pair_info[class_label].keys()))
            for user in class_label_user_pattern_pair_info[class_label]:
                pattern_pair_info_list = class_label_user_pattern_pair_info[class_label][user]
                fully_entropy_list = list()
                unfully_entropy_list = list()
                for pattern_pair_info in pattern_pair_info_list:
                    condition_len = pattern_pair_info[3]
                    fully_class_cnt = pattern_pair_info[-4]
                    fully_entropy = pattern_pair_info[-2]
                    unfully_class_cnt = pattern_pair_info[-3]
                    unfully_entropy = pattern_pair_info[-1]

                    # Store the number of class instances in fully/unfully time.
                    if user not in user_fully_unfully_time_class_cnt:
                        user_fully_unfully_time_class_cnt[user] = dict()
                    if class_label not in user_fully_unfully_time_class_cnt[user]:
                        user_fully_unfully_time_class_cnt[user][class_label] = dict()
                        user_fully_unfully_time_class_cnt[user][class_label]["fully"] = dict()
                        user_fully_unfully_time_class_cnt[user][class_label]["unfully"] = dict()

                    # Pattern strings look like "<left>-<...>"; the left part is a
                    # literal Python structure — TODO confirm against the producer.
                    fully_left = ast.literal_eval(pattern_pair_info[4].split('-')[0])
                    unfully_left = tuple(
                        [ast.literal_eval(unfully_pattern.split('-')[0])[0]
                         for unfully_pattern in ast.literal_eval(pattern_pair_info[5])])

                    fully_timelist = GetTimesFromPattern(fully_left)
                    unfully_timelist = GetTimesFromPattern(unfully_left)

                    user_fully_unfully_time_class_cnt[user][class_label]["fully"][fully_timelist] = fully_class_cnt
                    user_fully_unfully_time_class_cnt[user][class_label]["unfully"][unfully_timelist] = unfully_class_cnt

                    fully_entropy_list.append(fully_entropy)
                    unfully_entropy_list.append(unfully_entropy)
                    if user not in user_feature_pattern_pair_info_list_dict:
                        user_feature_pattern_pair_info_list_dict[user] = dict()
                    if class_label not in user_feature_pattern_pair_info_list_dict[user]:
                        user_feature_pattern_pair_info_list_dict[user][class_label] = list()
                    # Store fully/unfully info of this user for the class label.
                    user_feature_pattern_pair_info_list_dict[user][class_label].append(pattern_pair_info)

                if len(fully_entropy_list) == 0:
                    continue
                user_plot_data[user] = (fully_entropy_list, unfully_entropy_list)

        ### Result Analysis ###
        # 1. Select features per user by increasing entropy difference.
        # 2. Group users based on the selected features.
        print("# Users: %d" % (len(user_feature_pattern_pair_info_list_dict.keys())))
        user_category_pair_avg_entropy_diff_dict = dict()
        # Distribution of features appearing in each user's top-3 entropy diffs.
        class_label_user_list_dict = dict()
        for user in user_feature_pattern_pair_info_list_dict:
            class_label_entropy_diff_list = list()
            for class_label in user_feature_pattern_pair_info_list_dict[user]:
                pattern_pair_info_list = user_feature_pattern_pair_info_list_dict[user][class_label]
                entropy_diff_list = [pattern_pair_info[-2] - pattern_pair_info[-1]
                                     for pattern_pair_info in pattern_pair_info_list]
                entropy_diff_mean = np.mean(entropy_diff_list)
                class_label_entropy_diff_list.append((user, class_label, entropy_diff_mean))
            class_label_entropy_diff_list = sorted(class_label_entropy_diff_list, key=lambda x: x[2])
            for class_label_entropy_diff in class_label_entropy_diff_list[:3]:
                print("%s\t%s\t%f" % class_label_entropy_diff)
                class_label = class_label_entropy_diff[1]
                if class_label not in class_label_user_list_dict:
                    class_label_user_list_dict[class_label] = list()
                class_label_user_list_dict[class_label].append(user)
            # Average entropy diff of sub-features per feature category.
            class_category_feature_entropy_diff_list_dict = dict()
            for class_label_entropy_diff in class_label_entropy_diff_list:
                class_label = class_label_entropy_diff[1]
                category = GetCategory(class_label)
                if category is not None:
                    if category not in class_category_feature_entropy_diff_list_dict:
                        class_category_feature_entropy_diff_list_dict[category] = list()
                    class_category_feature_entropy_diff_list_dict[category].append(class_label_entropy_diff)
            for category in class_category_feature_entropy_diff_list_dict:
                category_feature_entropy_diff_list = class_category_feature_entropy_diff_list_dict[category]
                category_feature_entropy_diff_list = [entropy_info[2]
                                                      for entropy_info in category_feature_entropy_diff_list]
                category_feature_entropy_diff_mean = np.mean(category_feature_entropy_diff_list)
                print(">> %s\t%s\t%f" % (user, category, category_feature_entropy_diff_mean))
                if user not in user_category_pair_avg_entropy_diff_dict:
                    user_category_pair_avg_entropy_diff_dict[user] = dict()
                user_category_pair_avg_entropy_diff_dict[user][category] = category_feature_entropy_diff_mean

        # Bar chart: how many users have each feature in their top 3.
        ax = plt.subplot(1, 1, 1)
        x_list = list(class_label_user_list_dict.keys())
        y_list = [len(class_label_user_list_dict[class_label]) for class_label in x_list]
        ind = np.arange(len(x_list))
        width = 0.35
        ax.bar(ind, y_list, width, align='center')
        ax.set_ylabel('# Users')
        ax.set_xlabel('Features of Top 3 in a each user')
        ax.set_xticks(ind)
        ax.set_xticklabels(x_list, rotation=270)
        ax.yaxis.set_major_locator(MaxNLocator(integer=True))
        ax.margins(0.02, 0.0)

        fig = plt.gcf()
        fig.set_size_inches(3, 3)
        fig.tight_layout()
        plt.savefig(output_dir + "/feature_distribution.pdf")
        plt.clf()

        # Distribution of entropy in sequential ("FULLY") vs. non-sequential
        # ("UNFULLY") patterns: user -> category -> type -> pattern -> {label: entropy}.
        user_class_category_pattern_type_feature_pattern_info_list_dict = dict()
        for user in user_feature_pattern_pair_info_list_dict:
            for class_label in user_feature_pattern_pair_info_list_dict[user]:
                category = GetCategory(class_label)
                if category is None:
                    continue
                if user not in user_class_category_pattern_type_feature_pattern_info_list_dict:
                    user_class_category_pattern_type_feature_pattern_info_list_dict[user] = dict()
                if category not in user_class_category_pattern_type_feature_pattern_info_list_dict[user]:
                    user_class_category_pattern_type_feature_pattern_info_list_dict[user][category] = dict()
                    user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["FULLY"] = dict()
                    user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["UNFULLY"] = dict()
                pattern_pair_info_list = user_feature_pattern_pair_info_list_dict[user][class_label]
                for pattern_pair_info in pattern_pair_info_list:
                    fully_pattern = pattern_pair_info[4]
                    fully_entropy = pattern_pair_info[-2]
                    unfully_pattern = pattern_pair_info[5]
                    unfully_entropy = pattern_pair_info[-1]
                    if fully_pattern not in user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["FULLY"]:
                        user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["FULLY"][fully_pattern] = dict()
                    if unfully_pattern not in user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["UNFULLY"]:
                        user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["UNFULLY"][unfully_pattern] = dict()
                    user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["FULLY"][fully_pattern][class_label] = fully_entropy
                    user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["UNFULLY"][unfully_pattern][class_label] = unfully_entropy

        def GetAllTimesFromTimeKeyList(time_key_list):
            # NOTE(review): defined but never called below — kept for fidelity.
            all_time_key_list = set()
            for time_key in time_key_list:
                for time in time_key:
                    all_time_key_list.add(time)
            return all_time_key_list

        # Availability instance counts per user.
        class_label = 'class'
        category = 'Availability'
        category_class_count_list = dict()
        for user in user_fully_unfully_time_class_cnt:
            if class_label in user_fully_unfully_time_class_cnt[user]:
                fully_class_count_list = list()
                unfully_class_count_list = list()
                for time_key in user_fully_unfully_time_class_cnt[user][class_label]['fully']:
                    class_cnt = user_fully_unfully_time_class_cnt[user][class_label]['fully'][time_key]
                    fully_class_count_list.append(class_cnt)
                for time_key in user_fully_unfully_time_class_cnt[user][class_label]['unfully']:
                    class_cnt = user_fully_unfully_time_class_cnt[user][class_label]['unfully'][time_key]
                    unfully_class_count_list.append(class_cnt)
                fully_class_count_mean = np.mean(fully_class_count_list)
                unfully_class_count_mean = np.mean(unfully_class_count_list)
                all_class_count_mean = (fully_class_count_mean + unfully_class_count_mean)
                if category not in category_class_count_list:
                    category_class_count_list[category] = list()
                category_class_count_list[category].append((user, class_label, all_class_count_mean))

        # Cache the intermediate results (binary mode + context managers).
        with open(output_dir + '/sequence_only_result_class.pickle', 'wb') as f:
            pickle.dump(user_class_category_pattern_type_feature_pattern_info_list_dict, f)
        with open(output_dir + '/sequence_only_category_class_count_list.pickle', 'wb') as f:
            pickle.dump(category_class_count_list, f)
    else:
        with open(output_dir + '/sequence_only_result_class.pickle', 'rb') as f:
            user_class_category_pattern_type_feature_pattern_info_list_dict = pickle.load(f)
        with open(output_dir + '/sequence_only_category_class_count_list.pickle', 'rb') as f:
            category_class_count_list = pickle.load(f)

    category = 'Availability'
    class_top_k_list = [len(category_class_count_list[category])]
    x_label = ["PAS", "A-Only"]
    ind = [1, 2]
    fig, axarr = plt.subplots(1, 1)
    for class_top_k_idx, class_top_k in enumerate(class_top_k_list):
        user_class_cnt_list = category_class_count_list[category]
        user_class_cnt_list = sorted(user_class_cnt_list, key=lambda x: x[2], reverse=True)
        user_class_cnt_list = user_class_cnt_list[:class_top_k]

        entropy_diff_list = list()
        overall_fully_entropy_list = list()
        overall_unfully_entropy_list = list()
        for user_idx, user_class_count in enumerate(user_class_cnt_list):
            user = user_class_count[0]
            fully_x_list = list()
            fully_y_list = list()
            unfully_x_list = list()
            unfully_y_list = list()
            fully_pattern_time_list = list()
            unfully_pattern_time_list = list()
            # One consistency value per distinct time slot set (deduplicated).
            for fully_pattern in user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["FULLY"]:
                fully_left = ast.literal_eval(fully_pattern.split('-')[0])
                fully_timelist = GetTimesFromPattern(fully_left)
                fully_pattern_entropy_list = list(
                    user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["FULLY"][fully_pattern].values())
                fully_pattern_entropy_mean = np.mean(fully_pattern_entropy_list)
                if fully_timelist not in fully_pattern_time_list:
                    fully_x_list.append(ind[0])
                    fully_y_list.append(fully_pattern_entropy_mean)
                    fully_pattern_time_list.append(fully_timelist)
            for unfully_pattern in user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["UNFULLY"]:
                unfully_left = tuple(
                    [ast.literal_eval(unfully_pattern_item.split('-')[0])[0]
                     for unfully_pattern_item in ast.literal_eval(unfully_pattern)])
                unfully_timelist = GetTimesFromPattern(unfully_left)
                unfully_pattern_entropy_list = list(
                    user_class_category_pattern_type_feature_pattern_info_list_dict[user][category]["UNFULLY"][unfully_pattern].values())
                unfully_pattern_entropy_mean = np.mean(unfully_pattern_entropy_list)
                if unfully_timelist not in unfully_pattern_time_list:
                    unfully_x_list.append(ind[1])
                    unfully_y_list.append(unfully_pattern_entropy_mean)
                    unfully_pattern_time_list.append(unfully_timelist)
            entropy_diff_list.append(np.median(fully_y_list) - np.median(unfully_y_list))
            overall_fully_entropy_list.append(np.median(fully_y_list))
            overall_unfully_entropy_list.append(np.median(unfully_y_list))

        overall_ax = axarr
        overall_ax.scatter([ind[0]] * len(overall_fully_entropy_list),
                           overall_fully_entropy_list, facecolors='none', edgecolors='c')
        overall_ax.scatter([ind[1]] * len(overall_unfully_entropy_list),
                           overall_unfully_entropy_list, facecolors='none', edgecolors='c')
        bp = overall_ax.boxplot([overall_fully_entropy_list, overall_unfully_entropy_list],
                                widths=(0.3, 0.3))
        print("User Cnt: %d" % len(user_class_cnt_list))
        print("Consistency in PAS: %f" % bp['medians'][0].get_ydata()[0])
        print("Consistency in A-Only: %f" % bp['medians'][1].get_ydata()[0])
        if class_top_k_idx == 0:
            overall_ax.set_ylabel('Consistency')
        overall_ax.yaxis.set_major_formatter(FormatStrFormatter('%.1f'))
        overall_ax.set_xticklabels(x_label)
        overall_ax.margins(0.1, 0.1)
        # White x-label acts as an invisible spacer in the saved figure.
        overall_ax.set_xlabel('1')
        overall_ax.xaxis.label.set_color('white')
        diff_text = "%.3f" % (bp['medians'][0].get_ydata()[0] - bp['medians'][1].get_ydata()[0])
        diff_percen = "%.1f" % ((bp['medians'][0].get_ydata()[0] - bp['medians'][1].get_ydata()[0])
                                / bp['medians'][1].get_ydata()[0])
        overall_ax.arrow(1.85, 0.200, -0.65, 0.035)
        fig.set_size_inches(2, 1.5)
        fig.tight_layout()
        plt.savefig(output_dir + "/entropy_distribution.pdf", bbox_inches='tight')

        print(len(overall_fully_entropy_list))
        print(len(overall_unfully_entropy_list))
        user_class_cnt_list = user_class_cnt_list[:class_top_k]
        print(len(user_class_cnt_list))

        # Per-user bar plot of PAS vs. A-Only consistency.
        fig, ax = plt.subplots(1)
        ind = np.arange(len(user_class_cnt_list))

        width = 0.4  # BUG FIX: was assigned only inside the positive-diff branch,
                     # so a leading non-positive diff raised NameError.
        for x in ind:
            entropy_diff = overall_fully_entropy_list[x] - overall_unfully_entropy_list[x]
            if entropy_diff > 0:
                p = patches.Rectangle(
                    (x - width / 2.0, overall_fully_entropy_list[x]),
                    width, overall_unfully_entropy_list[x] - overall_fully_entropy_list[x],
                    hatch='////',
                    fill=False,
                    edgecolor="red")
            else:
                p = patches.Rectangle(
                    (x - width / 2.0, overall_unfully_entropy_list[x]),
                    width, overall_fully_entropy_list[x] - overall_unfully_entropy_list[x],
                    fill=False,
                    edgecolor="blue")
            ax.add_patch(p)

        ax.plot(ind, overall_fully_entropy_list, 'ro', c='k', marker='o',
                markerfacecolor='black', label='1', markersize=5)
        ax.plot(ind, overall_unfully_entropy_list, 'ro', c='k', marker='o',
                markerfacecolor='None', label='2', markersize=5)

        ax.set_xticks(ind)
        ax.set_xticklabels([str(x + 1) for x in ind])
        ax.set_xlabel('User Index', labelpad=1)
        ax.set_ylabel('Consistency')
        ax.margins(0.1, 0.1)
        lgd = ax.legend(loc='upper center', ncol=2, bbox_to_anchor=(0.5, 1.2),
                        fontsize=7, frameon=False)
        fig.set_size_inches(2, 1.5)
        fig.tight_layout()
        plt.savefig(output_dir + "/entropy_distribution_all_users.pdf", bbox_inches='tight')


if __name__ == '__main__':
    entropy_result_path = 'entropy_result'
    output_dir = 'output'
    # NOTE(review): the source was truncated here — the call to
    # PlotEntropyForPatternPair(entropy_result_path, output_dir, granularity_min)
    # presumably followed; confirm the granularity_min value against the original.
...
evaluate.py
Source:evaluate.py
import numpy as np


def evaluate_multiclass_one_vs_all(preds_one_hot, true_labels_one_hot, class_labels):
    """Evaluate multiclass multilabel predictions, one class vs. all the rest.

    We predict all classes at the same time and each instance can belong to
    several classes (gold annotations). Each class is scored as an independent
    binary problem; returns per-class precision/recall/F1 plus micro ('*_avg')
    and macro ('*_macro_avg') averages.
    """
    precision = dict()
    recall = dict()
    f = dict()
    stats = dict()
    collector = np.zeros(4)  # summed [tp, tn, fp, fn] across classes
    for i in range(len(class_labels)):
        class_label = class_labels[i]
        precision[class_label], recall[class_label], f[class_label], stats[class_label] = \
            evaluate_binary(binarize_multi_labels(preds_one_hot, i),
                            binarize_multi_labels(true_labels_one_hot, i))
        for j in range(len(stats[class_label])):
            collector[j] += stats[class_label][j]
    p_avg, r_avg, f_avg = micro_average(collector)
    return {'p': precision, 'r': recall, 'f': f,
            'p_avg': p_avg, 'r_avg': r_avg, 'f_avg': f_avg,
            'p_macro_avg': np.mean([precision[c] for c in precision.keys()]),
            'r_macro_avg': np.mean([recall[c] for c in recall.keys()]),
            'f_macro_avg': np.mean([f[c] for c in f.keys()])}


def evaluate_multiclass(preds_one_hot, true_labels_one_hot, class_labels):
    """Evaluate single-choice multiclass predictions against multilabel gold.

    The predicted class of each instance is the argmax of its prediction row;
    a hit on any of the instance's gold classes counts as a TP, otherwise the
    prediction is an FP and each missed gold class an FN.
    """
    precision = dict()
    recall = dict()
    f = dict()
    # Per class: column 0 = tp, 1 = fp, 2 = fn (column 3 unused).
    collector = np.zeros((len(class_labels), 4))
    for i in range(len(preds_one_hot)):
        gold = true_labels_one_hot[i]
        gold_classes = [idx for idx in range(len(gold)) if gold[idx] == 1]
        pred = preds_one_hot[i]
        predicted_class = np.argmax(pred)
        for gold_class in gold_classes:
            if predicted_class == gold_class:
                collector[predicted_class, 0] += 1.  # tp for predicted class
            else:
                collector[predicted_class, 1] += 1.  # fp for predicted class
                collector[gold_class, 2] += 1.       # fn for gold class
    for c, class_label in enumerate(class_labels):
        # precision = tp / (tp + fp), guarded against empty denominators
        if (collector[c, 0] + collector[c, 1]) == 0:
            precision[class_label] = 0
        else:
            precision[class_label] = collector[c, 0] / (collector[c, 0] + collector[c, 1])
        # recall = tp / (tp + fn)
        if (collector[c, 0] + collector[c, 2]) == 0:
            recall[class_label] = 0
        else:
            recall[class_label] = collector[c, 0] / (collector[c, 0] + collector[c, 2])
        # F1 = 2pr / (p + r)
        if (precision[class_label] + recall[class_label]) == 0:
            f[class_label] = 0
        else:
            f[class_label] = (2 * precision[class_label] * recall[class_label]
                              / (precision[class_label] + recall[class_label]))
    p_avg = np.sum(collector[:, 0]) / (np.sum(collector[:, 0]) + np.sum(collector[:, 1]))
    r_avg = np.sum(collector[:, 0]) / (np.sum(collector[:, 0]) + np.sum(collector[:, 2]))
    f_avg = 2 * p_avg * r_avg / (p_avg + r_avg)
    return {'p': precision, 'r': recall, 'f': f,
            'p_avg': p_avg, 'r_avg': r_avg, 'f_avg': f_avg,
            'p_macro_avg': np.mean([precision[c] for c in precision.keys()]),
            'r_macro_avg': np.mean([recall[c] for c in recall.keys()]),
            'f_macro_avg': np.mean([f[c] for c in f.keys()])}


def binarize_multi_labels(one_hot, target_idx):
    """Return column ``target_idx`` of a one-hot label matrix as a binary vector."""
    if type(one_hot) == list:
        one_hot = np.array(one_hot)
    return one_hot[:, target_idx]


def generate_random_labels(numLabels, one_hot=False):
    """Generate random binary labels, optionally as a (numLabels, 2) one-hot matrix."""
    frow = list(np.random.randint(2, size=numLabels))
    if one_hot == False:
        return frow
    srow = [1 - i for i in frow]
    a = np.zeros((len(frow), 2))
    a[:, 0] = frow
    a[:, 1] = srow
    return a


def evaluate_binary(preds, true_labels):
    """Return (precision, recall, f1, np.array([tp, tn, fp, fn])) for binary labels."""
    tp = 0
    fp = 0
    tn = 0
    fn = 0
    for i in range(len(preds)):
        pred = preds[i]
        gold = true_labels[i]
        if pred == 1 and gold == 1:
            tp += 1
        elif pred == 0 and gold == 0:
            tn += 1
        elif pred == 1 and gold == 0:
            fp += 1
        elif pred == 0 and gold == 1:
            fn += 1
    if (tp + fp) > 0:
        prec = float(tp) / (tp + fp)
    else:
        prec = 0
    if (tp + fn) > 0:
        rec = float(tp) / (tp + fn)
    else:
        rec = 0
    if (prec + rec) > 0:
        f = (2 * prec * rec) / (prec + rec)
    else:
        f = 0
    return prec, rec, f, np.array([tp, tn, fp, fn])


def count(target, l):
    """Count occurrences of ``target`` in iterable ``l``."""
    return len([i for i in l if i == target])


def micro_average(collector):
    """Micro-averaged (precision, recall, f1) from summed [tp, tn, fp, fn] counts.

    BUG FIX: the original computed p, r and f but never returned them, so the
    caller's three-way unpack would fail; zero-division guards were also added
    for consistency with evaluate_binary.
    """
    tp = collector[0]
    fp = collector[2]
    fn = collector[3]
    p = tp / (float(tp) + fp) if (tp + fp) > 0 else 0
    r = tp / (float(tp) + fn) if (tp + fn) > 0 else 0
    f = (2 * p * r) / (p + r) if (p + r) > 0 else 0
    return p, r, f
classification_metrics.py
Source:classification_metrics.py
# Example run:
# python classification_metrics.py --n_classes 10 --n_examples 1000 --class_label 0 --seed 42
import numpy as np
import argparse


class classification_metrics():
    """One-vs-rest classification metrics on reproducible random label data.

    make_data() reseeds NumPy with the fixed seed on every call, so each
    metric is computed on the identical (actual, predicted) label pair.
    """

    def __init__(self, n_classes, n_examples, class_label, seed):
        self.n_classes = n_classes      # number of distinct class ids
        self.n_examples = n_examples    # number of label pairs to draw
        self.class_label = class_label  # default target class for the CLI
        self.seed = seed                # RNG seed for reproducibility

    def make_data(self):
        """Return (actual_labels, predicted_labels) drawn uniformly at random."""
        np.random.seed(self.seed)
        classes = np.arange(self.n_classes)
        actual_labels = np.random.choice(classes, self.n_examples)
        predicted_labels = np.random.choice(classes, self.n_examples)
        return actual_labels, predicted_labels

    def calculate_accuracy(self):
        """Fraction of examples where the predicted label equals the actual one."""
        actual_labels, predicted_labels = self.make_data()
        return sum(actual_labels == predicted_labels) / len(actual_labels)

    def find_outcome(self, x, y, class_label):
        """Classify one (actual=x, predicted=y) pair one-vs-rest for class_label.

        BUG FIX: the original returned 'FP' whenever x != y and
        x != class_label — but a confusion between two *other* classes is a
        true negative for this class, not a false positive.
        """
        if x == y:
            return 'TP' if x == class_label else 'TN'
        if x == class_label:
            return 'FN'  # the target class was missed
        if y == class_label:
            return 'FP'  # the target class was wrongly predicted
        return 'TN'      # misprediction between two other classes

    def find_confusion_matrix(self, class_label):
        """Return {'TP', 'TN', 'FP', 'FN'} counts for ``class_label``."""
        actual_labels, predicted_labels = self.make_data()
        outcomes = np.array(list(map(lambda x, y: self.find_outcome(x, y, class_label),
                                     actual_labels, predicted_labels)))
        tp = sum(outcomes == 'TP')
        tn = sum(outcomes == 'TN')
        fp = sum(outcomes == 'FP')
        fn = sum(outcomes == 'FN')
        return {'TP': tp, 'TN': tn, 'FP': fp, 'FN': fn}

    def calculate_precision(self, class_label):
        """TP / (TP + FP) for ``class_label``."""
        outcome_dict = self.find_confusion_matrix(class_label)
        return outcome_dict['TP'] / (outcome_dict['TP'] + outcome_dict['FP'])

    def calculate_recall(self, class_label):
        """TP / (TP + FN) for ``class_label``."""
        outcome_dict = self.find_confusion_matrix(class_label)
        return outcome_dict['TP'] / (outcome_dict['TP'] + outcome_dict['FN'])

    def calculate_f1_score(self, class_label):
        """Harmonic mean of precision and recall for ``class_label``."""
        prec = self.calculate_precision(class_label)
        rec = self.calculate_recall(class_label)
        return 2 * prec * rec / (prec + rec)

    def calculate_balanced_accuracy(self):
        """Mean per-class recall over all classes."""
        classes = np.arange(self.n_classes)
        return np.mean(list(map(lambda x: self.calculate_recall(x), classes)))


if __name__ == '__main__':
    # BUG FIX: argument parsing moved inside the main guard so importing this
    # module no longer consumes sys.argv (and no longer aborts under test runners).
    parser = argparse.ArgumentParser(description='Calculating Classification Metrics')
    parser.add_argument('--n_classes', type=int, help='Number of classes')
    parser.add_argument('--n_examples', type=int, help='Number of examples')
    parser.add_argument('--class_label', type=int, help='Class Label')
    parser.add_argument('--seed', type=int, help='SEED')
    args = parser.parse_args()

    metrics = classification_metrics(args.n_classes, args.n_examples, args.class_label, args.seed)

    prec = metrics.calculate_precision(args.class_label).round(4) * 100
    rec = metrics.calculate_recall(args.class_label).round(4) * 100
    f1_score = metrics.calculate_f1_score(args.class_label).round(4) * 100
    acc = metrics.calculate_accuracy().round(4) * 100
    balanced_acc = metrics.calculate_balanced_accuracy().round(4) * 100

    print('Precision:', prec, '%')
    print('Recall:', rec, '%')
    print('F1 Score:', f1_score, '%')
    print('Accuracy:', acc, '%')
Learn to execute automation testing from scratch with the LambdaTest Learning Hub — right from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks, i.e. Selenium, Cypress, TestNG, etc.
You could also refer to video tutorials over LambdaTest YouTube channel to get step by step demonstration from industry experts.
Get 100 minutes of automation test minutes FREE!!