Best Python code snippet using pytest-benchmark
Obtain_TopK.py
Source: Obtain_TopK.py
import json

'''
get top k opinion words for each domain with different methods ("ml", "gradient")
'''
def get_top_k(domain="lap", method="ml"):
    if method == 'ml':
        filename = "./results/ans_ml_{}_1_final.json".format(domain)
        num_file = "./results/aspect_ml_{}_number_final.json".format(domain)
    else:
        filename = "./results/ans_gradient_{}_1_final.json".format(domain)
        num_file = "./results/aspect_number_{}_1_gradient_final.json".format(domain)
    with open(filename, 'r') as f:
        ans = json.load(f)
    with open(num_file, 'r') as f:
        aspect_num = json.load(f)
    stop_words_str = "re###able###ask###due###give###etc###tell###also###aaa###won###un###us###can###why###who###then###got###get###ll###but###lo###ki###can###wi###let###ve###his###could###still###about###this###them###so###or###if###would###only###both###been###when###our###as###be###by###he###him###she###her###they###their###your###after###with###there###what###for###at###we###you###is###!###,###,###.###;###:###are###these###those###other###were###on###its###is###was###has###will###my###how###do###does###a###an###am###me###gets###get###the###in###than###it###had###have###from###s###and###since###too###shows###that###to###of###at###itself###from###being###how###what###who###which###where###had###wants###b###c###d###e###f###g###h###i###j###k###l###m###n###o###p###q###r###s###t###u###v###w###x###y###z###-###_###'###\"###[CLS]###[SEP]".split("###")
    stop_words = {}
    for word in stop_words_str:
        stop_words[word] = 1
    import numpy as np
    ranking = {}
    average_acc = 0.0
    count = 0
    opinion_word_num = 100
    fr_to = open("./results/{}_{}_words_list_{}.csv".format(domain, method, opinion_word_num), "w")
    for aspect in ans.keys():
        words = {}
        for word in ans[aspect].keys():
            words[word] = np.sum(ans[aspect][word]) / len(ans[aspect][word])
        # print(words)
        ranking[aspect] = sorted(words.items(), key=lambda item: item[1], reverse=True)
        # print(aspect, ranking[aspect])
        if aspect_num.get(aspect, 0) < 15:
            continue
        fr_to.write(aspect)
        terms = ""
        cnt = 0
        total_num = 0
        for i in range(len(ranking[aspect])):
            term_value = ranking[aspect][i]
            # if term_value[1] < 2.5:
            #     continue
            if term_value[0].startswith("##"):
                continue
            if term_value[0] in stop_words:
                # print(term_value)
                continue
            if total_num >= opinion_word_num:
                break
            total_num += 1
            terms += term_value[0] + ": " + str(term_value[1]) + " "
            fr_to.write("," + term_value[0])
        print(aspect, " -> ", terms)
        fr_to.write("\n")
    fr_to.close()

def read_csv(domain='lap'):
    fr_1 = open("./results/{}_{}_words_list_{}.csv".format(domain, 'ml', 100), "r")
    language_mask = {}
    for line in fr_1.readlines():
        data = line.strip().split(",")
        aspect = data[0]
        language_mask[aspect] = set()
        for opinion in data[1:]:
            language_mask[aspect].add(opinion)
    fr_2 = open("./results/{}_{}_words_list_{}_PMI.csv".format(domain, 'colinear', 100), "r")
    PMI_conlinear = {}
    for line in fr_2.readlines():
        data = line.strip().split(",")
        aspect = data[0]
        PMI_conlinear[aspect] = set()
        for opinion in data[1:]:
            PMI_conlinear[aspect].add(opinion)
    fr_to = open("./results/{}_ML_PMI.csv".format(domain), "w")
    for aspect in language_mask.keys():
        if aspect not in PMI_conlinear:
            continue
        jiaoji = language_mask[aspect] & PMI_conlinear[aspect]  # intersection of the two word sets
        chaji_lm = language_mask[aspect] - jiaoji               # words found only by the masked-LM method
        chaji_pmi = PMI_conlinear[aspect] - jiaoji              # words found only by PMI
        print(jiaoji, chaji_lm, chaji_pmi)
        fr_to.write("intersection," + aspect + "," + ",".join(list(jiaoji)) + "\n")
        fr_to.write("MLM," + aspect + "," + ",".join(list(chaji_lm)) + "\n")
        fr_to.write("PMI," + aspect + "," + ",".join(list(chaji_pmi)) + "\n")
        fr_to.write("\n")
    fr_to.close()

import pickle
import math

'''
calculate the score by forward
'''
def load_pkl(domain='res'):
    ans = {}
    with open("./results/aspect_aware_gradient_{}_1_final_all.pkl".format(domain), 'rb') as f:
        data = pickle.load(f)
    # print(data)
    gradient_change_all = {}
    word_value_gradient = {}
    aspect_value_gradient = {}
    aspect_word_value_gradient = {}
    total_value_gradient = 0.0
    foward_change_all = {}
    word_value_forward = {}
    aspect_value_forward = {}
    aspect_word_value_forward = {}
    aspect_word_num_forward = {}
    total_value_forward = 0.0
    word_value_PMI = {}
    aspect_value_PMI = {}
    aspect_word_value_PMI = {}
    total_value_PMI = 0.0
    for i in range(len(data)):
        orignal = data[i]['orignal']  # note: 'orignal' is the literal key used in the pickle files
        gradient = data[i]['gradient']
        sentiment = data[i]['sentiment']
        label = orignal['label']
        prob = orignal['prob']
        text = orignal['text']
        aspect = orignal['aspect']
        aspect_index = text.find(' ' + aspect + ' ')
        start_token_index = len(text[:aspect_index].strip().split(" "))
        end_token_index = start_token_index + len(aspect.split(" "))
        text_token = text.split(" ")
        near_words = set(
            text_token[start_token_index - 5: start_token_index] + text_token[end_token_index: end_token_index + 5])
        prob_change = {}
        if aspect not in foward_change_all:
            foward_change_all[aspect] = []
            aspect_word_value_forward[aspect] = {}
            aspect_word_num_forward[aspect] = {}
        if aspect not in aspect_word_value_PMI:
            aspect_word_value_PMI[aspect] = {}
        for tmp in sentiment:
            word = tmp[0]
            if word not in near_words:
                continue
            if word in ['[CLS]', '[SEP]']:
                continue
            label_ = tmp[1]
            prob_ = tmp[2]
            prob_change_value = math.fabs(prob[label] - prob_[label])
            if word not in prob_change:
                prob_change[word] = prob[label] - prob_[label]
            word_value_forward[word] = word_value_forward.get(word, 0.0) + prob_change_value
            aspect_word_value_forward[aspect][word] = aspect_word_value_forward[aspect].get(word, 0.0) + prob_change_value
            aspect_value_forward[aspect] = aspect_value_forward.get(aspect, 0.0) + prob_change_value
            total_value_forward += prob_change_value
            word_value_PMI[word] = word_value_PMI.get(word, 0.0) + 1
            aspect_word_value_PMI[aspect][word] = aspect_word_value_PMI[aspect].get(word, 0.0) + 1
            aspect_value_PMI[aspect] = aspect_value_PMI.get(aspect, 0.0) + 1
            total_value_PMI += 1
            foward_change_all[aspect].append([word, prob_change_value])
        prob_change = sorted(prob_change.items(), key=lambda item: item[1], reverse=True)
        # if prob_change[0][1] > 0.4:
        #     # print(text, aspect, label, prob_change)
        #     if aspect not in ans:
        #         ans[aspect] = {}
        #     for tmp in prob_change:
        #         if tmp[1] > 0.4:
        #             ans[aspect][tmp[0]] = ans[aspect].get(tmp[0], 0.0) + tmp[1]
        #             aspect_word_num_forward[aspect][tmp[0]] = aspect_word_num_forward[aspect].get(tmp[0], 0.0) + 1
        if aspect not in ans:
            ans[aspect] = {}
        for tmp in prob_change:
            ans[aspect][tmp[0]] = ans[aspect].get(tmp[0], 0.0) + tmp[1]
            aspect_word_num_forward[aspect][tmp[0]] = aspect_word_num_forward[aspect].get(tmp[0], 0.0) + 1.0
        if aspect not in gradient_change_all:
            gradient_change_all[aspect] = []
            aspect_word_value_gradient[aspect] = {}
        # gradient_change = {}
        for tmp in gradient:
            # print(tmp)
            word = tmp[0]
            if word not in near_words:
                continue
            if word in ['[CLS]', '[SEP]']:
                continue
            grad = tmp[1]
            word_value_gradient[word] = word_value_gradient.get(word, 0.0) + grad
            aspect_word_value_gradient[aspect][word] = aspect_word_value_gradient[aspect].get(word, 0.0) + grad
            aspect_value_gradient[aspect] = aspect_value_gradient.get(aspect, 0.0) + grad
            total_value_gradient += grad
            # if word not in gradient_change:
            #     gradient_change[word] = grad
            gradient_change_all[aspect].append([word, grad])
        # gradient_change = sorted(gradient_change.items(), key=lambda item: item[1], reverse=True)
        # print(text, aspect, gradient_change)
        # gradient_change_all[aspect] = gradient_change
    # Gradient_PMI = {}
    # ans_gradient_PMI = {}
    # for aspect in aspect_word_value_gradient.keys():
    #     Gradient_PMI[aspect] = {}
    #     max_value = 0
    #     min_value = 10000
    #     for word in aspect_word_value_gradient[aspect].keys():
    #         P_O_A = aspect_word_value_gradient[aspect][word] / aspect_value_gradient[aspect]
    #         P_O = word_value_gradient[word] / total_value_gradient
    #         PMI = P_O_A / (P_O + 0.001)
    #         Gradient_PMI[aspect][word] = PMI
    #         if PMI > max_value:
    #             max_value = PMI
    #         if PMI < min_value:
    #             min_value = PMI
    #     if max_value == min_value:
    #         continue
    #     for word in Gradient_PMI[aspect].keys():
    #         Gradient_PMI[aspect][word] = (Gradient_PMI[aspect][word] - min_value) / (max_value - min_value) / 2
    #     gradient_change = sorted(Gradient_PMI[aspect].items(), key=lambda item: item[1], reverse=True)
    #     ans_gradient_PMI[aspect] = gradient_change
    #     # print(aspect, gradient_change[:10])
    foward_PMI = {}
    ans_forward_PMI = {}
    for aspect in aspect_word_value_forward.keys():
        foward_PMI[aspect] = {}
        max_value = 0
        min_value = 10000
        for word in aspect_word_value_forward[aspect].keys():
            P_O_A = aspect_word_value_forward[aspect][word] / aspect_value_forward[aspect]
            P_O = word_value_forward[word] / total_value_forward
            PMI = P_O_A / (P_O + 0.001)
            foward_PMI[aspect][word] = PMI
            if PMI > max_value:
                max_value = PMI
            if PMI < min_value:
                min_value = PMI
        if max_value == min_value:
            continue
        for word in foward_PMI[aspect].keys():
            foward_PMI[aspect][word] = (foward_PMI[aspect][word] - min_value) / (max_value - min_value) / 2
        prob_change = sorted(foward_PMI[aspect].items(), key=lambda item: item[1], reverse=True)
        ans_forward_PMI[aspect] = prob_change
        print(aspect, prob_change[:10])
    PMI = {}
    ans_PMI = {}
    for aspect in aspect_word_value_PMI.keys():
        PMI[aspect] = {}
        max_value = 0
        min_value = 10000
        for word in aspect_word_value_PMI[aspect].keys():
            P_O_A = aspect_word_value_PMI[aspect][word] / aspect_value_PMI[aspect]
            P_O = word_value_PMI[word] / total_value_PMI
            PMI_value = P_O_A / (P_O + 0.001)
            PMI[aspect][word] = PMI_value
            if PMI_value > max_value:
                max_value = PMI_value
            if PMI_value < min_value:
                min_value = PMI_value
        if max_value == min_value:
            continue
        for word in PMI[aspect].keys():
            PMI[aspect][word] = (PMI[aspect][word] - min_value) / (max_value - min_value) / 2
        PMI_change = sorted(PMI[aspect].items(), key=lambda item: item[1], reverse=True)
        ans_PMI[aspect] = PMI_change
        # print(aspect, gradient_change[:10])
    ans_forward_avg = {}
    ans_forward_sum = {}
    for aspect in ans.keys():
        ans_forward_avg[aspect] = {}
        ans_forward_sum[aspect] = {}
        for word in ans[aspect].keys():
            ans_forward_avg[aspect][word] = ans[aspect][word] / aspect_word_num_forward[aspect][word]
            ans_forward_sum[aspect][word] = ans[aspect][word]
        aspect_word_list = sorted(ans_forward_avg[aspect].items(), key=lambda item: item[1], reverse=True)
        ans_forward_avg[aspect] = aspect_word_list
        aspect_word_list = sorted(ans_forward_sum[aspect].items(), key=lambda item: item[1], reverse=True)
        ans_forward_sum[aspect] = aspect_word_list
    ans_forward = {}
    for aspect in ans.keys():
        for word in ans[aspect].keys():
            ans[aspect][word] = ans[aspect][word] / aspect_word_num_forward[aspect][word] * (
                math.log(aspect_word_num_forward[aspect][word]) + 1)
        aspect_word_list = sorted(ans[aspect].items(), key=lambda item: item[1], reverse=True)
        ans_forward[aspect] = aspect_word_list
        # print(aspect, aspect_word_list)
    with open("./results/aspect_aware_gradient_{}_word_list_only_forward_1.pkl".format(domain), 'wb') as f:
        pickle.dump(
            {'forward': ans_forward, "forward_PMI": ans_forward_PMI, "PMI": ans_PMI, 'foward_avg': ans_forward_avg,
             "forward_sum": ans_forward_sum}, f)

'''
load cPickle file to csv for forward
'''
def load_word_list(domain='res'):
    stop_words_str = "`###+###]###[###@###%###=###/###`###$###&###*###(###)###?###re###able###ask###due###give###etc###tell###also###aaa###won###un###us###can###why###who###then###got###get###ll###but###lo###ki###can###wi###let###ve###his###could###still###about###this###them###so###or###if###would###only###both###been###when###our###as###be###by###he###him###she###her###they###their###your###after###with###there###what###for###at###we###you###is###!###,###,###.###;###:###are###these###those###other###were###on###its###is###was###has###will###my###how###do###does###a###an###am###me###gets###get###the###in###than###it###had###have###from###s###and###since###too###shows###that###to###of###at###itself###from###being###how###what###who###which###where###had###wants###b###c###d###e###f###g###h###i###j###k###l###m###n###o###p###q###r###s###t###u###v###w###x###y###z###-###_###'###\"###[CLS]###[SEP]".split("###")
    stop_words = {}
    for word in stop_words_str:
        stop_words[word] = 1
    ans_file = open("./results/{}_forward_4.csv".format(domain), 'w', encoding='utf-8')
    with open("./results/aspect_aware_gradient_{}_word_list_only_forward_1.pkl".format(domain), 'rb') as f:
        data = pickle.load(f)
    # ans_gradient_PMI = data['gradient']
    ans_forward = data['forward']
    ans_forward_PMI = data['forward_PMI']
    ans_forward_avg = data['foward_avg']  # key spelling matches the pickle.dump() in load_pkl()
    ans_forward_sum = data['forward_sum']
    ans_PMI = data['PMI']
    for aspect in ans_forward_PMI.keys():
        ans_file.write("{}\n".format(aspect))
        ans_file.write("Forward_PMI")
        for tmp in ans_forward_PMI[aspect]:
            if tmp[0] in stop_words:
                continue
            ans_file.write(",{}".format(tmp[0]))
        # print(aspect, ans_forward_PMI[aspect][: 10])
        ans_file.write("\n")
        ans_file.write("Forward_avg")
        if aspect in ans_forward_avg:
            for tmp in ans_forward_avg[aspect]:
                if tmp[0] in stop_words:
                    continue
                ans_file.write(",{}".format(tmp[0]))
        ans_file.write("\n")
        ans_file.write("Forward_sum")
        if aspect in ans_forward_sum:
            for tmp in ans_forward_sum[aspect]:
                if tmp[0] in stop_words:
                    continue
                ans_file.write(",{}".format(tmp[0]))
        ans_file.write("\n")
        ans_file.write("Forward")
        if aspect in ans_forward:
            for tmp in ans_forward[aspect]:
                if tmp[0] in stop_words:
                    continue
                ans_file.write(",{}".format(tmp[0]))
            print(aspect, ans_forward[aspect][: 10])
        ans_file.write("\n")
        # ans_file.write("Backward_PMI")
        # if aspect in ans_gradient_PMI:
        #     for tmp in ans_gradient_PMI[aspect]:
        #         if tmp[0] in stop_words:
        #             continue
        #         ans_file.write(",{}".format(tmp[0]))
        #     print(aspect, ans_gradient_PMI[aspect][: 10])
        # ans_file.write("\n")
        ans_file.write("PMI")
        if aspect in ans_PMI:
            for tmp in ans_PMI[aspect]:
                if tmp[0] in stop_words:
                    continue
                ans_file.write(",{}".format(tmp[0]))
            print(aspect, ans_PMI[aspect][: 10])
        ans_file.write("\n\n")
    ans_file.close()

'''
calculate the score by gradient
'''
def load_pkl_gradient(domain='res'):
    with open("./results/aspect_aware_gradient_{}_1_final_all_only_gradient.pkl".format(domain), 'rb') as f:
        data = pickle.load(f)
    print(len(data))
    gradient_change_all = {}
    # word_value_gradient = {}
    # aspect_value_gradient = {}
    # aspect_word_value_gradient = {}
    # total_value_gradient = 0.0
    #
    # word_value_PMI = {}
    # aspect_value_PMI = {}
    # aspect_word_value_PMI = {}
    # total_value_PMI = 0.0
    for i in range(len(data)):
        orignal = data[i]['orignal']
        gradient = data[i]['gradient']
        label = orignal['label']
        prob = orignal['prob']
        text = orignal['text']
        aspect = orignal['aspect']
        aspect_index = text.find(' ' + aspect + ' ')
        start_token_index = len(text[:aspect_index].strip().split(" "))
        end_token_index = start_token_index + len(aspect.split(" "))
        text_token = text.split(" ")
        near_words = set(
            text_token[start_token_index - 5: start_token_index] + text_token[end_token_index: end_token_index + 5])
        # if aspect not in aspect_word_value_PMI:
        #     aspect_word_value_PMI[aspect] = {}
        if aspect not in gradient_change_all:
            gradient_change_all[aspect] = []
            # aspect_word_value_gradient[aspect] = {}
        gradient_change = {}
        avg_gradient = 0.0
        max_value = 0
        min_value = 10000
        for tmp in gradient:
            # print(tmp)
            word = tmp[0]
            if word not in near_words:
                continue
            if word in ['[CLS]', '[SEP]']:
                continue
            grad = tmp[1]
            if max_value < grad:
                max_value = grad
            if min_value > grad:
                min_value = grad
            if word not in gradient_change:
                gradient_change[word] = grad
            # gradient_change_all[aspect].append([word, grad])
        avg_gradient /= len(gradient)  # stays 0.0: the accumulation below is commented out
        gradient_change = sorted(gradient_change.items(), key=lambda item: item[1], reverse=True)
        # print(text, aspect, gradient_change)
        if max_value == min_value:
            continue
        for tmp in gradient_change:
            # if tmp[1] > avg_gradient * 2.5:
            word = tmp[0]
            grad = (tmp[1] - min_value) / (max_value - min_value)
            gradient_change_all[aspect].append([word, grad])
            # word_value_gradient[word] = word_value_gradient.get(word, 0.0) + grad
            # aspect_word_value_gradient[aspect][word] = aspect_word_value_gradient[aspect].get(word, 0.0) + grad
            # aspect_value_gradient[aspect] = aspect_value_gradient.get(aspect, 0.0) + grad
            # total_value_gradient += grad
            # avg_gradient += grad
    del data
    ans_gradient = {}
    gradient = {}
    for aspect in gradient_change_all.keys():
        gradient[aspect] = {}
        word_num = {}
        for tmp in gradient_change_all[aspect]:
            gradient[aspect][tmp[0]] = gradient[aspect].get(tmp[0], 0.0) + tmp[1]
            word_num[tmp[0]] = word_num.get(tmp[0], 0.0) + 1
        # print(aspect, gradient_change_all[aspect][:10])
        for word in gradient[aspect].keys():
            gradient[aspect][word] = gradient[aspect][word] / word_num[word] * (math.log(word_num[word]) + 1)
        gradient_change = sorted(gradient[aspect].items(), key=lambda item: item[1], reverse=True)
        print(aspect, gradient_change[:10])
        ans_gradient[aspect] = gradient_change
    del gradient_change_all
    del gradient
    # Gradient_PMI = {}
    # ans_gradient_PMI = {}
    # for aspect in aspect_word_value_gradient.keys():
    #     Gradient_PMI[aspect] = {}
    #     max_value = 0
    #     min_value = 10000
    #     for word in aspect_word_value_gradient[aspect].keys():
    #         P_O_A = aspect_word_value_gradient[aspect][word] / aspect_value_gradient[aspect]
    #         P_O = word_value_gradient[word] / total_value_gradient
    #         PMI = P_O_A / (P_O + 0.001)
    #         Gradient_PMI[aspect][word] = PMI
    #         if PMI > max_value:
    #             max_value = PMI
    #         if PMI < min_value:
    #             min_value = PMI
    #     if max_value == min_value:
    #         continue
    #     for word in Gradient_PMI[aspect].keys():
    #         Gradient_PMI[aspect][word] = (Gradient_PMI[aspect][word] - min_value) / (max_value - min_value) / 2
    #     gradient_change = sorted(Gradient_PMI[aspect].items(), key=lambda item: item[1], reverse=True)
    #     ans_gradient_PMI[aspect] = gradient_change
    #     # print(aspect, gradient_change[:10])
    # del aspect_word_value_gradient
    # del aspect_value_gradient
    # PMI = {}
    # ans_PMI = {}
    # for aspect in aspect_word_value_PMI.keys():
    #     PMI[aspect] = {}
    #     max_value = 0
    #     min_value = 10000
    #     for word in aspect_word_value_PMI[aspect].keys():
    #         P_O_A = aspect_word_value_PMI[aspect][word] / aspect_value_PMI[aspect]
    #         P_O = word_value_PMI[word] / total_value_PMI
    #         PMI_value = P_O_A / (P_O + 0.001)
    #         PMI[aspect][word] = PMI_value
    #         if PMI_value > max_value:
    #             max_value = PMI_value
    #         if PMI_value < min_value:
    #             min_value = PMI_value
    #     if max_value == min_value:
    #         continue
    #     for word in PMI[aspect].keys():
    #         PMI[aspect][word] = (PMI[aspect][word] - min_value) / (max_value - min_value) / 2
    #     PMI_change = sorted(PMI[aspect].items(), key=lambda item: item[1], reverse=True)
    #     ans_PMI[aspect] = PMI_change
    #     # print(aspect, gradient_change[:10])
    # del aspect_word_value_PMI
    # del aspect_value_PMI
    with open("./results/aspect_aware_gradient_{}_word_list_only_gradient.pkl".format(domain), 'wb') as f:
        pickle.dump({'gradient': ans_gradient}, f)

def normalization(x):
    ans_tmp = {}
    ans = {}
    for aspect in x.keys():
        max_value = 0
        min_value = 1000.0
        ans_tmp[aspect] = {}
        words = x[aspect]
        for i in range(len(words)):
            if words[i][1] > max_value:
                max_value = words[i][1]
            if words[i][1] < min_value:
                min_value = words[i][1]
        for i in range(len(words)):
            ans_tmp[aspect][words[i][0]] = (words[i][1] - min_value) / (max_value - min_value) / 2
        ans[aspect] = sorted(ans_tmp[aspect].items(), key=lambda item: item[1], reverse=True)
    return ans

'''
from cPickle to csv file for both forward and gradient
'''
def load_word_list_forward_gradient(domain='res'):
    stop_words_str = "told###wont###isnt###without###dont###`###+###]###[###@###%###=###/###`###$###&###*###(###)###?###re###able###ask###due###give###etc###tell###also###aaa###won###un###us###can###why###who###then###got###get###ll###but###lo###ki###can###wi###let###ve###his###could###still###about###this###them###so###or###if###would###only###both###been###when###our###as###be###by###he###him###she###her###they###their###your###after###with###there###what###for###at###we###you###is###!###,###,###.###;###:###are###these###those###other###were###on###its###is###was###has###will###my###how###do###does###a###an###am###me###gets###get###the###in###than###it###had###have###from###s###and###since###too###shows###that###to###of###at###itself###from###being###how###what###who###which###where###had###wants###b###c###d###e###f###g###h###i###j###k###l###m###n###o###p###q###r###s###t###u###v###w###x###y###z###-###_###'###\"###[CLS]###[SEP]".split("###")
    stop_words = {}
    for word in stop_words_str:
        stop_words[word] = 1
    ans_file = open("./results/{}_forward_backward_4.csv".format(domain), 'w', encoding='utf-8')
    ans_final = {}
    # (load_pkl() above saves to "..._only_forward_1.pkl"; this expects a file without the "_1" suffix)
    with open("./results/aspect_aware_gradient_{}_word_list_only_forward.pkl".format(domain), 'rb') as f_forward, open(
            "./results/aspect_aware_gradient_{}_word_list_only_gradient.pkl".format(domain), 'rb') as f_gradient:
        data_forward = pickle.load(f_forward)
        data_gradient = pickle.load(f_gradient)
    ans_gradient = data_gradient['gradient']
    ans_gradient = normalization(ans_gradient)
    # ans_gradient_PMI = data_gradient['gradient_PMI']
    ans_forward = data_forward['forward']
    ans_forward = normalization(ans_forward)
    ans_forward_PMI = data_forward['forward_PMI']
    ans_PMI = data_forward['PMI']
    for aspect in ans_forward.keys():
        ans_file.write("{}\n".format(aspect))
        ans_file.write("Forward_PMI")
        if aspect in ans_forward_PMI:
            word_num = 0
            for tmp in ans_forward_PMI[aspect]:
                if tmp[0] in stop_words:
                    continue
                ans_file.write(",{}".format(tmp[0]))
                word_num += 1
                if word_num >= 500:
                    break
            print(aspect, ans_forward_PMI[aspect][: 10])
        ans_file.write("\n")
        ans_file.write("Forward")
        if aspect in ans_forward:
            word_num = 0
            for tmp in ans_forward[aspect]:
                if tmp[0] in stop_words:
                    continue
                ans_file.write(",{}".format(tmp[0]))
                word_num += 1
                if word_num >= 500:
                    break
            print(aspect, ans_forward[aspect][: 10])
        ans_file.write("\n")
        ans_file.write("Backward")
        if aspect in ans_gradient:
            for tmp in ans_gradient[aspect]:
                if tmp[0] in stop_words:
                    continue
                ans_file.write(",{}".format(tmp[0]))
            print(aspect, ans_gradient[aspect][: 10])
        ans_file.write("\n")
        ans_file.write("Forward+Backward")
        if aspect in ans_forward and aspect in ans_gradient:
            word_value_gradient = {}
            words = set()
            for tmp in ans_gradient[aspect]:
                if tmp[0] in stop_words:
                    continue
                word_value_gradient[tmp[0]] = tmp[1]
                words.add(tmp[0])
            word_value_forward = {}
            for tmp in ans_forward[aspect]:
                if tmp[0] in stop_words:
                    continue
                word_value_forward[tmp[0]] = tmp[1]
                words.add(tmp[0])
            ans_tmp = {}
            for word in words:
                ans_tmp[word] = word_value_forward.get(word, 0.0) + word_value_gradient.get(word, 0.0)
            ans_tmp = sorted(ans_tmp.items(), key=lambda item: item[1], reverse=True)
            if aspect not in ans_final:
                ans_final[aspect] = {}
            for tmp in ans_tmp:
                ans_final[aspect][tmp[0]] = tmp[1]
                ans_file.write(",{}".format(tmp[0]))
            print(aspect, ans_tmp[:10])
        ans_file.write("\n")
        # ans_file.write("Backward_PMI")
        # if aspect in ans_gradient_PMI:
        #     for tmp in ans_gradient_PMI[aspect]:
        #         if tmp[0] in stop_words:
        #             continue
        #         ans_file.write(",{}".format(tmp[0]))
        #     print(aspect, ans_gradient_PMI[aspect][: 10])
        # ans_file.write("\n")
        #
        # ans_file.write("Forward_PMI+Backward_PMI")
        # if aspect in ans_forward_PMI and aspect in ans_gradient_PMI:
        #     word_value_gradient = {}
        #     words = set()
        #     for tmp in ans_gradient_PMI[aspect]:
        #         if tmp[0] in stop_words:
        #             continue
        #         word_value_gradient[tmp[0]] = tmp[1]
        #         words.add(tmp[0])
        #
        #     word_value_forward = {}
        #     for tmp in ans_forward_PMI[aspect]:
        #         if tmp[0] in stop_words:
        #             continue
        #         word_value_forward[tmp[0]] = tmp[1]
        #         words.add(tmp[0])
        #     ans_tmp = {}
        #     for word in words:
        #         ans_tmp[word] = word_value_forward.get(word, 0.0) + word_value_gradient.get(word, 0.0)
        #     ans_tmp = sorted(ans_tmp.items(), key=lambda item: item[1], reverse=True)
        #     for tmp in ans_tmp:
        #         ans_file.write(",{}".format(tmp[0]))
        #     print(aspect, ans_tmp[:10])
        #     ans_file.write("\n")
        ans_file.write("PMI")
        if aspect in ans_PMI:
            for tmp in ans_PMI[aspect]:
                if tmp[0] in stop_words:
                    continue
                ans_file.write(",{}".format(tmp[0]))
            print(aspect, ans_PMI[aspect][: 10])
        ans_file.write("\n\n")
    ans_file.close()
    with open("./results/{}_forward_backward_4_with_score.pkl".format(domain), 'wb') as f:
        pickle.dump(ans_final, f)

def aspect_num(domain='res'):
    with open("./results/aspect_aware_gradient_{}_1_final_all_only_gradient.pkl".format(domain), 'rb') as f:
        data = pickle.load(f)
    ans = {}
    for i in range(len(data)):
        orignal = data[i]['orignal']
        aspect = orignal['aspect']
        ans[aspect] = ans.get(aspect, 0) + 1
    with open("./results/{}_aspect_num.pkl".format(domain), 'wb') as f:
        pickle.dump(ans, f)

if __name__ == '__main__':
    # domain = 'lap'
    # get_top_k(domain=domain, method='ml')
    # domain = 'res'
    # get_top_k(domain=domain, method='ml')
    # read_csv(domain='lap')
    # read_csv(domain='res')
    # load_pkl(domain='res')
    # load_pkl(domain='lap')
    # load_pkl_gradient(domain='res')
    # load_pkl_gradient(domain='lap')
    load_word_list(domain='res')
    load_word_list(domain='lap')
    # load_word_list_forward_gradient(domain='res')
    # load_word_list_forward_gradient(domain='lap')
    # aspect_num(domain='res')
...
DiscretePerturbation.py
Source: DiscretePerturbation.py
#!/usr/bin/env python
# coding: utf-8
import os
os.environ["CUDA_VISIBLE_DEVICES"] = "0"
import torch
from pytorch_pretrained_bert import BertTokenizer, BertModel, BertForMaskedLM
import copy
import json

# Load pre-trained model tokenizer (vocabulary)
device = torch.device("cuda", 0)
domain = 'lap'
base_path = './bert_model/{}/lm_pretraining/'.format(domain)
# base_path = './bert_model/sentiment/'
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
tokenizer = BertTokenizer.from_pretrained(base_path)
model = BertForMaskedLM.from_pretrained(base_path).to(device)
# tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')
# model = BertForMaskedLM.from_pretrained('bert-base-uncased')
model.eval()

def prediction(text, masked_index_start=3, masked_index_end=7):
    # print(text)
    tokenized_text = tokenizer.tokenize(text)
    # print(tokenized_text)
    indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
    tokenized_text_masked = copy.deepcopy(tokenized_text)
    for masked_index in range(masked_index_start, masked_index_end):
        tokenized_text_masked[masked_index] = '[MASK]'
    # print(tokenized_text, tokenized_text_masked)
    indexed_tokens_masked = tokenizer.convert_tokens_to_ids(tokenized_text_masked)
    with torch.no_grad():
        tokens_tensor = torch.tensor([indexed_tokens_masked]).to(device)
        predictions = model(tokens_tensor)
        del tokens_tensor
    import math
    ans = []
    for masked_index in range(masked_index_start, masked_index_end):
        predicted_rate = predictions[0, masked_index, indexed_tokens[masked_index]].item()
        ans.append([tokenized_text[masked_index], math.exp(-predicted_rate)])
    return ans

def get_aspects(domain="lap"):
    aspects = set()
    if domain == "lap":
        base_path = "./data_asc/14lap_train.json"
        data = json.load(open(base_path, 'r'))
        for tmp in data:
            opi2asp = tmp['Opi2Asp']
            text_tokens = tmp['text_token']
            for o_a in opi2asp:
                aspect = ""
                for x in text_tokens[o_a[3]: o_a[4] + 1]:
                    if x.startswith("##"):
                        aspect += x[2:]
                    else:
                        aspect += ' ' + x
                # aspect = [x[2:] if x.startswith("##") else x for x in text_tokens[o_a[3]: o_a[4]+1]]
                aspect = aspect.strip()
                aspects.add(aspect)
    else:
        for year in ['14', '15', '16']:
            base_path = "./data_asc/{}res_train.json".format(year)
            data = json.load(open(base_path, 'r'))
            for tmp in data:
                opi2asp = tmp['Opi2Asp']
                text_tokens = tmp['text_token']
                for o_a in opi2asp:
                    aspect = ""
                    for x in text_tokens[o_a[3]: o_a[4] + 1]:
                        if x.startswith("##"):
                            aspect += x[2:]
                        else:
                            aspect += ' ' + x
                    # aspect = [x[2:] if x.startswith("##") else x for x in text_tokens[o_a[3]: o_a[4]+1]]
                    aspect = aspect.strip()
                    aspects.add(aspect)
    return aspects

def mask_language(domain='res'):
    cut_words = 5
    aspects = get_aspects(domain=domain)
    import math
    ans = {}
    aspect_num = {}
    index = 0
    n_gram = 1
    fr = open("./sentiment/{}/train.tsv".format(domain), 'r')
    line = fr.readline()
    while line.strip() != "":
        if index % 100 == 0 and index != 0:
            print(index)
            with open("ans_ml_{}_{}_final.json".format(domain, n_gram), 'w') as f:
                json.dump(ans, f)
            with open("aspect_ml_{}_number_final.json".format(domain), 'w') as f:
                json.dump(aspect_num, f)
        index += 1
        line = line.strip().split("\t")
        if len(line) != 2:
            print(line)
            line = fr.readline()
            continue
        text = line[0]
        label = int(line[1])
        tokenized_text = tokenizer.tokenize(text)
        tokenized_text = ['[CLS]'] + tokenized_text[: 480] + ['[SEP]']
        indexed_tokens = tokenizer.convert_tokens_to_ids(tokenized_text)
        # print(len(tokenized_text), len(indexed_tokens))
        # text = " ".join(tokenized_text)
        text = ""
        for x in tokenized_text:
            if x.startswith("##"):
                text += x[2:]
            else:
                text += ' ' + x
        text = text.strip()
        for aspect in aspects:
            if " " + aspect + " " not in text:
                continue
            text_left = text[: text.find(aspect)]
            text_right = text[text.find(aspect) + len(aspect):]
            text_left_len = len(tokenizer.tokenize(text_left))
            text_right_len = len(tokenizer.tokenize(text_right))
            if text_right_len == 0:
                continue
            # print(text, "\n", text_left, "\n", aspect, "\n", text_right)
            masked_index_start = len(tokenizer.tokenize(text_left + aspect))
            if text_right_len < cut_words:
                masked_index_end = masked_index_start + text_right_len
            else:
                masked_index_end = masked_index_start + cut_words
            # print(text_left + aspect + text_right)
            try:
                ans_predict = prediction(text_left + aspect + text_right, masked_index_start=masked_index_start, masked_index_end=masked_index_end)
            except:
                print(text_left, "\n", aspect, "\n", text_right)
                continue
            # print(ans_predict)
            if aspect not in ans:
                ans[aspect] = {}
            aspect_num[aspect] = aspect_num.get(aspect, 0) + 1
            for aspect_replace in list(aspects)[: 60]:
                if aspect_replace == aspect:
                    continue
                masked_index_start_replace = len(tokenizer.tokenize(text_left + aspect_replace))
                if text_right_len < cut_words:
                    masked_index_end_replace = masked_index_start_replace + text_right_len
                else:
                    masked_index_end_replace = masked_index_start_replace + cut_words
                try:
                    ans_predict_replace = prediction(text_left + aspect_replace + text_right, masked_index_start=masked_index_start_replace, masked_index_end=masked_index_end_replace)
                except:
                    print(text_left, "\n", aspect, "\n", text_right)
                    continue
                # print(ans_predict_replace, aspect_replace)
                # use an explicit index so the merged "##" continuation piece can be skipped
                # (incrementing the variable of a `for i in range(...)` loop has no effect)
                i = 0
                while i < len(ans_predict):
                    if i < len(ans_predict) - 1 and ans_predict[i + 1][0].startswith("##"):
                        word = ans_predict[i][0] + ans_predict[i + 1][0].replace("##", "")
                        value = ((math.fabs(ans_predict_replace[i][1] - ans_predict[i][1]) / ans_predict[i][1])
                                 + (math.fabs(ans_predict_replace[i + 1][1] - ans_predict[i + 1][1]) / ans_predict[i + 1][1])) / 2.0
                        i += 1  # consume the continuation piece
                    else:
                        word = ans_predict[i][0]
                        value = math.fabs(ans_predict_replace[i][1] - ans_predict[i][1]) / ans_predict[i][1]
                    if word not in ans[aspect]:
                        ans[aspect][word] = []
                    ans[aspect][word].append(value)
                    i += 1
            masked_index_end = len(tokenizer.tokenize(text_left))
            if text_left_len < cut_words:
                masked_index_start = masked_index_end - text_left_len
            else:
                masked_index_start = masked_index_end - cut_words
            try:
                ans_predict = prediction(text_left + aspect + text_right, masked_index_start=masked_index_start, masked_index_end=masked_index_end)
            except:
                print(text_left, "\n", aspect, "\n", text_right)
                continue
            for aspect_replace in list(aspects)[: 60]:
                if aspect_replace == aspect:
                    continue
                masked_index_end_replace = len(tokenizer.tokenize(text_left))
                if text_left_len < cut_words:
                    masked_index_start_replace = masked_index_end_replace - text_left_len
                else:
                    masked_index_start_replace = masked_index_end_replace - cut_words
                try:
                    ans_predict_replace = prediction(text_left + aspect_replace + text_right, masked_index_start=masked_index_start_replace, masked_index_end=masked_index_end_replace)
                except:
                    print(text_left, "\n", aspect, "\n", text_right)
                    continue
                # print(ans_predict_replace, aspect_replace, text_left, aspect_replace, )
                # if len(ans_predict_replace) == 0:
                #     continue
                i = 0
                while i < len(ans_predict):
                    if i < len(ans_predict) - 1 and ans_predict[i + 1][0].startswith("##"):
                        word = ans_predict[i][0] + ans_predict[i + 1][0].replace("##", "")
                        value = ((math.fabs(ans_predict_replace[i][1] - ans_predict[i][1]) / ans_predict[i][1])
                                 + (math.fabs(ans_predict_replace[i + 1][1] - ans_predict[i + 1][1]) / ans_predict[i + 1][1])) / 2.0
                        i += 1  # consume the continuation piece
                    else:
                        word = ans_predict[i][0]
                        value = math.fabs(ans_predict_replace[i][1] - ans_predict[i][1]) / ans_predict[i][1]
                    if word not in ans[aspect]:
                        ans[aspect][word] = []
                    ans[aspect][word].append(value)
                    i += 1
        line = fr.readline()
    fr.close()
    with open("ans_ml_{}_{}_final.json".format(domain, n_gram), 'w') as f:
        json.dump(ans, f)
    with open("aspect_ml_{}_number_final.json".format(domain), 'w') as f:
        json.dump(aspect_num, f)

mask_language(domain=domain)

def get_top_k_lap(filename="ans_ml_lap_1.json", num_file="aspect_ml_number.json"):
    import json
    with open(filename, 'r') as f:
        ans = json.load(f)
    with open(num_file, 'r') as f:
        aspect_num = json.load(f)
    import numpy as np
    ranking = {}
    average_acc = 0.0
    count = 0
    for aspect in ans.keys():
        words = {}
        for word in ans[aspect].keys():
            words[word] = np.sum(ans[aspect][word]) / len(ans[aspect][word])
        # print(words)
        ranking[aspect] = sorted(words.items(), key=lambda item: item[1], reverse=True)
        # print(aspect, ranking[aspect])
        if aspect_num.get(aspect, 0) < 15:
            continue
        terms = ""
        cnt = 0
        total_num = 0
        for i in range(len(ranking[aspect])):
            term_value = ranking[aspect][i]
            # if term_value[1] < 2.5:
            #     continue
            if i >= 10:
                break
            total_num += 1
            terms += term_value[0] + ": " + str(term_value[1]) + " "
        print("{}/{}".format(cnt, total_num), aspect, " -> ", terms)
...
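The score that mask_language() accumulates is the relative change in each context token's masked-LM likelihood when the true aspect is swapped for a different one, with "##" subword pieces merged into the preceding token and their changes averaged. A self-contained sketch of just that scoring rule; the function name and toy numbers are illustrative, not from the source:

import math

def perturbation_scores(ans_predict, ans_predict_replace):
    # Both arguments are lists of [token, likelihood] pairs as returned by prediction().
    scores = {}
    i = 0
    while i < len(ans_predict):
        if i < len(ans_predict) - 1 and ans_predict[i + 1][0].startswith("##"):
            # Merge a WordPiece continuation with its head token; average the two relative changes.
            word = ans_predict[i][0] + ans_predict[i + 1][0][2:]
            value = (math.fabs(ans_predict_replace[i][1] - ans_predict[i][1]) / ans_predict[i][1]
                     + math.fabs(ans_predict_replace[i + 1][1] - ans_predict[i + 1][1]) / ans_predict[i + 1][1]) / 2.0
            i += 1  # consume the continuation piece
        else:
            word = ans_predict[i][0]
            value = math.fabs(ans_predict_replace[i][1] - ans_predict[i][1]) / ans_predict[i][1]
        scores.setdefault(word, []).append(value)
        i += 1
    return scores

# A token whose likelihood collapses when the aspect changes scores high:
# perturbation_scores([["great", 0.8], ["screen", 0.9]], [["great", 0.2], ["screen", 0.85]])
# -> {'great': [0.75], 'screen': [0.0555...]}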
test_aspects.py
Source: test_aspects.py
...
    assert repr(aspect).startswith("<")

def test_aspect_from_components():
    aspect = Aspect([Component_A])
    assert Component_A in aspect

def test_aspect_from_aspect():
    aspect_1 = Aspect([Component_A])
    aspect_2 = Aspect([aspect_1])
    assert Component_A in aspect_2

def test_aspect_from_multiple_components_same_dict():
    aspect = Aspect([Component_A, Component_B])
    assert Component_A in aspect
    assert Component_B in aspect

def test_aspect_from_multiple_components_different_dicts():
    aspect = Aspect([Component_A, Component_B])
    assert Component_A in aspect
    assert Component_B in aspect

def test_aspect_from_multiple_aspects():
    aspect_1 = Aspect([Component_A])
    aspect_2 = Aspect([Component_B])
    aspect_3 = Aspect([aspect_1, aspect_2])
    assert Component_A in aspect_3
    assert Component_B in aspect_3

# note: duplicates the test of the same name above; the later definition shadows the earlier one
def test_aspect_from_multiple_components_different_dicts():
    aspect = Aspect([Component_A, Component_B])
    assert Component_A in aspect
    assert Component_B in aspect

def test_aspect_from_mixed_args():
    aspect_1 = Aspect([Component_A])
    aspect_2 = Aspect([Component_B, aspect_1])
    assert Component_A in aspect_2
    assert Component_B in aspect_2

def test_clashing_args():
    with pytest.raises(ValueError):
        aspect = Aspect([Component_A, Component_A])

def test_clashing_aspects():
    aspect_1 = Aspect([Component_A, Component_B])
    aspect_2 = Aspect([Component_B])
    with pytest.raises(ValueError):
        aspect_3 = Aspect([aspect_1, aspect_2])

def test_create_components():
    aspect = Aspect([Component_A])
    [components_a] = aspect()
    [components_b] = [Component_A()]
    assert components_a.i == components_b.i

def test_create_with_overrides_on_aspect():
    aspect = Aspect(
        [Component_A],
        overrides={
            Component_A: dict(i=1),
        },
    )
    [component] = aspect()
    assert component.i == 1

def test_create_with_overrides_on_creation():
    aspect = Aspect([Component_A])
    [component] = aspect(overrides={Component_A: dict(i=1)})
    assert component.i == 1

def test_create_with_override_for_missing_component():
    with pytest.raises(ValueError):
        Aspect(
            [Component_A],
            overrides={
                Component_B: dict(i=1),
            },
        )

def test_adding_aspect_to_entity(world):
    aspect = Aspect([Component_A])
    entity = world.create_entity()
    aspect.add(entity)
    world._flush_component_updates()
    assert Component_A in entity

def test_adding_clashing_aspect_to_entity(world):
    aspect = Aspect([Component_A])
    entity = world.create_entity(Component_A())
    with pytest.raises(KeyError):
        aspect.add(entity)

def test_aspect_in_entity(world):
    aspect = Aspect([Component_A])
    entity = world.create_entity()
    aspect.add(entity)
    world._flush_component_updates()
    assert aspect.in_entity(entity)

def test_aspect_not_in_entity(world):
    entity = world.create_entity()
    aspect = Aspect([Component_A])
    assert not aspect.in_entity(entity)

def test_remove_aspect_from_entity(world):
    aspect = Aspect([Component_A])
    entity = world.create_entity(*aspect())
    world._flush_component_updates()
    assert aspect.in_entity(entity)
    components = aspect.remove(entity)
    world._flush_component_updates()
    assert not aspect.in_entity(entity)
    assert Component_A not in entity
    assert len(components) == 1
    assert isinstance(components[0], Component_A)

def test_remove_aspect_from_entity_that_does_not_have_it(world):
    aspect = Aspect([Component_A])
    entity = world.create_entity()
    with pytest.raises(ValueError):
        aspect.remove(entity)

def test_factory_func():
    class Foo:
        pass
    f = factory(lambda: Foo())
    a = f()
    b = f()
    assert isinstance(a, Foo)
    assert isinstance(b, Foo)
    assert a is not b

def test_create_aspect_with_factory_function_defaults(world):
    class Foo:
        pass
    aspect = Aspect([Component_A],
        overrides={
            Component_A: dict(i=factory(lambda: 1)),
        },
    )
    [a] = aspect()
...
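Since this page indexes the snippet under pytest-benchmark, the same fixtures make for an easy timing test: the `benchmark` fixture calls the wrapped function repeatedly and reports timing statistics. A minimal sketch; the test name is hypothetical, and it assumes the same `world` fixture and `Aspect`/`Component_A` imports as the file above, with pytest-benchmark installed:

def test_benchmark_aspect_in_entity(benchmark, world):
    aspect = Aspect([Component_A])
    entity = world.create_entity(*aspect())
    world._flush_component_updates()
    # in_entity() is read-only, so it is safe for benchmark() to call it many times.
    assert benchmark(aspect.in_entity, entity)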