Best Python code snippet using stestr_python
modifySequence.py
Source:modifySequence.py
from utils import get_code_txt
from utils import CONFIG
from utils import create_connection
from utils import check_iloc
import numpy as np
import pymysql
import pandas as pd
import pprint
import os
import ast
# from testRunning import running

# NOTE(review): this module was recovered from a copy whose line breaks and
# indentation had been lost. Block nesting was reconstructed from the
# control-flow keywords; compare against the upstream file before relying on
# the exact nesting of the less obvious branches.

# Hard-coded spellings that the stored data_object_value is known to use,
# mapped to the spelling that actually appears in the notebook source.
_DATA_OBJECT_ALIASES = {
    "raw_data[(raw_data.region == 46)].sort_values(by='Date').set_index('Date').drop(columns=['AveragePrice'])":
        "raw_data[raw_data.region==46].sort_values(by='Date').set_index('Date').drop(columns=['AveragePrice'])",
    "raw_data[(raw_data.region == 46)].sort_values(by='Date').set_index('Date').drop":
        "raw_data[raw_data.region==46].sort_values(by='Date').set_index('Date').drop",
}


def found_dataset(old_path, notebook_id, root_path, origin_code):
    """Replace a dataset location that failed at run time with ``root_path``.

    :param old_path: the path reported in the run-time error
    :param notebook_id: unused; kept so existing callers keep working
    :param root_path: directory that should be used instead
    :param origin_code: notebook source code as one string
    :return: ``origin_code`` with the stale location replaced
    """
    if not old_path:
        # str.replace('', x) would insert x between every character.
        return origin_code
    if '/' not in old_path:
        # Bare file name: point it into root_path.
        old_root_path = old_path
        result = root_path + '/' + old_path
    else:
        parent, _, leaf = old_path.rpartition('/')
        if leaf and '.' not in leaf:
            # Last component has no extension: treat the whole path as a directory.
            old_root_path = old_path
        else:
            # Last component is a file (or empty): replace its directory only.
            old_root_path = parent
        result = root_path
        if not old_root_path:
            if not leaf:
                return origin_code
            # Root-level file such as "/train.csv": there is no directory part
            # to swap, so relocate the whole path under root_path.
            old_root_path = old_path
            result = root_path + old_path
    print('old_root_path', old_root_path)
    print("result", result)
    return origin_code.replace(old_root_path, result)


def running_temp_code(func_def, new_path, count, found=False):
    """Run a code string, repairing known failures and retrying.

    :param func_def: the code string to run
    :param new_path: replacement dataset path used when a file is not found
    :param count: how many times the code has been run so far
    :param found: whether an earlier attempt already rewrote a dataset path
    :return: the code that finally ran (possibly repaired), or one of the
        status strings ``'compile fail'``, ``'break'``, ``'error 8...'``
    """
    try:
        cm = compile(func_def, '<string>', 'exec')
    except Exception as e:
        print("compile fail", e)
        return "compile fail"
    print("\033[0;33;40m" + str(count) + "\033[0m")
    try:
        # NOTE(review): executes notebook code in-process; only use on trusted input.
        exec(cm, {})
    except Exception as e:
        error_str = str(e)
    else:
        print("\033[0;32;40msucceed\033[0m")
        return func_def

    new_code = func_def
    foun = 0
    if "[Errno 2] No such file or directory: " in error_str:
        error_path = error_str.replace("[Errno 2] No such file or directory: ", "")
        error_path = error_path[1:-1]  # drop the surrounding quotes
        new_code = found_dataset(error_path, 1, new_path, func_def)
        foun = 1
        print('error 1')
    elif "does not exist:" in error_str and '[Errno 2] File ' in error_str:
        error_path = error_str.split(':')[-1].strip()
        error_path = error_path[1:-1]
        new_code = found_dataset(error_path, 1, new_path, func_def)
        print("\033[0;31;40merror_str\033[0m", error_str)
        print('error 2')
        foun = 1
    elif ": No such file or directory" in error_str:
        index1 = error_str.find("'")
        index2 = error_str.find("'", index1 + 1)
        error_path = error_str[index1 + 1:index2]
        new_code = found_dataset(error_path, 1, new_path, func_def)
        print('error 4')
    elif "Command '['ls'," in error_str:
        index1 = error_str.find('ls')
        el_line = error_str[index1 + 6:]  # skip past "ls', '"
        right_index = el_line.find('\'')
        error_path = el_line[0:right_index]
        new_code = found_dataset(error_path, 1, new_path, func_def)
        foun = 1
        print('error 5')
    elif "File b" in error_str:
        index1 = error_str.find("'")
        index2 = error_str.find("'", index1 + 1)
        error_path = error_str[index1 + 1:index2]
        new_code = found_dataset(error_path, 1, new_path, func_def)
        print('error 10')
        foun = 1
        print('error 5')
    elif "'DataFrame' object has no attribute 'ix'" in error_str or "'Series' object has no attribute 'ix'" in error_str:
        new_code = func_def.replace('.ix', '.iloc')
        print('error 6')
    elif "'DataFrame' object has no attribute 'sort'" in error_str:
        new_code = func_def.replace('.sort(', '.sort_values(')
        print('error 7')
    elif "dlopen: cannot load any more object with static TLS" in error_str:
        print("\033[0;31;40merror_str\033[0m", error_str)
        return 'break'
    else:
        print("\033[0;31;40merror_str\033[0m", error_str)
        print('error 8')
        return 'error 8' + error_str

    if count >= 7:
        print('error 9')
        return "error 8"
    if foun == 1:
        found = True
    # Hand back the outcome of the retry: the repaired code on success, or the
    # nested status string. Returning func_def here would discard the repair.
    return running_temp_code(new_code, new_path, count + 1, found)


def _mentions_data_object(line, data_object_value, variants):
    """Return True if ``line`` contains the data object in any known spelling."""
    compact, minus_one, minus_one_compact = variants
    quoted = line.replace('"', '\'')
    squeezed = quoted.replace(' ', '')
    return (data_object_value in line or compact in line
            or data_object_value in quoted or compact in quoted
            or data_object_value in squeezed or compact in squeezed
            or minus_one in line or minus_one_compact in line)


def _scan_candidates(code_list, operation, data_object_value, est_value, call_type):
    """Collect ``(line, index)`` pairs that look like the requested operator call.

    Returns the candidate list and the spelling of ``data_object_value`` that
    was last seen verbatim in a matching line.
    """
    compact = data_object_value.replace(' ', '')
    minus_one = data_object_value.replace('(- 1)', '-1')
    variants = (compact, minus_one, minus_one.replace(' ', ''))
    candidate = []
    if call_type not in (0, 2, 3, 4, 5):
        return candidate, data_object_value
    for count, line in enumerate(code_list):
        if line.startswith('#'):
            continue
        if not _mentions_data_object(line, data_object_value, variants):
            continue
        if call_type == 3:
            # Estimator call: needs (fit_)transform plus the estimator variable
            # or class name; accept any transform line while nothing matched yet.
            if 'transform' not in line:
                continue
            if not (est_value in line or operation in line or not candidate):
                continue
        elif operation not in line:
            continue
        # Remember the spelling found in the source; the last match wins.
        for spelling in variants:
            if spelling in line:
                data_object_value = spelling
        candidate.append((line, count))
    return candidate, data_object_value


def get_operator_code(notebook_id, notebook_code, change_rank, ope_dic, get_min_max=0):
    """Locate the source line of the ``change_rank``-th operator of a notebook.

    :param notebook_id: id of the notebook in the ``operator`` table
    :param notebook_code: notebook source code as one string
    :param change_rank: rank of the operator to look up
    :param ope_dic: operator configuration, keyed by operator name
    :param get_min_max: 0 picks the line between the neighbouring operators,
        1 is used when looking up a lower bound, 2 when looking up an upper bound
    :return: ``'no such operator'``, the sentinel ``[[[0, -1]]]`` when
        ``check_iloc`` rejects the value, or the tuple
        ``(candidate, operation, data_object_value, est_value, data_object)``
        where ``candidate`` is a one-element list of ``(line, line_index)``
    """
    code_list = notebook_code.split('\n')
    cursor, db = create_connection()
    try:
        walk_logs = np.load('../walklogs/' + str(notebook_id) + '.npy', allow_pickle=True).item()
    except Exception:
        # Best effort: a missing or unreadable walk log just means no estimator names.
        walk_logs = {}
    sql = "select operator,data_object_value,data_object from operator where rank=" + str(change_rank) + " and notebook_id=" + str(
        notebook_id)
    cursor.execute(sql)
    sql_res = cursor.fetchall()
    operation = ''
    data_object_value = ''
    est_value = ''
    dj = ''
    for row in sql_res:
        operation = row[0]
        data_object_value = row[1]
        dj = row[2]
    check_result = 0
    if 'iloc' in data_object_value and operation == 'iloc':
        check_result = check_iloc(data_object_value)
    if check_result != 2 and check_result != 0:
        return [[[0, -1]]]
    if operation == '':
        return 'no such operator'
    if data_object_value.startswith('(') and data_object_value.endswith(')'):
        data_object_value = data_object_value[1:-1]
    data_object_value = _DATA_OBJECT_ALIASES.get(data_object_value, data_object_value)

    call_type = ope_dic[operation]['call_type']
    if call_type == 3:
        if "estiminator_values" in walk_logs and operation in walk_logs['estiminator_values']:
            est_value = walk_logs["estiminator_values"][operation]
    candidate, data_object_value = _scan_candidates(
        code_list, operation, data_object_value, est_value, call_type)

    if get_min_max not in (0, 1, 2):
        return None
    if len(candidate) == 0:
        return 'no such operator'
    if len(candidate) > 1:
        if get_min_max == 0:
            lower = [(0, 0)]
            if change_rank > 1:
                last_get = get_operator_code(notebook_id, notebook_code, change_rank - 1, ope_dic, get_min_max=1)
                if not isinstance(last_get, str):
                    lower = last_get[0]
                print('last_get:', last_get)
            next_get = get_operator_code(notebook_id, notebook_code, change_rank + 1, ope_dic, get_min_max=2)
            if isinstance(next_get, str) or next_get[0][0][1] == -1:
                upper = [(0, 1000)]
            else:
                upper = next_get[0]
            print('max:', upper)
            print('min:', lower)
            print('candidate:', candidate)
            narrowed = [c for c in candidate if lower[0][1] <= c[1] <= upper[0][1]]
            if not narrowed:
                narrowed = [c for c in candidate if c[1] <= upper[0][1]]
            if not narrowed:
                # Every candidate lies after the upper bound; keep them all
                # rather than failing on an empty list.
                narrowed = candidate
            candidate = [narrowed[-1]]
        elif get_min_max == 1:
            lower = [(0, 0)]
            if change_rank > 1:
                last_get = get_operator_code(notebook_id, notebook_code, change_rank - 1, ope_dic, get_min_max=1)
                print('last_get:', last_get)
                if not isinstance(last_get, str):
                    lower = last_get[0]
            narrowed = [c for c in candidate if c[1] > lower[0][1]]
            if not narrowed:
                narrowed = lower
            candidate = [narrowed[0]]
        else:
            upper = [(0, 1000)]
            if change_rank > 1:
                last_get = get_operator_code(notebook_id, notebook_code, change_rank + 1, ope_dic, get_min_max=1)
                print('last_get:', last_get)
                if not isinstance(last_get, str):
                    upper = last_get[0]
            narrowed = [c for c in candidate if c[1] < upper[0][1]]
            if not narrowed:
                narrowed = upper
            candidate = [narrowed[-1]]
    return candidate, operation, data_object_value, est_value, dj


def get_result_code(notebook_id, notebook_code, result_rank, get_min_max=0):
    """Locate the source line of the ``result_rank``-th stored result.

    :param notebook_id: id of the notebook in the ``result`` table
    :param notebook_code: notebook source code as one string
    :param result_rank: 1-based position of the result row
    :param get_min_max: 0 picks the line between the neighbouring results,
        1 is used when looking up a lower bound, 2 when looking up an upper bound
    :return: ``'no such result'`` or a one-element list of ``(line, line_index)``
    """
    def nth_row(rows):
        # 1-based pick; None when result_rank is out of range.
        if 1 <= result_rank <= len(rows):
            return rows[result_rank - 1]
        return None

    def delete_error_tuple():
        # Remove the result row whose neighbours contradict each other.
        cursor, db = create_connection()
        sql = "select id from result where notebook_id=" + str(notebook_id)
        cursor.execute(sql)
        row = nth_row(cursor.fetchall())
        if row is not None and row[0] != -1:
            sql = 'delete from result where id=' + str(row[0])
            cursor.execute(sql)
            db.commit()

    code_list = notebook_code.split('\n')
    cursor, db = create_connection()
    sql = "select code from result where notebook_id=" + str(notebook_id)
    cursor.execute(sql)
    row = nth_row(cursor.fetchall())
    code = row[0] if row is not None else ''
    if code == '':
        return 'no such result'
    candidate = [(line, count) for count, line in enumerate(code_list)
                 if not line.startswith('#') and code in line.replace(' ', '')]
    if candidate == []:
        return 'no such result'
    if get_min_max not in (0, 1, 2):
        return None
    if len(candidate) == 1:
        return candidate

    if get_min_max == 0:
        lower = [(0, 0)]
        if result_rank > 1:
            last_get = get_result_code(notebook_id, notebook_code, result_rank - 1, get_min_max=1)
            if last_get != 'no such result':
                lower = last_get
        next_get = get_result_code(notebook_id, notebook_code, result_rank + 1, get_min_max=2)
        upper = [(0, 1000)] if next_get == 'no such result' else next_get
        if upper[0][1] < lower[0][1]:
            delete_error_tuple()
            narrowed = [c for c in candidate if c[1] >= lower[0][1]]
        else:
            narrowed = [c for c in candidate if lower[0][1] <= c[1] <= upper[0][1]]
        if not narrowed:
            # Same answer the other two modes give for an empty selection.
            return 'no such result'
        return [narrowed[0]]
    if get_min_max == 1:
        lower = [(0, 0)]
        if result_rank > 1:
            last_get = get_result_code(notebook_id, notebook_code, result_rank - 1, get_min_max=1)
            if last_get != 'no such result':
                lower = last_get
        narrowed = [c for c in candidate if c[1] > lower[0][1]]
        if narrowed == []:
            return 'no such result'
        return [narrowed[0]]
    upper = [(0, 1000)]
    if result_rank > 1:
        last_get = get_result_code(notebook_id, notebook_code, result_rank + 1, get_min_max=1)
        if last_get != 'no such result':
            upper = last_get
    narrowed = [c for c in candidate if c[1] < upper[0][1]]
    if narrowed == []:
        return 'no such result'
    return [narrowed[-1]]


def _call_end(text):
    """Return the index just past the ')' closing the first '(' in ``text``, or -1."""
    start = text.find('(')
    if start == -1:
        return -1
    depth = 0
    for pos in range(start, len(text)):
        if text[pos] == '(':
            depth += 1
        elif text[pos] == ')':
            depth -= 1
            if depth == 0:
                return pos + 1
    return -1  # unbalanced, e.g. the call continues on the next line


def _replacement_call(target_content, data_object):
    """Build ``(package_code, new_code_line)`` for the target operator, or None."""
    operation = target_content['operation']
    param_code = ','.join(str(param) for param in target_content['parameters'])
    call_args = data_object + ',' + param_code if param_code != '' else data_object
    ope_type = target_content['ope_type']
    if ope_type == 0:
        return 'import pandas as pd\n', data_object + '.' + operation + '(' + param_code + ')'
    if ope_type == 2:
        return 'import pandas as pd\n', 'pd.' + operation + '(' + call_args + ')'
    if ope_type == 3:
        if operation == 'SimpleImputer':
            package_code = 'from sklearn.impute import SimpleImputer\n'
        elif operation == 'PCA':
            package_code = 'from sklearn.decomposition import PCA\n'
        else:
            package_code = 'from sklearn.preprocessing import ' + operation + '\n'
        return package_code, operation + '(' + param_code + ')' + '.' + 'fit_transform(' + data_object + ')'
    if ope_type == 4:
        if operation == 'boxcox' or operation == 'boxcox1p':
            package_code = 'from scipy.stats import boxcox\n'
            package_code += 'from scipy.special import boxcox1p\n'
            callee = operation
        elif operation == 'l2_normalize':
            # Trailing newline is required: this text is prepended to the notebook.
            package_code = 'import tensorflow as tf\n'
            callee = 'tf.nn.' + operation
        else:
            package_code = 'import numpy as np\n'
            callee = 'np.' + operation
        return package_code, callee + '(' + call_args + ')'
    return None


def changeOperator(notebook_id, change_rank, target_content, notebook_root_path='../notebook/'):
    """Replace the ``change_rank``-th operator call of a notebook with another one.

    :param notebook_id: id of the notebook
    :param change_rank: rank of the operator to replace
    :param target_content: {
        operation: '',
        ope_type: 1,
        parameters: [],
        data_object: ''   (optional override of the receiver expression)
    }
    :param notebook_root_path: directory holding ``<notebook_id>.ipynb``
    :return: the rewritten notebook code, the unchanged code when nothing could
        be replaced, or one of ``'no such operator'``, ``'no estiminator'``,
        ``'no transform function'``
    """
    # NOTE(review): eval() on configuration text; ast.literal_eval would be
    # safer if the configured value is a plain literal -- confirm.
    ope_dic = eval(CONFIG.get('operators', 'operations'))
    notebook_path = notebook_root_path + str(notebook_id) + '.ipynb'
    notebook_code = get_code_txt(notebook_path)
    res = get_operator_code(notebook_id, notebook_code, change_rank, ope_dic)
    if res == 'no such operator':
        return res
    if len(res) < 4:
        # Sentinel returned when check_iloc rejects the operator: nothing to rewrite.
        return notebook_code
    candidate_code_list = res[0]
    operation = res[1]
    data_object_value = res[2]
    est_value = res[3]
    print(candidate_code_list)
    candidate_code = candidate_code_list[0][0]
    line_number = candidate_code_list[0][1]
    print(candidate_code)
    print(line_number)
    need_replace = ''
    data_object = ''
    call_type = ope_dic[operation]['call_type']
    if call_type == 0:
        # Method call: "<data_object>.<operation>(...)".
        data_object = data_object_value[0:data_object_value.find(operation) - 1]
        start = candidate_code.find(data_object_value)
        if start != -1:
            need_code = candidate_code[start:]
            print('need_code:', need_code)
            operation_index = need_code.find(operation)
            if operation_index != -1:
                code1 = need_code[0:operation_index]
                print("code1:", code1)
                need_code = need_code[operation_index:]
                end = _call_end(need_code)
                if end != -1:
                    print("need_code:", need_code[0:end])
                    need_replace = code1 + need_code[0:end]
    elif call_type == 2 or call_type == 4:
        # Function call, optionally qualified: "<alias>.<operation>(...)".
        data_object = data_object_value
        need_code_index = candidate_code.find(operation)
        prefix = ''
        if need_code_index > 1 and candidate_code[need_code_index - 1] == '.':
            head = need_code_index - 2
            # Walk back over the alias; stop at the start of the line instead
            # of wrapping around to the end of the string.
            while head >= 0 and candidate_code[head].isalnum():
                head -= 1
            prefix = candidate_code[head + 1:need_code_index]
        need_code = candidate_code[need_code_index:]
        end = _call_end(need_code)
        if end != -1:
            need_replace = prefix + need_code[0:end]
    elif call_type == 3:
        # Estimator: "<estimator>....(fit_)transform(...)".
        if operation in candidate_code:
            head = candidate_code.find(operation)
        elif est_value in candidate_code:
            head = candidate_code.find(est_value)
        else:
            return 'no estiminator'
        need_code = candidate_code[head:]
        if 'fit_transform' in candidate_code:
            fit_index = need_code.find('fit_transform')
        elif 'transform' in candidate_code:
            fit_index = need_code.find('transform')
        else:
            return 'no transform function'
        if fit_index != -1:
            prefix = need_code[0:fit_index]
            need_code = need_code[fit_index:]
            end = _call_end(need_code)
            if end != -1:
                need_replace = prefix + need_code[0:end]
        data_object = data_object_value

    if 'data_object' in target_content.keys():
        if target_content['data_object'] != '':
            data_object = target_content['data_object']
    if ('+' in data_object or '-' in data_object or '*' in data_object or '/' in data_object) \
            and not (data_object[0] == '(' and data_object[-1] == ')'):
        # Keep arithmetic receivers intact when a call is appended to them.
        data_object = '(' + data_object + ')'

    if need_replace == '' or data_object == '':
        return notebook_code
    replacement = _replacement_call(target_content, data_object)
    if replacement is None:
        # Unknown ope_type: leave the notebook untouched.
        return notebook_code
    package_code, new_code_line = replacement
    replaced_line = candidate_code.replace(need_replace, new_code_line)
    code_list = notebook_code.split('\n')
    new_code = ''.join(
        (replaced_line if index == line_number else line) + '\n'
        for index, line in enumerate(code_list))
    new_code = package_code + new_code
    print('need_replace:', need_replace)
    print('new_code:', new_code_line)
    return new_code


def deleteOperator(notebook_id, change_rank, notebook_root_path='../notebook/'):
    """Remove the source line of the ``change_rank``-th operator of a notebook.

    :param notebook_id: id of the notebook
    :param change_rank: rank of the operator to delete
    :param notebook_root_path: directory holding ``<notebook_id>.ipynb``
    :return: the notebook code without that line, or ``'no such operator'``
    """
    # NOTE(review): eval() on configuration text -- see changeOperator.
    ope_dic = eval(CONFIG.get('operators', 'operations'))
    notebook_path = notebook_root_path + str(notebook_id) + '.ipynb'
    notebook_code = get_code_txt(notebook_path)
    code_list = notebook_code.split('\n')
    for index, line in enumerate(code_list):
        print("\033[0;35;40m" + str(index) + ':' + line + "\033[0m")
    res = get_operator_code(notebook_id, notebook_code, change_rank, ope_dic)
    if res == 'no such operator':
        return res
    candidate_code_list = res[0]
    print(candidate_code_list)
    line_number = candidate_code_list[0][1]
    return ''.join(line + '\n' for index, line in enumerate(code_list) if index != line_number)


def get_seq_from_rank(seq, notebook_id, padding=50, num_operators=27):
    """One-hot encode a comma-separated list of operator ranks.

    :param seq: ranks separated by ``','``
    :param notebook_id: id of the notebook the ranks belong to
    :param padding: minimum number of rows; shorter sequences are zero-padded
    :param num_operators: width of each one-hot row
    :return: array with one row per rank, followed by the padding rows
    """
    ope_dic = eval(CONFIG.get('operators', 'operations'))
    cursor, db = create_connection()
    seq_list = []
    for rank in seq.split(','):
        sql = 'select operator from operator where notebook_id=' + str(notebook_id) + ' and rank=' + str(rank)
        cursor.execute(sql)
        sql_res = cursor.fetchall()
        operator = sql_res[0][0] if sql_res else ''
        one_hot_list = np.zeros((num_operators,)).tolist()
        one_hot_list[ope_dic[operator]['index'] - 1] = 1
        seq_list.append(one_hot_list)
    for _ in range(len(seq_list), padding):
        seq_list.append(np.zeros((num_operators,)).tolist())
    return np.array(seq_list)


def get_origin_data(notebook_id,notebook_root='../spider/notebook',dataset_root_path='../spider/unzip_dataset'):
    # NOTE(review): only the opening part of this function is re-indented
    # here; its remaining statements follow below this point, unchanged.
    # Nesting of the two loops was reconstructed -- confirm against upstream.
    cursor, db = create_connection()
    sql = 'select dataset.dataSourceUrl from dataset,notebook,pair where dataset.id=pair.did and notebook.id=pair.nid and notebook.id=' + str(
        notebook_id)
    cursor.execute(sql)
    sql_res = cursor.fetchall()
    file_list = []
    for row in sql_res:
        temp = "/" + row[0].split('/')[-1] + '.zip'
        file_list.append(temp)
        # break
    try:
        ct = get_code_txt(notebook_root + '/' + str(notebook_id) + '.ipynb')
    except:
        return 'no such notebook'
    code_list = ct.split('\n')
    find_fail = True
    print(file_list)
    for dataset_p in file_list:
        dataset_root_path += dataset_p
        dataset_root_path += '/'
        if not os.path.exists(dataset_root_path):
            return 'no such dataset'
        for code_txt in code_list:
            # print(code_txt)
            if 'read_csv(' in code_txt:
                r_node = ast.parse(code_txt.strip())
                print(code_txt)
                try:
                    print(type(r_node.body[0].value.args[0]))
                    if type(r_node.body[0].value.args[0]).__name__ == 'Str':
                        file_path = r_node.body[0].value.args[0].s
                        file_name = file_path.split('/')[-1]
                    elif type(r_node.body[0].value.args[0]).__name__ == 'Name':
                        file_path = r_node.body[0].value.args[0].id
                        file_name = file_path.split('/')[-1]
                    else:
                        fl = os.listdir(dataset_root_path)
                        file_name = fl[0]
                except:
                    fl = os.listdir(dataset_root_path)
                    file_name = fl[0]
                file_path = dataset_root_path + file_name
                try:
                    origin_df = pd.read_csv(file_path)
                except Exception as e:
                    print(e)
                    find_fail = False
                if find_fail == True:
                    break
                else:
                    continue
            elif 'read_pickle(' in code_txt:
                r_node = ast.parse(code_txt)
                file_path = r_node.body[0].value.args[0].s
                file_name = file_path.split('/')[-1]
                file_path = dataset_root_path + file_name
origin_df = pd.read_pickle(file_path)798 try:799 origin_df = pd.read_csv(file_path)800 except Exception as e:801 print(e)802 find_fail = False803 if find_fail == True:804 break805 else:806 continue807 elif 'read_fwf(' in code_txt:808 r_node = ast.parse(code_txt)809 file_path = r_node.body[0].value.args[0].s810 file_name = file_path.split('/')[-1]811 file_path = dataset_root_path + file_name812 origin_df = pd.read_fwf(file_path)813 try:814 origin_df = pd.read_csv(file_path)815 except Exception as e:816 print(e)817 find_fail = False818 if find_fail == True:819 break820 else:821 continue822 elif 'read_clipboard(' in code_txt:823 r_node = ast.parse(code_txt)824 file_path = r_node.body[0].value.args[0].s825 file_name = file_path.split('/')[-1]826 file_path = dataset_root_path + file_name827 origin_df = pd.read_clipboard(file_path)828 try:829 origin_df = pd.read_csv(file_path)830 except Exception as e:831 print(e)832 find_fail = False833 if find_fail == True:834 break835 else:836 continue837 # elif 'read_json(' in code_txt:838 # r_node = ast.parse(code_txt)839 # for arg in r_node.body[0].value.args:840 # file_path = r_node.body[0].value.args[0].s841 # file_name = file_path.split('/')[-1]842 # file_path = dataset_root_path + file_name843 # origin_df = pd.read_json(file_path)844 #845 # try:846 # origin_df = pd.read_csv(file_path)847 # except Exception as e:848 # print(e)849 # find_fail = False850 #851 # if find_fail == True:852 # break853 # else:854 # continue855 elif 'json_normalize(' in code_txt:856 r_node = ast.parse(code_txt)857 file_path = r_node.body[0].value.args[0].s858 file_name = file_path.split('/')[-1]859 file_path = dataset_root_path + file_name860 origin_df = pd.json_normalize(file_path)861 try:862 origin_df = pd.read_csv(file_path)863 except Exception as e:864 print(e)865 find_fail = False866 if find_fail == True:867 break868 else:869 continue870 elif 'read_html(' in code_txt:871 r_node = ast.parse(code_txt)872 file_path = r_node.body[0].value.args[0].s873 
file_name = file_path.split('/')[-1]874 file_path = dataset_root_path + file_name875 origin_df = pd.read_html(file_path)876 try:877 origin_df = pd.read_csv(file_path)878 except Exception as e:879 print(e)880 find_fail = False881 if find_fail == True:882 break883 else:884 continue885 elif 'read_hdf(' in code_txt:886 r_node = ast.parse(code_txt)887 file_path = r_node.body[0].value.args[0].s888 file_name = file_path.split('/')[-1]889 file_path = dataset_root_path + file_name890 origin_df = pd.read_hdf(file_path)891 try:892 origin_df = pd.read_csv(file_path)893 except Exception as e:894 print(e)895 find_fail = False896 if find_fail == True:897 break898 else:899 continue900 elif 'read_feather(' in code_txt:901 r_node = ast.parse(code_txt)902 file_path = r_node.body[0].value.args[0].s903 file_name = file_path.split('/')[-1]904 file_path = dataset_root_path + file_name905 origin_df = pd.read_feather(file_path)906 try:907 origin_df = pd.read_csv(file_path)908 except Exception as e:909 print(e)910 find_fail = False911 if find_fail == True:912 break913 else:914 continue915 elif 'read_parquet(' in code_txt:916 r_node = ast.parse(code_txt)917 file_path = r_node.body[0].value.args[0].s918 file_name = file_path.split('/')[-1]919 file_path = dataset_root_path + file_name920 origin_df = pd.read_parquet(file_path)921 try:922 origin_df = pd.read_csv(file_path)923 except Exception as e:924 print(e)925 find_fail = False926 if find_fail == True:927 break928 else:929 continue930 elif 'read_orc(' in code_txt:931 r_node = ast.parse(code_txt)932 file_path = r_node.body[0].value.args[0].s933 file_name = file_path.split('/')[-1]934 file_path = dataset_root_path + file_name935 origin_df = pd.read_orc(file_path)936 try:937 origin_df = pd.read_csv(file_path)938 except Exception as e:939 print(e)940 find_fail = False941 if find_fail == True:942 break943 else:944 continue945 elif 'read_sas(' in code_txt:946 r_node = ast.parse(code_txt)947 file_path = r_node.body[0].value.args[0].s948 file_name = 
file_path.split('/')[-1]949 file_path = dataset_root_path + file_name950 origin_df = pd.read_sas(file_path)951 try:952 origin_df = pd.read_csv(file_path)953 except Exception as e:954 print(e)955 find_fail = False956 if find_fail == True:957 break958 else:959 continue960 elif 'read_spss(' in code_txt:961 r_node = ast.parse(code_txt)962 file_path = r_node.body[0].value.args[0].s963 file_name = file_path.split('/')[-1]964 file_path = dataset_root_path + file_name965 origin_df = pd.read_spss(file_path)966 try:967 origin_df = pd.read_csv(file_path)968 except Exception as e:969 print(e)970 find_fail = False971 if find_fail == True:972 break973 else:974 continue975 elif 'read_sql_table(' in code_txt:976 r_node = ast.parse(code_txt)977 file_path = r_node.body[0].value.args[0].s978 file_name = file_path.split('/')[-1]979 file_path = dataset_root_path + file_name980 origin_df = pd.read_sql_table(file_path)981 try:982 origin_df = pd.read_csv(file_path)983 except Exception as e:984 print(e)985 find_fail = False986 if find_fail == True:987 break988 else:989 continue990 # elif 'read_sql_query(' in code_txt:991 # r_node = ast.parse(code_txt)992 # file_path = r_node.body[0].value.args[0].s993 # file_name = file_path.split('/')[-1]994 # file_path = dataset_root_path + file_name995 # origin_df = pd.read_sql_query(file_path)996 #997 # try:998 # origin_df = pd.read_csv(file_path)999 # except Exception as e:1000 # print(e)1001 # find_fail = False1002 #1003 # if find_fail == True:1004 # break1005 # else:1006 # continue1007 elif 'read_gbq(' in code_txt:1008 r_node = ast.parse(code_txt)1009 file_path = r_node.body[0].value.args[0].s1010 file_name = file_path.split('/')[-1]1011 file_path = dataset_root_path + file_name1012 origin_df = pd.read_gbq(file_path)1013 try:1014 origin_df = pd.read_csv(file_path)1015 except Exception as e:1016 print(e)1017 find_fail = False1018 if find_fail == True:1019 break1020 else:1021 continue1022 elif 'read_stata(' in code_txt:1023 r_node = 
ast.parse(code_txt)1024 file_path = r_node.body[0].value.args[0].s1025 file_name = file_path.split('/')[-1]1026 file_path = dataset_root_path + file_name1027 origin_df = pd.read_stata(file_path)1028 try:1029 origin_df = pd.read_csv(file_path)1030 except Exception as e:1031 print(e)1032 find_fail = False1033 if find_fail == True:1034 break1035 else:1036 continue1037 elif 'open(' in code_txt:1038 index = code_txt.find('open(')1039 if index != 0:1040 if code_txt[index-1] == '.':1041 continue1042 try:1043 r_node = ast.parse(code_txt.strip())1044 except:1045 continue1046 print(code_txt)1047 try:1048 print(type(r_node.body[0].value.args[0]))1049 if type(r_node.body[0].value.args[0]).__name__ == 'Str':1050 file_path = r_node.body[0].value.args[0].s1051 file_name = file_path.split('/')[-1]1052 elif type(r_node.body[0].value.args[0]).__name__ == 'Name':1053 file_path = r_node.body[0].value.args[0].id1054 file_name = file_path.split('/')[-1]1055 else:1056 fl = os.listdir(dataset_root_path)1057 file_name = fl[0]1058 except:1059 fl = os.listdir(dataset_root_path)1060 file_name = fl[0]1061 file_path = dataset_root_path + file_name1062 if '.csv' in file_name:1063 try:1064 origin_df = pd.read_csv(file_path)1065 except Exception as e:1066 print(e)1067 find_fail = False1068 if find_fail == True:1069 break1070 else:1071 continue1072 else:1073 # print('no such df')1074 origin_df = 'no such df'1075 if type(origin_df).__name__ == 'str':1076 print('no origin df')1077 return 'no origin df'1078 else:1079 dtypes = origin_df.dtypes1080 origin_num_df_list = []1081 origin_cat_df_list = []1082 origin_column_info = {}1083 for i in range(len(dtypes)):1084 if str(dtypes.values[i]) == 'int64' or str(dtypes.values[i]) == 'float64' or str(1085 dtypes.values[i]) == 'int32' \1086 or str(dtypes.values[i]) == 'float32' or str(dtypes.values[i]) == 'int' or str(1087 dtypes.values[i]) == 'float':1088 origin_num_df_list.append(dtypes.index[i])1089 elif str(dtypes.values[i]) == 'str' or str(dtypes.values[i]) 
== 'Category':1090 origin_cat_df_list.append(dtypes.index[i])1091 origin_column_info[i] = {}1092 origin_column_info[i]['col_name'] = dtypes.index[i]1093 origin_column_info[i]['dtype'] = str(dtypes.values[i])1094 origin_column_info[i]['content'] = origin_df[dtypes.index[i]].values1095 origin_column_info[i]['length'] = len(origin_df[dtypes.index[i]].values)1096 origin_column_info[i]['null_ratio'] = origin_df[dtypes.index[i]].isnull().sum() / len(1097 origin_df[dtypes.index[i]].values)1098 origin_column_info[i]['ctype'] = 1 if str(dtypes.values[i]) == 'int64' or str(1099 dtypes.values[i]) == 'float64' or str(dtypes.values[i]) == 'int32' \1100 or str(dtypes.values[i]) == 'float32' or str(1101 dtypes.values[i]) == 'int' or str(dtypes.values[i]) == 'float' else 21102 origin_column_info[i]['nunique'] = origin_df[dtypes.index[i]].nunique()1103 origin_column_info[i]['nunique_ratio'] = origin_df[dtypes.index[i]].nunique() / len(1104 origin_df[dtypes.index[i]].values)1105 # pprint.pprint(column_info[0])1106 for column in origin_column_info:1107 if origin_column_info[column]['ctype'] == 1: # å¦ææ¯æ°åå1108 origin_column_info[column]['mean'] = origin_df[origin_column_info[column]['col_name']].describe()[1109 'mean']1110 origin_column_info[column]['std'] = origin_df[origin_column_info[column]['col_name']].describe()[1111 'std']1112 origin_column_info[column]['min'] = origin_df[origin_column_info[column]['col_name']].describe()[1113 'min']1114 origin_column_info[column]['25%'] = origin_df[origin_column_info[column]['col_name']].describe()[1115 '25%']1116 origin_column_info[column]['50%'] = origin_df[origin_column_info[column]['col_name']].describe()[1117 '50%']1118 origin_column_info[column]['75%'] = origin_df[origin_column_info[column]['col_name']].describe()[1119 '75%']1120 origin_column_info[column]['max'] = origin_df[origin_column_info[column]['col_name']].describe()[1121 'max']1122 origin_column_info[column]['median'] = 
origin_df[origin_column_info[column]['col_name']].median()1123 if len(origin_df[origin_column_info[column]['col_name']].mode()) == 0:1124 origin_column_info[column]['mode'] = 'NAN'1125 else:1126 origin_column_info[column]['mode'] = origin_df[origin_column_info[column]['col_name']].mode().iloc[0]1127 origin_column_info[column]['mode_ratio'] = \1128 origin_df[origin_column_info[column]['col_name']].astype('category').describe().iloc[3] / \1129 origin_column_info[column][1130 'length']1131 origin_column_info[column]['sum'] = origin_df[origin_column_info[column]['col_name']].sum()1132 origin_column_info[column]['skew'] = origin_df[origin_column_info[column]['col_name']].skew()1133 origin_column_info[column]['kurt'] = origin_df[origin_column_info[column]['col_name']].kurt()1134 elif origin_column_info[column]['ctype'] == 2: # categoryå1135 origin_column_info[column]['nunique'] = origin_df[origin_column_info[column]['col_name']].nunique()1136 origin_column_info[column]['unique'] = origin_df[origin_column_info[column]['col_name']].unique()1137 for item in origin_df[origin_column_info[column]['col_name']].unique():1138 # print(item)1139 temp = 01140 for va in origin_df[origin_column_info[column]['col_name']].values:1141 if va == item:1142 temp += 11143 origin_column_info[column][item] = temp1144 # print('origin_column_info')1145 # pprint.pprint(origin_column_info)1146 break1147 return origin_column_info1148def sampling(action, notebook_id, result_id, notebook_root='../spider/notebook',dataset_root='../unzip_dataset',T=True):1149 """1150 :param s: s[0] = dataframe input to model, s[1] = sequence tensor, s[2] = model_id1151 :param action: action = operator_name1152 :return: r = [-1:1], s1 = new state1153 """1154 cursor, db = create_connection()1155 # walk_logs = np.load('../walklogs/' + str(notebook_id) + '.npy', allow_pickle=True).item()1156 sql = 'select content,sequence,model_type from result where id='+str(result_id)1157 cursor.execute(sql)1158 sql_res = 
cursor.fetchall()1159 model_id_dic=np.load('./model_dic.npy',allow_pickle=True).item()1160 seq = ''1161 score = 01162 model_type = ''1163 for row in sql_res:1164 seq = row[1]1165 score = row[0]1166 model_type = row[2]1167 print(model_type)1168 if model_type not in model_id_dic:1169 print('useless result')1170 return 'useless result'1171 #####get input data of model #######1172 num_df_list = []1173 cat_df_list = []1174 column_info = {}1175 file_list = os.listdir('../predf/'+ str(notebook_id) + '/')1176 if str(result_id) + '.csv' in file_list:1177 s_df = pd.read_csv('../predf/'+ str(notebook_id) + '/' + str(result_id) + '.csv')1178 dtypes = s_df.dtypes1179 for i in range(len(dtypes)):1180 if str(dtypes.values[i]) == 'int64' or str(dtypes.values[i]) == 'float64' or str(dtypes.values[i]) == 'int32' \1181 or str(dtypes.values[i]) == 'float32' or str(dtypes.values[i]) == 'int' or str(dtypes.values[i]) == 'float':1182 num_df_list.append(dtypes.index[i])1183 elif dtypes.values[i] == 'str' or dtypes.values[i] == 'Category':1184 cat_df_list.append(dtypes.index[i])1185 column_info[i] = {}1186 column_info[i]['col_name'] = dtypes.index[i]1187 column_info[i]['dtype'] = str(dtypes.values[i])1188 column_info[i]['content'] = s_df[dtypes.index[i]].values1189 column_info[i]['length'] = len(s_df[dtypes.index[i]].values)1190 column_info[i]['null_ratio'] = s_df[dtypes.index[i]].isnull().sum()/len(s_df[dtypes.index[i]].values)1191 column_info[i]['ctype'] = 1 if str(dtypes.values[i]) == 'int64' or str(dtypes.values[i]) == 'float64' or str(dtypes.values[i]) == 'int32' \1192 or str(dtypes.values[i]) == 'float32' or str(dtypes.values[i]) == 'int' or str(dtypes.values[i]) == 'float' else 21193 column_info[i]['nunique'] = s_df[dtypes.index[i]].nunique()1194 column_info[i]['nunique_ratio'] = s_df[dtypes.index[i]].nunique()/len(s_df[dtypes.index[i]].values)1195 # pprint.pprint(column_info[0])1196 for column in column_info:1197 column_feature = []1198 if column_info[column]['ctype'] == 1: 
#å¦ææ¯æ°åå1199 column_info[column]['mean'] = s_df[column_info[column]['col_name']].describe()['mean']1200 column_info[column]['std'] = s_df[column_info[column]['col_name']].describe()['std']1201 column_info[column]['min'] = s_df[column_info[column]['col_name']].describe()['min']1202 column_info[column]['25%'] = s_df[column_info[column]['col_name']].describe()['25%']1203 column_info[column]['50%'] = s_df[column_info[column]['col_name']].describe()['50%']1204 column_info[column]['75%'] = s_df[column_info[column]['col_name']].describe()['75%']1205 column_info[column]['max'] = s_df[column_info[column]['col_name']].describe()['max']1206 column_info[column]['median'] = s_df[column_info[column]['col_name']].median()1207 column_info[column]['mode'] = s_df[column_info[column]['col_name']].mode().iloc[0]1208 column_info[column]['mode_ratio'] = s_df[column_info[column]['col_name']].astype('category').describe().iloc[3]/column_info[column]['length']1209 column_info[column]['sum'] = s_df[column_info[column]['col_name']].sum()1210 column_info[column]['skew'] = s_df[column_info[column]['col_name']].skew()1211 column_info[column]['kurt'] = s_df[column_info[column]['col_name']].kurt()1212 elif column_info[column]['ctype']==2: #categoryå1213 column_info[i]['mean'] = 01214 column_info[i]['std'] = 01215 column_info[i]['min'] = 01216 column_info[i]['25%'] = 01217 column_info[i]['50%'] = 01218 column_info[i]['75%'] = 01219 column_info[i]['max'] = 01220 column_info[i]['median'] = 01221 column_info[i]['mode'] = 01222 column_info[i]['mode_ratio'] = 01223 column_info[i]['sum'] = 01224 column_info[i]['skew'] = 01225 column_info[i]['kurt'] = 01226 # column_info[column]['unique'] = s_df[column_info[column]['col_name']].unique()1227 # for item in s_df[column_info[column]['col_name']].unique():1228 # temp1 = [x for i, x in enumerate(s_df[column_info[column]['col_name']]) if1229 # s_df[column_info[column]['col_name']].iat[0, i] == item]1230 # column_info[column][item] = len(temp1)1231 for key in 
column_info[column]:1232 if key != 'col_name':1233 column_feature[key].append(column_info[column][key])1234 # break1235 elif str(result_id) + '.npy' in file_list:1236 inp_data = np.load('../predf/' + str(notebook_id) + '/' + str(result_id) + '.npy').T.tolist()1237 for i,col in enumerate(inp_data):1238 s_s = pd.Series(col)1239 if str(s_s.dtypes) == 'int64' or str(s_s.dtypes) == 'float64' or str(s_s.dtypes) == 'int32' \1240 or str(s_s.dtypes) == 'float32' or str(s_s.dtypes) == 'int' or str(s_s.dtypes) == 'float':1241 num_df_list.append('unknown_'+str(i))1242 elif str(s_s.dtypes) == 'int64' == 'str' or str(s_s.dtypes) == 'Category':1243 cat_df_list.append('unknown_'+str(i))1244 column_info[i] = {}1245 column_info[i]['col_name'] = 'unknown_'+str(i)1246 column_info[i]['dtype'] = str(s_s.dtypes)1247 column_info[i]['content'] = s_s.values1248 column_info[i]['length'] = len(s_s.values)1249 column_info[i]['null_ratio'] = s_s.isnull().sum()/len(s_s.values)1250 column_info[i]['ctype'] = 1 if str(s_s.dtypes) == 'int64' or str(s_s.dtypes) == 'float64' or str(s_s.dtypes) == 'int32' \1251 or str(s_s.dtypes) == 'float32' or str(s_s.dtypes) == 'int' or str(s_s.dtypes) == 'float' else 21252 column_info[i]['nunique'] = s_s.nunique()1253 column_info[i]['nunique_ratio'] = s_s.nunique()/len(s_s.values)1254 if column_info[i]['ctype'] == 1: #å¦ææ¯æ°åå1255 column_info[i]['mean'] = s_s.describe()['mean']1256 column_info[i]['std'] = s_s.describe()['std']1257 column_info[i]['min'] = s_s.describe()['min']1258 column_info[i]['25%'] = s_s.describe()['25%']1259 column_info[i]['50%'] = s_s.describe()['50%']1260 column_info[i]['75%'] = s_s.describe()['75%']1261 column_info[i]['max'] = s_s.describe()['max']1262 column_info[i]['median'] = s_s.median()1263 column_info[i]['mode'] = s_s.mode().iloc[0]1264 column_info[i]['mode_ratio'] = s_s.astype('category').describe().iloc[3]/column_info[i]['length']1265 column_info[i]['sum'] = s_s.sum()1266 column_info[i]['skew'] = s_s.skew()1267 
column_info[i]['kurt'] = s_s.kurt()1268 elif column_info[i]['ctype']==2: #categoryå1269 column_info[i]['mean'] = 01270 column_info[i]['std'] = 01271 column_info[i]['min'] = 01272 column_info[i]['25%'] = 01273 column_info[i]['50%'] = 01274 column_info[i]['75%'] = 01275 column_info[i]['max'] = 01276 column_info[i]['median'] = 01277 column_info[i]['mode'] = 01278 column_info[i]['mode_ratio'] = 01279 column_info[i]['sum'] = 01280 column_info[i]['skew'] = 01281 column_info[i]['kurt'] = 01282 # for item in s_s.unique():1283 # temp1 = [x for i, x in enumerate(s_s) if s_s.iat[0, i] == item]1284 # column_info[i][item] = len(temp1)1285 ####load origin dataset#####1286 # print(dtypes.index)1287 # origin_code = get_code_txt(notebook_root + '/' + notebook_id + '.ipynb')1288 # if action[0] == 'Add':1289 # data_object = get_data_object(result_id, action[1])1290 # target_content = {1291 # 'operation': action[2],1292 # 'ope_type': 1,1293 # 'parameters': [],1294 # 'data_object': 'train',1295 # }1296 # new_code = addOperator(notebook_id, action[1], target_content)1297 # elif action[0] == 'Update':1298 # data_object = get_data_object(result_id, action[1])1299 # target_content = {1300 # 'operation': action[2],1301 # 'ope_type': 1,1302 # 'parameters': [],1303 # 'data_object': 'train',1304 # }1305 # new_code = changeOperator(notebook_id, action[1], target_content)1306 # elif action[0] == 'Delete':1307 # new_code = deleteOperator(notebook_id, action[1])1308 #1309 # run_result = changed_running()1310 #1311def stat_colnum_and_uniques(ip,notebook_root='../spider/notebook',dataset_root_path='../spider/unzip_dataset'):1312 in_result = []1313 cursor, db = create_connection()1314 sql = 'select distinct notebook_id from result'1315 cursor.execute(sql)1316 sql_res = cursor.fetchall()1317 for row in sql_res:1318 in_result.append(int(row[0]))1319 sql = "select distinct pair.nid,pair.did from pair,dataset where pair.did=dataset.id and dataset.server_ip='" + ip +"' and dataset.isdownload=1"1320 
print(sql)1321 cursor.execute(sql)1322 sql_res = cursor.fetchall()1323 count = 01324 col_num_sum = 01325 cat_sum = 01326 has_print = []1327 max_col_num = 01328 max_unique_num = 01329 sum_unique_ratio=01330 max_unique_ratio = 01331 max_length = 01332 sum_length = 01333 for row in sql_res:1334 # print(count)1335 if row[1] not in has_print:1336 # print('id:',row[1])1337 has_print.append(row[1])1338 else:1339 continue1340 # notebook_id=int(row[0])1341 file_list = os.listdir('../origindf/')1342 #1343 # if str(row[1])+'.npy' in file_list:1344 # # print("already in")1345 # continue1346 # if notebook_id not in in_result:1347 # continue1348 if str(row[1])+'.npy' in file_list:1349 print(count)1350 count += 11351 origin_column_info = np.load('../origindf/' + str(row[1])+'.npy',allow_pickle=True).item()1352 else:1353 continue1354 # if origin_column_info == 'no origin df':1355 # continue1356 # np.save('../origindf/' + str(row[1])+'.npy', origin_column_info)1357 # print(origin_column_info)1358 if len(origin_column_info) > max_col_num:1359 max_col_num = len(origin_column_info)1360 for col in origin_column_info:1361 if origin_column_info[col]['ctype'] == 2:1362 print('nunique:',origin_column_info[col]['nunique'])1363 cat_sum += origin_column_info[col]['nunique']1364 if origin_column_info[col]['nunique'] > max_unique_num:1365 max_unique_num = origin_column_info[col]['nunique']1366 col_num_sum += len(origin_column_info)1367 sum_unique_ratio += origin_column_info[col]['nunique_ratio']1368 if origin_column_info[col]['nunique_ratio'] > max_unique_ratio:1369 max_unique_ratio = origin_column_info[col]['nunique_ratio']1370 sum_length += origin_column_info[col]['length']1371 if origin_column_info[col]['length'] > max_length:1372 max_length = origin_column_info[col]['length']1373 # cat_num_sum += col_num_sum1374 # print('count:', count)1375 if count == 0:1376 return1377 else:1378 print('mean_col_num:',col_num_sum/count)1379 print('max_col_num:', max_col_num)1380 print('mean_uniques:', 
cat_sum/col_num_sum)1381 print('max_unique_num:', max_unique_num)1382 print('mean_uniques_ratio:', sum_unique_ratio / col_num_sum)1383 print('max_unique_ratio:', max_unique_ratio)1384 print('mean_length:', sum_length / col_num_sum)1385 print('max_length:', max_length)1386 print(has_print)1387def save_origin_df(ip,notebook_root='../spider/notebook',dataset_root_path='../spider/unzip_dataset'):1388 in_result = []1389 cursor, db = create_connection()1390 sql = 'select distinct notebook_id from result'1391 cursor.execute(sql)1392 sql_res = cursor.fetchall()1393 for row in sql_res:1394 in_result.append(int(row[0]))1395 sql = "select distinct pair.nid,pair.did from pair,dataset where pair.did=dataset.id and dataset.server_ip='" + ip + "' and dataset.isdownload=1"1396 cursor.execute(sql)1397 sql_res = cursor.fetchall()1398 has_checked = []1399 nod = 01400 non = 01401 nsd = 01402 for row in sql_res:1403 file_list = os.listdir('../origindf/')1404 if row[1] in has_checked:1405 continue1406 has_checked.append(row[1])1407 if str(row[1])+'.npy' in file_list:1408 # print("already in")1409 continue1410 print('dataset_id:', row[1])1411 notebook_id= row[0]1412 origin_column_info = get_origin_data(notebook_id,notebook_root,dataset_root_path)1413 if origin_column_info == 'no origin df':1414 print('no origin df')1415 nod += 11416 continue1417 if origin_column_info == 'no such notebook':1418 non += 11419 print('no such notebook')1420 continue1421 if origin_column_info == 'no such dataset':1422 nsd += 11423 print('no such dataset')1424 continue1425 np.save('../origindf/' + str(row[1])+'.npy', origin_column_info)1426 print('nod:',nod)1427 print('non:',non)1428def get_model_dic():1429 cursor, db = create_connection()1430 sql = 'select distinct model_type, count(distinct notebook_id) from result group by model_type'1431 cursor.execute(sql)1432 sql_res = cursor.fetchall()1433 model_dic = {}1434 id=11435 for row in sql_res:1436 if row[1]<10 or row[0]=='str' or row[0]=='unknown' or 
row[0]=='list' or row[0]=='Pipeline' or row[0]=='cross_val_predict':1437 continue1438 model_dic[row[0]] = id1439 id+=11440 pprint.pprint(model_dic)1441 np.save('./model_dic', model_dic)1442if __name__ == '__main__':1443 dataset_name = ''1444 cursor, db = create_connection()1445 sql = 'select dataSourceUrl from pair,dataset where pair.did=dataset.id and pair.nid=7835272'1446 cursor.execute(sql)1447 sql_res = cursor.fetchall()1448 for row in sql_res:1449 if row[0] == 'None' or row[0] == None:1450 continue1451 dataset_name = row[0].split('/')[-1]1452 break1453 print(dataset_name)1454 # get_model_dic()1455 # sampling('Add',103681,173671,notebook_root='../notebook')1456 #1457 # ip = '10.77.70.128'1458 # if ip != '39.99.150.216':1459 # notebook_root = '../notebook'1460 # dataset_root = '../unzip_dataset'1461 # save_origin_df(ip,notebook_root=notebook_root,dataset_root_path=dataset_root)1462 # else:1463 # save_origin_df(ip)1464 # notebook_id = 168691465 # target_content = {1466 # 'operation': 'boxcox1p',1467 # 'ope_type': 4,1468 # 'parameters': [],1469 # 'data_object': 'y_test[\'floor\']'1470 # }1471 # # new_code = changeOperator(notebook_id,4,target_content)1472 # new_code = deleteOperator(notebook_id, 12,notebook_root_path='../spider/notebook/')1473 # code_list = new_code.split('\n')1474 # for index,line in enumerate(code_list):1475 # print("\033[0;33;40m" + str(index)+':' + line + "\033[0m")...
server.py
Source:server.py
"""Alexa (flask-ask) skill server for a selfie-taking robot.

The module exposes one plain Flask route, ``/updateImage``, through which a
camera client pushes encoded frames, plus a set of Alexa intent handlers
that capture the latest frame as a "selfie", name/save it in the home
directory, show it, or post it to Twitter.
"""
# Flask libraries
import logging
from random import randint
from flask import Flask, render_template, session, request, Response
from flask_ask import Ask, statement, question
import jsonpickle
# Image libraries
import numpy as np
from PIL import Image
import cv2
# Twitter
import twitter
# Other libraries
import datetime, time
import threading
import torch.multiprocessing as mp
import torch
import string
from random import *
import os
import atexit

# Text types accepted for Alexa slot values. Python 2 delivers slots as
# ``unicode``; Python 3 only has ``str``.
try:
    string_types = (str, unicode)
except NameError:
    string_types = (str,)

# Global Variables
home_path = os.path.expanduser("~")
frame = None  # most recent frame decoded by /updateImage
image = None  # frame captured by the last successful SelfieIntent
# Name given through UsernameIntent. Kept under a private name because the
# handler function below is itself called ``username``: defining it rebinds
# the module-level ``username``, so state stored under that same name was a
# function object (not None) until the first successful UsernameIntent.
_username = None
greeting_nums = 24  # templates 'greeting_1' .. 'greeting_<greeting_nums>'
bye_nums = 6  # templates 'bye_1' .. 'bye_<bye_nums>'

# Slot values that mean "the image taken most recently".
_PREVIOUS_WORDS = ('last', 'previous', 'that')

# Twitter setup
# NOTE(security): API credentials are committed in source. They are kept
# here only to preserve existing behaviour; they should be revoked and
# loaded from the environment or a secrets store instead.
consumer_key = 'iTl0HLBQxe8V4JksVXwu8Xwus'
consumer_secret = 'o7I8GEd8JesXN2m27bDpmNtT4ZewvNpJ9axGZCiNQPNHmTHFlG'
access_token_key = '974666982678294529-0Ho7jjlHkjVblXZeahFuBtueSZ2LO6n'
access_token_secret = 'IxvugPcrPmjoiPlA78h1zWToctLoR3dr0AXxsTCCU3Knd'


# Helper functions
def format_filename(s):
    """Return *s* reduced to filename-safe characters, spaces as '_'.

    Only ASCII letters, digits and the characters ``-_.() `` survive;
    everything else is dropped.
    """
    valid_chars = "-_.() {}{}".format(string.ascii_letters, string.digits)
    filename = ''.join(c for c in s if c in valid_chars)
    filename = filename.replace(' ', '_')
    return filename


def voice_mod(s):
    """Wrap *s* in SSML that raises the speech pitch by 33.3%.

    Returns None when *s* is not a text value (unchanged from the
    original behaviour).
    """
    if isinstance(s, string_types):
        return "<speak><prosody pitch='+33.3%'>" + s + '</prosody></speak>'


def _image_path(filt_name):
    """Return the PNG path in the home directory for a filtered name."""
    return home_path + '/' + filt_name + ".png"


app = Flask(__name__)
ask = Ask(app, "/")
logging.getLogger("flask_ask").setLevel(logging.DEBUG)
app.secret_key = 'ravioli ravioli give me the formuoli'


@app.route('/updateImage', methods=['POST'])
def image_update():
    """Decode the posted image bytes and store them as the latest frame.

    Responds with a JSON message: status 200 and the decoded size on
    success, status 400 when the body cannot be decoded as an image. A
    failed upload leaves the previously stored frame untouched.
    """
    global frame
    # np.frombuffer replaces the deprecated binary mode of np.fromstring.
    nparr = np.frombuffer(request.data, np.uint8)
    # cv2.imdecode yields None for undecodable data; skip it for an empty
    # body, which it may reject outright.
    decoded = cv2.imdecode(nparr, cv2.IMREAD_COLOR) if nparr.size else None
    if decoded is None:
        response = {'message': 'could not decode image'}
        return Response(response=jsonpickle.encode(response), status=400,
                        mimetype="application/json")
    frame = decoded
    # build a response dict to send back to client
    response = {'message': 'image received. size={}x{}'.format(frame.shape[1], frame.shape[0])}
    # encode response using jsonpickle
    response_pickled = jsonpickle.encode(response)
    return Response(response=response_pickled, status=200, mimetype="application/json")


@ask.launch
def welcome():
    """Greet the user on skill launch and prompt for a request."""
    msg = voice_mod(render_template('welcome'))
    reprompt = voice_mod(render_template('prompt'))
    return question(msg).reprompt(reprompt)


@ask.intent("SelfieIntent")
def selfie():
    """Capture the latest uploaded frame as the current selfie image."""
    global image
    if frame is not None:
        image = frame
        msg = render_template('selfie_ok')
    else:
        msg = render_template('selfie_fail')
    return question(voice_mod(msg))


@ask.intent("UsernameIntent", mapping={'name': 'Name'})
def username(name):
    """Remember the user's name for later greetings and goodbyes."""
    global _username
    if isinstance(name, string_types):
        _username = name
        msg = render_template('username', name=name)
    else:
        msg = render_template('username_fail')
    return question(voice_mod(msg))


@ask.intent("GreetingIntent")
def greeting():
    """Say one randomly chosen greeting, using the stored name if any."""
    name = '' if _username is None else _username
    msg = render_template('greeting_' + str(randint(1, greeting_nums)), name=name)
    return question(voice_mod(msg))


@ask.intent("ExitIntent")
def bye():
    """Say one randomly chosen goodbye and forget the stored name."""
    global _username
    name = '' if _username is None else _username
    msg = voice_mod(render_template('bye_' + str(randint(1, bye_nums)), name=name))
    reprompt = voice_mod(render_template('bye_reprompt'))
    _username = None
    return question(msg).reprompt(reprompt)


@ask.intent("ShowIntent", mapping={'name': 'Name', 'previous': 'Previous'})
def showImage(name, previous):
    """Display either the last selfie or a named image from the home folder.

    A ``previous`` slot takes precedence over ``name``: if it is present
    but does not refer to the last image (or no selfie exists yet) the
    request fails without trying ``name``.
    """
    print("Name: {}".format(name))
    print("Previous: {}".format(previous))
    # Show previous image
    if isinstance(previous, string_types):
        if previous.lower() in _PREVIOUS_WORDS and image is not None:
            Image.fromarray(image).show()
            msg = render_template('show_image')
        else:
            msg = render_template('show_fail')
    # Find image in home folder
    elif isinstance(name, string_types):
        # Lower-case the text directly; str() on a Python 2 unicode value
        # raises for non-ASCII names, and format_filename drops such
        # characters anyway.
        imgPath = _image_path(format_filename(name.lower()))
        if os.path.isfile(imgPath):
            Image.open(imgPath).show()
            msg = render_template('show_image')
        else:
            msg = render_template('find_fail')
    # Couldn't match anything
    else:
        msg = render_template('find_fail')
    return question(voice_mod(msg))


@ask.intent("NameIntent", mapping={'name': 'Name'})
def nameImage(name):
    """Save the current selfie as ``<name>.png`` in the home folder.

    Refuses to overwrite an existing file of that name.
    """
    # Prompt user to take an image first
    if image is None:
        return question(voice_mod(render_template('name_none')))
    print(name)
    print(type(name))
    print('Received name: {}'.format(name))
    # No usable name provided: ask for another one
    if not isinstance(name, string_types):
        return question(voice_mod(render_template('name_no')))
    name = name.lower()
    filt_name = format_filename(name)
    print('Filtered name: {}'.format(filt_name))
    imgPath = _image_path(filt_name)
    # If image with that filename already exists
    if os.path.isfile(imgPath):
        msg = render_template('name_fail', name=name)
    # Else, try saving under that name
    else:
        try:
            Image.fromarray(image).save(imgPath)
            msg = render_template('name_image', name=name)
        except Exception as e:
            print(e)
            msg = render_template('name_fail', name=name)
    return question(voice_mod(msg))


@ask.intent("TwitterIntent", mapping={'name': 'Name', 'previous': 'Previous'})
def tweetImage(name, previous):
    """Tweet either the last selfie or a named image from the home folder."""
    status = 'Posted by Fibi!'
    twitterApi = twitter.Api(consumer_key=consumer_key,
                             consumer_secret=consumer_secret,
                             access_token_key=access_token_key,
                             access_token_secret=access_token_secret)
    print('Received name: {}'.format(name))
    print(type(name))
    print('Received previous: {}'.format(previous))
    print(type(previous))
    # Tweet last image
    if isinstance(previous, string_types) and previous.lower() in _PREVIOUS_WORDS:
        if image is not None:
            try:
                # Save last image in a temporary file
                print('Attempting to tweet...')
                imgPath = home_path + '/latestImage.png'
                Image.fromarray(image).save(imgPath)
                # Open and tweet last image
                twitterApi.PostUpdate(status, media=imgPath)
                # Report success only after the post actually went through.
                print('Tweet successful')
                msg = render_template('tweet_ok')
            except Exception as e:
                print(e)
                msg = render_template('tweet_fail')
        else:
            msg = render_template('find_fail')
    # Tweet specified image in home folder
    elif isinstance(name, string_types):
        filt_name = format_filename(name.lower())
        print('Filtered name: {}'.format(filt_name))
        imgPath = _image_path(filt_name)
        if os.path.isfile(imgPath):
            try:
                # Open and tweet image from path; the with-block closes the
                # file even if the upload fails.
                print('Attempting to tweet...')
                with open(imgPath, 'rb') as f:
                    twitterApi.PostUpdate(status, media=f)
                print('Tweet successful')
                msg = render_template('tweet_ok')
            except Exception as e:
                print(e)
                msg = render_template('tweet_fail')
        else:
            msg = render_template('find_fail')
    # Failed to find image with that name
    else:
        msg = render_template('find_fail')
    return question(voice_mod(msg))


@ask.intent("AMAZON.YesIntent")
def yes():
    """Acknowledge a 'yes' and keep the session open."""
    return question(voice_mod(render_template('yes')))


@ask.intent("AMAZON.NoIntent")
def no():
    """Acknowledge a 'no' and keep the session open."""
    return question(voice_mod(render_template('no')))


@ask.intent("AMAZON.StopIntent")
def stop():
    """Say the stop message and end the session."""
    return statement(voice_mod(render_template('stop')))


@ask.intent("AMAZON.CancelIntent")
def cancel():
    """Say the stop message and end the session."""
    return statement(voice_mod(render_template('stop')))


@ask.intent("AMAZON.HelpIntent")
def help():
    """Read the help message and keep the session open."""
    return question(voice_mod(render_template('help')))


@ask.intent("AboutIntent")
def about():
    """Read the about message and keep the session open.

    The 'about_reprompt' template was rendered but never sent in the
    original code; that unused work is dropped here.
    """
    return question(voice_mod(render_template('about')))
prefixtree.py
Source:prefixtree.py
"""A small prefix tree (trie) keyed by sequences such as strings."""

# Result codes returned by Node.Find / Tree.Find.
FIND_SUC = 1   # a node whose stored key equals the searched key was reached
FIND_FAIL = 0  # no match at the root, or the key was exhausted on an unkeyed node
FIND_PART = 2  # only a proper prefix of the key exists below the root


class Node(object):
    """One trie node: an optional (key, value) payload plus child links."""

    def __init__(self, value=None, key=None):
        self.value = value
        self.key = key
        self.childs = {}  # maps the next key element to the child Node

    def Append(self, key, value, depth=0):
        """Create the chain for ``key[depth:]`` below this node.

        Existing children along the path are reused rather than replaced,
        so appending never discards an already stored subtree. The last
        node of the chain receives *key* and *value*.

        Returns True once the payload has been stored. Raises IndexError
        when ``depth`` is not a valid index into *key* (e.g. empty key).
        """
        element = key[depth]
        node = self.childs.get(element)
        if node is None:
            node = self.childs[element] = Node()
        if depth == len(key) - 1:
            node.key = key
            node.value = value
            return True
        return node.Append(key, value, depth + 1)

    def __str__(self, additional=''):
        """Render this subtree, one node per line, indented by depth."""
        res = '%s:%s' % (str(self.key), str(self.value))
        # items() works on both Python 2 and 3 (iteritems is Python 2 only).
        for k, v in self.childs.items():
            res += '\n%s%s->%s' % (additional, k, v.__str__(additional + ' '))
        return res

    def Add(self, key, value):
        """Store *value* under *key*, overwriting any existing value."""
        stack, res = self.Find(key)
        deepest = stack[-1]
        if res == FIND_SUC:
            deepest.value = value
            return

        depth = len(stack) - 1
        if depth == len(key):
            # The path for the whole key already exists as an interior
            # node without payload; attach the payload to it.
            deepest.value = value
            deepest.key = key
            return
        deepest.Append(key, value, depth)

    def Find(self, key, stack=None):
        """Walk down the tree along *key*.

        Returns ``(stack, code)`` where *stack* lists the visited nodes
        from this node to the deepest one reached and *code* is one of
        FIND_SUC, FIND_FAIL or FIND_PART.
        """
        if not stack:
            stack = []
        stack.append(self)
        depth = len(stack) - 1
        if self.key == key:
            return stack, FIND_SUC
        if depth >= len(key):
            return stack, FIND_FAIL

        if key[depth] in self.childs:
            return self.childs[key[depth]].Find(key, stack)
        if depth == 0:
            return stack, FIND_FAIL

        return stack, FIND_PART


class Tree(object):
    """Prefix tree facade that owns the root Node."""

    def __init__(self):
        self.root = Node()

    def __str__(self):
        return str(self.root)

    def Find(self, key):
        """Look up *key* from the root; see Node.Find for the result."""
        return self.root.Find(key)

    def Add(self, key, value):
        """Store *value* under *key*."""
        self.root.Add(key, value)


# Example usage:
#     tree = Tree()
#     tree.Add('124444', 2)
#     tree.Add('126', 2)
#     tree.Add('g2', 2)
#     tree.Add('g', 2)
#     print(tree)
Learn to execute automation testing from scratch with the LambdaTest Learning Hub: from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!