Best Python code snippet using autotest_python
kgcRequest.py
Source:kgcRequest.py
import json
import random
import re
import time

import execjs

import RequestUtil
import StrUtil
import UrlUtil

# NOTE(review): non-ASCII string literals in this listing appear mis-encoded
# (mojibake); they are reproduced exactly as they appear, not repaired.


def get_page_sub_href(page):
    """Return the paper-submission URL found in a test-paper page.

    The page text is normalised with ``StrUtil.formatting`` and the ``data``
    attribute of the ``<a id="putIn" ...>`` element is extracted.

    Raises:
        IndexError: if the page contains no such element.
    """
    # Raw string: the original non-raw literal relied on the invalid escape
    # sequence "\(" (a SyntaxWarning on recent Python versions).
    return re.findall(r'<aid="putIn"href="javascript:void\(0\);"data="(.*?)"title=""',
                      StrUtil.formatting(page.text))[0]


class kgcRequest:
    """Session-backed client for the tiku.kgc.cn question-bank pages.

    NOTE(review): ``session``, ``urls``, ``results`` and ``kgcData`` are
    class-level objects, so they are shared by every instance.
    """

    session = RequestUtil.session({
        "http://tiku.kgc.cn/testing/error": "课工åºé¡µé¢å¼å¸¸"
    })
    # Paths on the kgc site.
    urls = UrlUtil.regionUrl("http", "tiku.kgc.cn", "80", {
        "my_exam_js": "resources/V12.0.0.5/js/myexam.js"
    })
    # Responses already fetched from the site, keyed by URL or name.
    results = {
    }
    # Configuration / state values; __init__ overwrites them from its argument.
    kgcData = {
        "kgc_index_path": "",
        "entry_name": "",
        "entry_href": "",
        "get_paper_href": "",
        "100%go_on": True,
        "auto_choice_get_paper_href": True,
        "do_time": "",
        "last_data": ["", ""]
    }

    def __init__(self, kgcData: dict):
        """Copy known settings from ``kgcData`` and load the index page.

        Args:
            kgcData: settings keyed like the class-level ``kgcData`` dict.
                Keys missing from it are added to the passed dict with an
                empty-string value; the stored default is left untouched.

        Raises:
            RequestUtil.RequestUtilError: if the index page answers 200 but
                contains the "passport does not exist" marker text.
        """
        for dataKey in self.kgcData:
            try:
                self.kgcData[dataKey] = kgcData[dataKey]
            except KeyError:
                # NOTE(review): this writes into the caller's dict, not into
                # self.kgcData - kept as in the original; confirm the intent.
                kgcData[dataKey] = ""
        kgc_index = self.session.get(self.kgcData["kgc_index_path"])
        # NOTE(review): a non-200 answer leaves the object without an index
        # page and raises nothing - kept as in the original.
        if kgc_index.status_code == 200:
            if '对åºçpassportä¸åå¨' not in kgc_index.text:
                self.urls.put("index", self.kgcData["kgc_index_path"])
                self.results[self.urls.get("index")] = kgc_index
                # Load the site's JS, the answered-question-count URL and the
                # history URL.
                self.load_js()
                self.load_today_question_count_url(kgc_index)
                self.load_history_href(kgc_index)
            else:
                raise RequestUtil.RequestUtilError

    def reload(self, kgc_index_path: str):
        """Store a new index path and run ``__init__`` again with the current settings."""
        self.kgcData["kgc_index_path"] = kgc_index_path
        self.__init__(self.kgcData)
¥å£ è¿åå 载好çå
¥å£å57 def load_entry(self, choiceEntry=None, kgc_index_result=None):58 if choiceEntry is None:59 choiceEntry = self.kgcData["entry_name"]60 if kgc_index_result is None:61 kgc_index_result = self.results[self.urls.get("index")]62 class entry:63 name = ""64 href = ""65 def __init__(self, entry_block):66 self.name = re.findall('<spanclass="test_list_name">(.*?)<imgsrc=', entry_block)[0]67 self.href = re.findall('<spanclass="test_list_go"><a.+href="(.*?)">è¿å
¥</a>', entry_block)[0]68 print("æ£å¨å è½½å·é¢å
¥å£ããã")69 # è·åææåæ°å
¥å£å70 entry_blocks = re.findall('<divclass="test_listitem\\d">(.*?)</div>', StrUtil.formatting(kgc_index_result.text))71 # å
¥å£å¯¹è±¡72 entryObjects = []73 for entryBlock in entry_blocks:74 entryObjects.append(entry(entryBlock))75 # å¦æå°åä¸å¨aé¾æ¥76 last_entryObject = entryObjects[len(entryObjects) - 1]77 if last_entryObject.href == "javascript:void(0);":78 last_entryObject.href = re.findall('<spanclass="test_list_go"><a.+data="(.*?)"', entryBlock)[0]79 # éåå
¥å£åçæ没æåä¼ å
¥å
¥å£åä¸æ ·ç80 for entryObject in entryObjects:81 if entryObject.name == choiceEntry:82 self.kgcData["entry_name"] = entryObject.name83 self.kgcData["entry_href"] = entryObject.href84 return entryObject.name85 # 循ç¯å®äºè¿æ¯æ²¡æ86 print("请éæ©å
¥å£ï¼")87 print("åºå·\tå
¥å£å")88 for index in range(len(entryObjects)):89 print(str(index + 1) + "\t" + entryObjects[index].name)90 while True:91 try:92 entryObject = entryObjects[int(input("请éæ©ï¼")) - 1]93 self.kgcData["entry_name"] = entryObject.name94 self.kgcData["entry_href"] = entryObject.href95 return entryObject.name96 except ValueError:97 print("请è¾å
¥æ°åï¼")98 except IndexError:99 print("请è¾å
¥1-" + str(len(entryObjects)) + "èå´çæ°åï¼")100 # æ ¹æ®å
¥å£å è½½è·åè¯å·çå°å101 def load_get_paper_href(self):102 # è·åè¯å·ä¹åå
å¤ææ¯å¦å è½½å
¥å£è¿103 if self.kgcData["entry_name"] == "" and self.kgcData["entry_href"] == "":104 print("é误ï¼ï¼æªå è½½å
¥å£")105 self.load_entry()106 # 请æ±å
¥å£107 entry_result = self.session.get(self.kgcData["entry_href"])108 if entry_result.url == '':109 raise IndexError("list index out of range")110 else:111 # å¤æå
¥å£æ¯å¦ç´æ¥è¿å
¥è¯å·(ç´æ¥è¿å
¥è¯å·ä»£è¡¨æ¯æ¨¡æçé¢å)112 if len(re.findall('<p class="f14">èè¯å©ä½æ¶é´</p>', entry_result.text)) == 0:113 # å
¥å£åç±»114 class entryBlock:115 enter_fun_name = ""116 enter_fun_arguments = []117 data = []118 a_id = 0119 title = ""120 def __init__(self, enter_fun_name, enter_fun_arguments, title=""):121 self.enter_fun_name = enter_fun_name122 self.enter_fun_arguments = enter_fun_arguments123 self.title = title124 # 转ååç请æ±å
¥å£125 format_entry_result = StrUtil.formatting(entry_result.text)126 # è·åææä¸çº§å
¥å£ è¿å
¥æµè¯ æ§è¡çjsæ¹æ³127 do_js_funs = re.findall('javascript:(.*?);?"', format_entry_result)128 # è·åæ§è¡çæ¹æ³å129 do_js_fun_name = re.findall('(.*?)\(', do_js_funs[0])[0]130 # å¦æ为ä¸é¡¹æè½å è¿å
¥æµè¯ æ§è¡çjsæ¹æ³ä¼å¤å¹é
å°ä¸ä¸ªæ·»å äºçº§å
¥å£æ¹æ³131 if do_js_fun_name == "percentAlert":132 do_js_funs = do_js_funs[:-1]133 # è·åfunctionå134 functions = re.findall("function\s+.+\(.+\)\{[\s\S]*?\}", entry_result.text)135 # å®ä¹scriptå136 script = ""137 # 使ç¨çæ¹æ³138 use_fun = [do_js_fun_name, "percentOutlineAlert", "percentChapterAlert"]139 # æé¤æªä½¿ç¨æ¹æ³140 index = 0141 length = len(functions)142 while index < length:143 if re.findall('function\s*(.*?)\(', functions[index])[0] not in use_fun:144 del functions[index]145 index -= 1146 length -= 1147 index += 1148 for function in functions:149 attr_data = re.findall('\$\("#(.*?)"\).attr\("(.*?)"\)', function)150 if len(attr_data) > 0:151 attr_data = attr_data[0]152 func = function153 if len(attr_data) > 0:154 func = function.replace("){", "," + attr_data[1] + "){"). \155 replace('$("#' + attr_data[0] + '").attr("' + attr_data[1] + '")', "data")156 func = func.replace("window.location.href=", "return ")157 func = func.replace("var data =", "return ")158 script += "\n" + func159 function_names = re.findall('function\s+(.*?)\(', script)160 script = execjs.compile(script)161 # è·åæ¹æ³å¯è½æ§çæ¼æ¥å符串162 function_names_joint = ""163 for index in range(len(function_names)):164 if index != 0:165 function_names_joint += "|"166 function_names_joint += function_names[index]167 # æ¥æ¾æææµè¯æ¹æ³å
¥å£å¯¹è±¡ (å½åå
å«ä¸çº§å
¥å£)168 entryBlocks = []169 for do_js_fun in do_js_funs:170 name = re.findall('(.*?)\(', do_js_fun)[0]171 try:172 arguments = re.findall('[' + function_names_joint + ']\((.*?)\)', do_js_fun)[0].split(",")173 except re.error:174 print()175 entryBlocks.append(entryBlock(name, arguments))176 if do_js_fun_name == "unitExam":177 # å è½½data178 data_s = re.findall('data="(.*?)"title="è¿å
¥æµè¯"', format_entry_result)179 for index in range(len(data_s)):180 entryBlocks[index].enter_fun_arguments.append(data_s[index])181 elif do_js_fun_name == "percentAlert":182 # è·åææa_id data title percent183 a_data_title_percent_s = re.findall(184 ';position:relative"><aid="(.*?)"href="#"title=""class="no-sj"data="(.*?)"'185 'style="padding-left:30px">(.*?)</a><p><spanstyle="width:(.*?)"></span>', format_entry_result)186 # å°ææa_id å dataå å
¥å£ç±»å187 for index in range(len(a_data_title_percent_s)):188 entryBlocks[index].a_id = a_data_title_percent_s[index][0]189 entryBlocks[index].data = a_data_title_percent_s[index][1].split(",")190 entryBlocks[index].title = a_data_title_percent_s[index][2] + "\t" + \191 a_data_title_percent_s[index][3]192 # äºçº§å
¥å£ç¹å»äºä»¶çjs代ç å193 skillList_click_js_block = ""194 # äºçº§å
¥å£çæ¹æ³å195 skillList_fun_name = ""196 # å è½½è·åäºçº§å
¥å£å°åçjsæ¹æ³197 def load_skill_list_click_js_block():198 nonlocal skillList_click_js_block199 nonlocal skillList_fun_name200 print("æ£å¨å è½½ä¸é¡¹æè½åçäºçº§å
¥å£ãããã")201 # è·åå°äºçº§å
¥å£ç¹å»äºä»¶çjs代ç å202 skillList_click_js_block = re.findall('if\s?\(pos\s?!=\s?-1\)\s?\{'203 '[\s\S]*'204 '\}[\s\t\n\r]*'205 'obj\.toggleClass\("yes-sj"\);[\s\t\n\r]*'206 '\}[\s\t\n\r]*'207 '\}[\s\t\n\r]*'208 '\}\);[\s\t\n\r]*'209 '[\s\t\n\r]*\}[\s\t\n\r]?else[\s\t\n\r]?(\{'210 '[\s\S]*'211 '\})[\s\t\n\r]*'212 '\}[\s\t\n\r]*'213 '\$\(this\)\.toggleClass\("yes-sj"\);',214 entry_result.text)[0]215 # äºæ¬¡å¤çäºçº§å
¥å£ç¹å»äºä»¶çjs代ç 216 # å°ä»£ç å转å为æ¹æ³217 skillList_click_js_block = "\nfunction click(a_id,data){\r" + skillList_click_js_block[218 1:] + "\r"219 # å°js代ç æ解为ä¸è¡ä¸è¡220 skillList_click_js_block_lines = re.findall('\n(.*?)\r', skillList_click_js_block)221 # å¨éåä¸å é¤ä¸å¯¹æå®å符222 def remove_pair(char, list_, start_index):223 char_dic = {224 "{": "}",225 "[": "]",226 "<": ">",227 "(": ")",228 }229 # å é¤ä¹åå
å¤æå½åè¡æ¯å¦å¸¦ææå®å符230 if char in list_[start_index]:231 index = start_index + 1232 deep = 0233 while index < len(list_):234 if char in list_[index] and char_dic[char] not in list_[index]:235 deep += 1236 elif char_dic[char] in list_[index] and deep == 0 and True if list_[index].find(237 char) == -1 \238 else list_[index].find(char_dic[char]) < list_[index].find(char):239 result_index = [index]240 # å¦æè¦ç§»é¤çè¡å
è¿ææå®å符241 if char in list_[index]:242 for result_index_ in remove_pair(char, list_, index):243 result_index.append(result_index_)244 # è¿åéè¦å é¤çä¸æ 245 return result_index246 elif char_dic[char] in list_[index] and char not in list_[index]:247 deep -= 1248 index += 1249 # æé¤å¸¦æjqueryçé²æ¥é250 skillList_click_js_block_lines_index = 0251 skillList_click_js_block_lines_length = len(skillList_click_js_block_lines)252 while skillList_click_js_block_lines_index < skillList_click_js_block_lines_length:253 # å¦æå½åè¡å¸¦æ$.ajax(254 if "$.ajax(" in skillList_click_js_block_lines[skillList_click_js_block_lines_index]:255 del_index = remove_pair(256 "(", skillList_click_js_block_lines, skillList_click_js_block_lines_index)257 for i in del_index:258 skillList_click_js_block_lines[i] = skillList_click_js_block_lines[i].replace(259 ")", "")260 # å°$.ajax æ¿æ¢ä¸º return261 skillList_click_js_block_lines[skillList_click_js_block_lines_index] = \262 skillList_click_js_block_lines[skillList_click_js_block_lines_index].replace(263 "$.ajax(", "return")264 # å¦æå½åè¡å¸¦æ$265 elif "$" in skillList_click_js_block_lines[skillList_click_js_block_lines_index]:266 if "{" in skillList_click_js_block_lines[skillList_click_js_block_lines_index]:267 del_index = remove_pair("{", skillList_click_js_block_lines,268 skillList_click_js_block_lines_index)269 for index in range(len(del_index)):270 if index < len(del_index) - 1:271 # å é¤å½åè¡272 del skillList_click_js_block_lines[del_index[index]]273 skillList_click_js_block_lines_length -= 1274 skillList_click_js_block_lines_index -= 1275 skillList_click_js_block_lines[del_index[index]] = \276 skillList_click_js_block_lines[del_index[index]].replace("}", "")277 # å é¤å½åè¡278 del skillList_click_js_block_lines[skillList_click_js_block_lines_index]279 skillList_click_js_block_lines_length -= 1280 skillList_click_js_block_lines_index -= 1281 # å¦æå½åè¡å¸¦æ//注é282 elif StrUtil.formatting(283 skillList_click_js_block_lines[284 
skillList_click_js_block_lines_index]).startswith("//"):285 # å é¤å½åè¡286 del skillList_click_js_block_lines[skillList_click_js_block_lines_index]287 skillList_click_js_block_lines_length -= 1288 skillList_click_js_block_lines_index -= 1289 skillList_click_js_block_lines_index += 1290 # æ¥æ¾åºç³æçåé291 skillList_click_js_block_variables = ["a_id", "data"]292 for skillList_click_js_block_line in skillList_click_js_block_lines:293 re_result = re.findall('var\s*(\w+)\s*=\s*',294 StrUtil.formatting(skillList_click_js_block_line))295 if len(re_result) != 0:296 skillList_click_js_block_variables.append(re_result[0])297 # 移é¤ä½¿ç¨æªç³æåéç代ç è¡298 skillList_click_js_block_lines_index = 0299 skillList_click_js_block_lines_length = len(skillList_click_js_block_lines)300 while skillList_click_js_block_lines_index < skillList_click_js_block_lines_length:301 re_results = re.findall('(\w+)\s*[=].*;',302 StrUtil.formatting(skillList_click_js_block_lines[303 skillList_click_js_block_lines_index]))304 if len(re_results) > 0:305 for re_result in re_results:306 if re_result not in skillList_click_js_block_variables and not re_result.startswith(307 "var"):308 # 移é¤å½åè¡309 del skillList_click_js_block_lines[skillList_click_js_block_lines_index]310 skillList_click_js_block_lines_index -= 1311 skillList_click_js_block_lines_length -= 1312 break313 re_results = re.findall('(\w+)\s*[.\[].*;',314 StrUtil.formatting(skillList_click_js_block_lines[315 skillList_click_js_block_lines_index]))316 if len(re_results) > 0:317 for re_result in re_results:318 if re_result not in skillList_click_js_block_variables:319 # 移é¤å½åè¡320 del skillList_click_js_block_lines[skillList_click_js_block_lines_index]321 skillList_click_js_block_lines_index -= 1322 skillList_click_js_block_lines_length -= 1323 re_results = re.findall('[+]+(\w+)\s*.*;',324 StrUtil.formatting(skillList_click_js_block_lines[325 skillList_click_js_block_lines_index]))326 skillList_click_js_block_lines_index += 1327 # ä¸è¡ä¸è¡çjs代ç 
å并328 skillList_click_js_block = ""329 for skillList_click_js_block_line in skillList_click_js_block_lines:330 skillList_click_js_block += skillList_click_js_block_line331 skillList_click_js_block = execjs.compile(skillList_click_js_block)332 # è·åäºçº§å
¥å£çæ¹æ³å333 skillList_fun_name = re.findall("html\+='<aid=\"unitexam\"href=\"javascript:(.*?)\(",334 format_entry_result)[0]335 # æ ¹æ®å
¥å£å¯¹è±¡è·åäºçº§å
¥å£å¯¹åºçå
¥å£å¯¹è±¡éå336 def get_skill_list_entry_block_s(entry_block):337 # è·å请æ±äºçº§å
¥å£çè·¯å¾338 url = skillList_click_js_block.call("click", entry_block.a_id,339 entry_block.data)["url"]340 # 请æ±äºçº§å
¥å£341 skillListResult = self.session.get(url)342 skillList = json.loads(skillListResult.text)343 skill_list_entry_block_s = []344 for skill in skillList:345 # æ ¹æ®äºçº§å
¥å£å建å
¥å£å¯¹è±¡346 skill_list_entry_block_s.append(entryBlock(skillList_fun_name,347 [skill['percent'], entry_block.data[0], skill["id"]],348 "\t" + skill["name"] + "\t" + str(349 skill["percent"]) + "%"))350 return skill_list_entry_block_s351 # å¤ææ¯å¦èªå¨éæ©å
¥å£å°å352 if self.kgcData["auto_choice_get_paper_href"]:353 choose_entry_block = ""354 # å¤æå¦æ为课ç¨å¤ä¹ å æ 为ä¸é¡¹æè½åä¸å°100%继ç»å·355 if do_js_fun_name == "unitExam" or (356 do_js_fun_name == "percentAlert"357 and self.kgcData["100%go_on"]):358 choose_entry_block = entryBlocks[random.randint(0, len(entryBlocks) - 1)]359 elif do_js_fun_name == "percentAlert" and not self.kgcData["100%go_on"]:360 load_skill_list_click_js_block()361 # æ¾å°ç¬¬ä¸ä¸ªä¸ä¸º100%ç362 for entryBlock_ in entryBlocks:363 if int(re.findall("\t(.*?)%", entryBlock_.title)[0]) != 100:364 # å¤æå½åå
¥å£çäºçº§å
¥å£æ¯å¦æä¸ä¸º100%(å 为kgcæbug)365 skill_list_entry_block_s = get_skill_list_entry_block_s(entryBlock_)366 # æ¯å¦å
¨ä¸º100%367 is_all_100 = True368 for skill_list_entry_block in skill_list_entry_block_s:369 if int(re.findall("\t.*\t(.*?)%", skill_list_entry_block.title)[0]) != 100:370 is_all_100 = False371 choose_entry_block = skill_list_entry_block372 break373 if not is_all_100:374 choose_entry_block = entryBlock_375 break376 # å¦æå
¨ä¸º100%377 else:378 choose_entry_block = entryBlocks[0]379 try:380 get_paper_href = script.call(choose_entry_block.enter_fun_name,381 *choose_entry_block.enter_fun_arguments)382 except AttributeError:383 print()384 self.kgcData["get_paper_href"] = get_paper_href385 return self.kgcData["get_paper_href"]386 # å¦æä¸æ¯èªå¨éæ©å
¥å£å°å387 else:388 # å¤æå·é¢å
¥å£å°åæ¯å¦æå¹é
ç389 if self.kgcData["get_paper_href"] != "":390 for entryBlock_ in entryBlocks:391 get_paper_href = script.call(entryBlock_.enter_fun_name, *entryBlock_.enter_fun_arguments)392 if self.kgcData["get_paper_href"] == get_paper_href:393 self.kgcData["get_paper_href"] = get_paper_href394 return self.kgcData["get_paper_href"]395 # 没æå¹é
ç396 # å¤æå¦æ为ä¸é¡¹æè½å397 if do_js_fun_name == "percentAlert":398 load_skill_list_click_js_block()399 # 循ç¯è¯·æ±äºçº§å
¥å£400 index = 0401 length = len(entryBlocks)402 while index < length:403 skill_list_entry_block_s = get_skill_list_entry_block_s(entryBlocks[index])404 for skill_list_entry_block in skill_list_entry_block_s:405 # å¤æå·é¢å
¥å£å°åæ¯å¦æå¹é
ç406 if self.kgcData["get_paper_href"] != "":407 get_paper_href = script.call(skill_list_entry_block.enter_fun_name,408 *skill_list_entry_block.enter_fun_arguments)409 if self.kgcData["get_paper_href"] == get_paper_href:410 self.kgcData["get_paper_href"] = get_paper_href411 return self.kgcData["get_paper_href"]412 # å°äºçº§å
¥å£å 人å
¥å£éå413 entryBlocks.insert(index + 1, skill_list_entry_block)414 length += len(skill_list_entry_block_s)415 index += 1 + len(skill_list_entry_block_s)416 print("å·é¢å·é¢å
¥å£å°å失æ")417 # å¤æå¦æ为课ç¨å¤ä¹ å418 elif do_js_fun_name == "unitExam":419 print("å·é¢å
¥å£å°å失æ")420 # å è½½title421 titles = re.findall('<li><span>(.*?)</span><aid="unitexam"', format_entry_result)422 for index in range(len(titles)):423 entryBlocks[index].title = titles[index]424 else:425 raise Exception("åºç°æªç¥è¯¾ç¨!请èç³»ä½è
æ·»å !")426 print("请éæ©è¯¾ç¨ï¼")427 print("åºå·\t课ç¨å")428 for index in range(len(entryBlocks)):429 print(str(index + 1) + "\t" + entryBlocks[index].title)430 while True:431 try:432 case = int(input("请éæ©ï¼")) - 1433 if 0 <= case < len(entryBlocks):434 self.kgcData["get_paper_href"] = script.call(entryBlocks[case].enter_fun_name,435 *entryBlocks[case].enter_fun_arguments)436 else:437 raise IndexError438 return self.kgcData["get_paper_href"]439 except ValueError:440 print("请è¾å
¥æ°åï¼")441 except IndexError:442 print("请è¾å
¥1-" + str(len(entryBlocks)) + "èå´çæ°åï¼")443 else:444 self.kgcData["get_paper_href"] = self.kgcData["entry_href"]445 return self.kgcData["get_paper_href"]446 # æ ¹æ®è·åè¯å·çå°åå è½½è¯å· è¿åè¯å·447 def load_test_paper(self, get_paper_href=None):448 if get_paper_href is None:449 get_paper_href = self.kgcData["get_paper_href"]450 # æµè¯è·åè¯å·çå°åæ¯å¦å¯ç¨451 test_paper = self.session.get(get_paper_href)452 if test_paper.status_code != 200:453 raise Exception("è·åè¯å·çå°å失ææç½ç»é误ï¼")454 self.results["test_page"] = test_paper455 # è¿åè¯å·456 return test_paper457 # å°æ交å°åå å
¥urls458 # self.urls.put("current_sub_href", sub_href)459 # return self.kgcData["current_page_code"]460 # å è½½è¯å·è¯é¢åæ461 def load_solutions_paper(self, sub_test_paper_result):462 if sub_test_paper_result["result"]:463 # è·åå°è¯å·æ¥å页é¢464 report_page_result = self.session.get(self.urls.get("go_back_url"))465 # è·åå°è¯å·åæå°å466 solutions_href = re.findall('>æ¥çæ¥å</a><ahref="(.*?)"title=""[\s\S]*>æ¥ç解æ</a>',467 StrUtil.formatting(report_page_result.text))[0]468 return self.session.get(solutions_href)469 else:470 raise Exception("æ交è¯å·,æªç¥é误ï¼")471 # å è½½çæ¡472 def load_answer(self, solutions_paper=None):473 if solutions_paper is None:474 solutions_paper = self.load_solutions_paper(self.sub_test_paper())475 # è·åææçæ¡å476 subjects = re.findall(r'<ulclass="sec2grays">(.*?)</ul><divclass="sec3reportfont-yaheif14strong">',477 StrUtil.formatting(solutions_paper.text))478 # è·åææé¢ç®id479 # çæ¡åè·åæ¯éé¢çæ£ç¡®çæ¡å¹¶å¡«å
¥é¢ç®ä¿¡æ¯æ°ç»480 correctAnswers = []481 for i, su in enumerate(subjects):482 questionAnswers = re.findall(483 r'<lic?l?a?s?s?=?"?g?r?e?e?n?"?><pre><span>([A-Z]):</span><imagestyle="vertical-align:middle"src="('484 r'.*?)"/></pre>',485 su)486 for index in range(len(questionAnswers)):487 questionAnswers[index] = list(questionAnswers[index])488 answer_li = re.findall(r'<liclass="green"><pre><span>[A-Z]:</span>', su)489 an = []490 # å¤éé¢çå¯è½æ§491 if len(answer_li) > 1:492 for answer in answer_li:493 an.append(re.findall(r"<span>(.*):</span>", answer)[0])494 correctAnswers.append(an)495 # åéé¢çå¯è½æ§496 else:497 an.append(re.findall(r"<span>(.*):</span>", answer_li[0])[0])498 correctAnswers.append(an)499 # å°çæ¡è½¬æ01234500 answers_num_str = []501 for i in range(len(correctAnswers)):502 if len(correctAnswers[i]) == 1:503 answers_num_str.append(str(ord(correctAnswers[i][0].upper()) - 65))504 else:505 j = ""506 for anwer in correctAnswers[i]:507 j += str(ord(anwer.upper()) - 65) + ","508 answers_num_str.append(j[:-1])509 return answers_num_str510 # é®é¢æ°æ®ç±»511 class question_data:512 paper_code = ""513 question_id = ""514 sub_question_id = ""515 question_index = ""516 question_type = ""517 def __init__(self, paper_code, question_id, sub_question_id,518 question_index, question_type):519 self.paper_code = paper_code520 self.question_id = question_id521 self.sub_question_id = sub_question_id522 self.question_index = question_index523 self.question_type = question_type524 # å è½½é®é¢æ°æ®éå525 def load_question_data(self, test_paper=None):526 if test_paper is None:527 test_paper = self.load_test_paper()528 # è·åææé¢æ°æ®å529 data_s = list(set(re.findall(r'<dddata="(.*?)">', StrUtil.formatting(test_paper.text))))530 data = []531 # 循ç¯å°é¢åºæ°æ®å转å为é®é¢æ°æ®å¯¹è±¡532 for index in range(len(data_s)):533 data = data_s[index].split(",")534 data_s[index] = self.question_data(data[1], data[2], data[3], data[4], data[5])535 # å°data_så¡«å
¥last_data536 self.kgcData["last_data"][0] = data[0]537 self.kgcData["last_data"][1] = data[1]538 # æåº539 for index_ in range(len(data_s) - 1):540 for index in range(len(data_s) - index_ - 1):541 if int(data_s[index].question_index) > int(data_s[index + 1].question_index):542 temp_data = data_s[index]543 data_s[index] = data_s[index + 1]544 data_s[index + 1] = temp_data545 return data_s546 # å è½½æ¥è¯¢å·é¢æ°çurl547 def load_today_question_count_url(self, kgc_index_result=None):548 if kgc_index_result is None:549 kgc_index_result = self.results[self.urls.get("index")]550 try:551 today_question_count_url = \552 re.findall('\.val\(\'\'\);\$\.ajax\(\{url:"(.*?)",', StrUtil.formatting(kgc_index_result.text))[0]553 except IndexError:554 print()555 if self.session.get(today_question_count_url).status_code == 200:556 self.urls.put("today_question_count_url", today_question_count_url)557 else:558 raise Exception("ç½ç»å¼å¸¸")559 # å è½½æçåå²å°å560 def load_history_href(self, kgc_index_result=None):561 if kgc_index_result is None:562 kgc_index_result = self.results[self.urls.get("index")]563 history_href = \564 re.findall('<liseq="2"><ahref="(.*?)"', StrUtil.formatting(kgc_index_result.text))[0]565 if self.session.get(history_href).status_code == 200:566 self.urls.put("history_href", history_href)567 else:568 raise Exception("ç½ç»å¼å¸¸")569 # å è½½æçåå²é¡µé¢570 def load_history_page(self):571 history_page_result = self.session.get(self.urls.get("history_href"))572 if history_page_result.status_code != 200:573 self.load_history_href()574 history_page_result = self.session.get(self.urls.get("history_href"))575 return history_page_result576 # è·å对åºè¯å·ååä¸é页é¢577 def get_again_answer_page(self, test_paper=None):578 if test_paper is None:579 test_paper = self.load_test_paper()580 # å è½½æçåå²581 history_page_result = self.load_history_page()582 # è·åè¿å
¥ååä¸é页é¢çæ¹æ³å583 again_answer_fun_name = re.findall('æ¥çæ¥å</a><ahref="javascript:(.*?)\(',584 StrUtil.formatting(history_page_result.text))[0]585 # è·åè¿å
¥ååä¸é页é¢çæ¹æ³586 again_answer_fun = re.findall('function ' + again_answer_fun_name + "[\s\S]+?}", history_page_result.text)[0]587 again_answer_fun = again_answer_fun.replace("location.href =", "return ")588 again_answer_fun = execjs.compile(again_answer_fun)589 # è·åè¯å·id590 paper_id = re.findall('<aid="putIn"href="javascript:void\(0\);"'591 'data=".*?/([0-9]*?)"title=""class="f14restacenterpater"'592 '>æè¦äº¤å·</a>',593 StrUtil.formatting(test_paper.text))[0]594 # è·åååä¸é页é¢çå°å595 again_answer_href = again_answer_fun.call(again_answer_fun_name, *[paper_id, time.time()])596 again_answer_result = self.session.get(again_answer_href)597 if again_answer_result.status_code == 200:598 return again_answer_result599 else:600 raise Exception("ç½ç»é误ï¼")601 # è¾åºçé¢æ°åæ£ç¡®ç602 def print_today_question_count_url(self):603 print("æ£å¨æ¥è¯¢çé¢æ°åæ£ç¡®çããã")604 today_question_count_result = self.session.get(self.urls.get("today_question_count_url"))605 if today_question_count_result.status_code != 200:606 self.load_today_question_count_url()607 today_question_count_result = self.session.get(self.urls.get("today_question_count_url"))608 AnswerNum = json.loads(today_question_count_result.text)609 if AnswerNum["result"]:610 msgs = AnswerNum["msg"].split(";")611 print("累计çé¢æ°ï¼" + msgs[0] + "\tå®é
çé¢æ°ï¼" + msgs[1] + "\tæ£ç¡®çï¼" + msgs[2])612 else:613 print("ä»å¤©è¿æ²¡æåé¢å~å æ²¹ï¼")614 # æ交è¯å· è¿å转为jsonçç»æ615 def sub_test_paper(self, question_data_s, sub_data, test_paper=None):616 if test_paper is None:617 test_paper = self.load_test_paper()618 # å¦ææ交æ°æ®ä¸ä¸ºç©ºå°±éè¦æ交çæ¡619 if len(sub_data) > 0:620 # è·åæ交çæ¡çå°å621 sub_js = re.findall("\.length;(\$\.ajax\(\{url:[\s\S]+?),",622 StrUtil.formatting(623 self.results[self.urls.get("my_exam_js")]624 .text))[0].replace("$.ajax({url:", "return ")625 sub_js = "var lastData=" + str(self.kgcData["last_data"]) + ";" + sub_js626 sub_js = sub_js.replace("\'", "\"")627 paramQuestionId = ""628 question_data_s_len = len(question_data_s)629 for index in range(question_data_s_len):630 paramQuestionId += question_data_s[index].question_id631 if index != question_data_s_len - 1:632 paramQuestionId += ","633 sub_js = "var paramQuestionId=\"" + paramQuestionId + "\";" + sub_js634 sub_js = "function a(){" + sub_js635 sub_js += "}"636 sub_href = execjs.compile(sub_js).call("a")637 # æ交çæ¡638 self.session.post(sub_href, data=sub_data)639 # è·åæ交è¯å·å°å640 sub_href = get_page_sub_href(test_paper)641 # è·åè¯å·id并填å
¥data642 self.kgcData["current_page_code"] = sub_href.split("/")[-1].split("?")[0]643 # æ交è¯å·644 try:645 result = json.loads(self.session.post(sub_href).text)646 except json.decoder.JSONDecodeError as e:647 if "Expecting value: line" in str(e):648 raise Exception("éå¤æ交ï¼")649 else:650 raise e651 if result["result"]:652 self.urls.put("go_back_url", result["gobackUrl"])653 return result654 else:655 raise Exception("æ交è¯å·,æªç¥é误ï¼")656 # å·é¢ æ£ç¡®ç657 def do_test_paper(self, accuracy=1):658 # è·åè¯å·659 test_paper = self.load_test_paper()660 # æ交çæ°æ®661 sub_data = {662 "psqId": [],663 "time": [],664 "uAnswers": []665 }666 # è·åé®é¢æ°æ®667 question_data_s = self.load_question_data(test_paper)668 # æ交空å·669 sub_test_paper_result = self.sub_test_paper([], [], test_paper)670 # è·åè¯å·åæ671 solutions_paper = self.load_solutions_paper(sub_test_paper_result)672 # å è½½çæ¡673 answers = self.load_answer(solutions_paper)674 # é¢æ°675 question_num = len(answers)676 # 计ç®éé¢æ°677 F_num = int(question_num - accuracy * question_num)678 # 循ç¯åå
¥éé¢679 for i in range(int(F_num)):680 while True:681 ran = str(random.randint(0, 3))682 if ran != answers[i]:683 answers[i] = ran684 break685 # å¤ææ¯éé¢å·é¢æ¶é´686 do_time = self.kgcData["do_time"]687 if do_time == "":688 do_time = str(random.randint(10, 20))689 # å°é®é¢æ°æ®å¡«å
¥sub_data690 for index in range(len(question_data_s)):691 sub_data["psqId"].append(question_data_s[index].sub_question_id)692 sub_data["time"].append(do_time)693 try:694 sub_data["uAnswers"].append(answers[index])695 except IndexError as e:696 print()697 # è·åååä¸æ¬¡698 again_answer = self.get_again_answer_page(test_paper)699 # æ交çå·700 sub_test_paper_result = self.sub_test_paper(question_data_s, sub_data, again_answer)701 # è¿åæ¯å¦æå702 return sub_test_paper_result["result"]703 def load_my_exam_js(self):704 self.results[self.urls.get("my_exam_js")] = \705 self.session.get(self.urls.get("my_exam_js"))706 return self.results[self.urls.get("my_exam_js")]707 def load_js(self):708 print("æ£å¨å è½½js")...
studious.py
Source:studious.py
#!/usr/bin/env python3
## studious main
import sys
import hashlib
from base32c import cb32encode
# from Moore's book
from PySide2 import QtWidgets as qtw
from PySide2 import QtGui as qtg
from PySide2 import QtCore as qtc
# from PySide2 import QtWebEngineWidgets as qtwe
import ebooklib
from ebooklib import epub
import xml.etree.ElementTree as ETree

_debug = False
_dumpHTML = False

# Namespaces and path prefix used while merging the epub documents.
_XHTML_NS = 'http://www.w3.org/1999/xhtml'
_XHTML_BODY = '{' + _XHTML_NS + '}body'
_DC_NS = "http://purl.org/dc/elements/1.1/"
_XHTML_PREFIX = 'xhtml/'


def _strip_xhtml_prefix(href):
    """Return *href* without a leading 'xhtml/' directory component."""
    if href.startswith(_XHTML_PREFIX):
        return href[len(_XHTML_PREFIX):]
    return href


def unique_list(l):
    """Return the items of *l* in first-seen order with duplicates removed.

    Membership is tested with ``in`` on a list, so items only need to
    support equality (they do not have to be hashable).
    """
    ulist = []
    for item in l:
        if item not in ulist:
            ulist.append(item)
    return ulist


class EPubTextBrowser(qtw.QTextBrowser):
    """Derived QTW class for the main text view."""

    # Book whose items are used to resolve relative image URLs; stays None
    # until set_epub() is called.
    the_epub = None

    def set_epub(self, the_epub):
        """Remember the epub object that image resources are loaded from."""
        self.the_epub = the_epub

    def loadResource(self, restype, url):
        """Override to load images that are within the epub.

        Relative image URLs are looked up in the current epub by href and
        returned as a QImage, scaled down to at most 80% of the widget
        width.  Everything else is delegated to QTextBrowser.
        """
        # 2 is QTextDocument.ImageResource in Qt's ResourceType enum.
        if restype == 2 and url.isRelative() and self.the_epub is not None:
            if _debug:
                print("Image resource found: ", url.toDisplayString())
            # get file part of it. OR, load as path from zip?
            # for now, assume filename part of URL is the ID.
            imgHref = url.toDisplayString()
            if imgHref.startswith("../"):
                imgHref = imgHref[3:]
            image = self.the_epub.get_item_with_href(imgHref)
            # image = self.the_epub.get_item_with_id(url.fileName())
            if image:
                if _debug:
                    print("successfully loaded image of type", type(image))
                image = qtg.QImage.fromData(image.get_content())
                # scaledToWidth() takes an int; truncating does not change
                # the outcome of the comparison because widths are integers.
                max_width = int(self.width() * 0.8)
                if image.width() > max_width:
                    image = image.scaledToWidth(
                        max_width,
                        mode=qtc.Qt.TransformationMode.SmoothTransformation)
                # It accepts anything as the variant! Python!
                return image
            print("image load failed:", imgHref)
            # should we fetch external images? maybe not
        if _debug:
            print("loading non-image resource", url)
        # Return the base-class result; dropping it made every resource
        # that is not an embedded image resolve to None.
        return super().loadResource(restype, url)


# Inheriting from QMainWindow broke the layouts.
# Should I make another class for the book itself?
class MainWindow(qtw.QMainWindow):
    """UI Class for the Studious epub reader"""
    SECTION = 0  # constants for the treeview
    HREF = 1

    def __init__(self):
        """GUI Layout is built here.

        Creates the File/Open menu, a splitter holding the table-of-contents
        tree on the left and a frame on the right that contains the book
        text browser and the notes editor, then shows the window.
        """
        # The book doesn't pass the class and object.
        super().__init__()
        # I won't call these if I set it up myself.
        # self.ui = Ui_MainWindow()
        # self.ui.setupUi(self)
        window = qtw.QWidget()
        self.setCentralWidget(window)

        self.setWindowTitle("Studious Reader")
        self.resize(960, 600)

        ### Menu items
        openIcon = self.style().standardIcon(qtw.QStyle.SP_DialogOpenButton)
        openAction = qtw.QAction(openIcon, '&Open', self)
        openAction.setShortcut(qtg.QKeySequence("Ctrl+o"))
        openAction.triggered.connect(self.open_new_file)
        menuBar = qtw.QMenuBar(self)
        fileMenu = menuBar.addMenu("&File")
        fileMenu.addAction(openAction)
        self.setMenuBar(menuBar)

        ### The tricky layout to get the panes to work correctly.
        topLayout = qtw.QHBoxLayout()
        window.setLayout(topLayout)

        leftLayout = qtw.QVBoxLayout()
        topLayout.addLayout(leftLayout)
        leftSplitter = qtw.QSplitter(self)  # or add to top hlayout?
        leftLayout.addWidget(leftSplitter)

        self.tocPane = qtw.QTreeWidget(self)
        self.tocPane.setColumnCount(2)
        self.tocPane.setHeaderLabels(["Section", "Link"])
        if not _debug:
            # The link column is only useful while debugging.
            self.tocPane.hideColumn(self.HREF)
        self.tocPane.itemClicked.connect(self.jump_to_tocitem)
        leftSplitter.addWidget(self.tocPane)

        ## Layout option 1 (the one in use): a new hbox layout inside a
        ## frame, so there is no splitter between text and notes.
        ## Alternatives that were tried: (2) a sub-splitter on the right,
        ## (2a) a separate right splitter in the top layout, (3) just three
        ## vboxes without splitters.
        rightFrame = qtw.QFrame(self)
        ## setFrameStyle(NoFrame) / setContentsMargins(0,0,0,0) on the frame
        ## didn't make any difference at all.
        leftSplitter.addWidget(rightFrame)
        rightHLayout = qtw.QHBoxLayout()
        rightFrame.setLayout(rightHLayout)

        self.mainText = EPubTextBrowser(self)  # qtw.QTextBrowser(self)
        # Setting a <style> block on the widget isn't doing anything, is it
        # reading the css instead?  The document default style sheet is
        # used here.
        self.mainText.document().setDefaultStyleSheet(
            'body{ margin-left: 20px; margin-right: 20px; line-height: 110% }')
        mainText_font = qtg.QFont('Liberation Serif', 12)
        mainText_font.setStyleHint(qtg.QFont.Serif)
        self.mainText.setFont(mainText_font)
        # Links are handled by jump_to_qurl instead of the browser itself.
        self.mainText.setOpenLinks(False)
        self.mainText.anchorClicked.connect(self.jump_to_qurl)
        self.mainText.cursorPositionChanged.connect(self.update_location)
        rightHLayout.addWidget(self.mainText)
        # horizontal and vertical is flipped from what I thought.
        self.mainText.setFixedWidth(500)
        # this has no effect if fixedwidth is set.
        # and it doesn't stick to preferred if there's a splitter.
        self.mainText.setSizePolicy(qtw.QSizePolicy.Maximum,
                                    qtw.QSizePolicy.Preferred)

        # will need a vboxlayout here if we add something below the notes.
        self.notesFrame = qtw.QTextEdit(self)
        rightHLayout.addWidget(self.notesFrame)
        self.show()

    def open_new_file(self):
        """Ask the user for an epub file and load it.

        Does nothing when the dialog is cancelled (empty path).
        """
        filePath, _ = qtw.QFileDialog.getOpenFileName(
            self, caption="Load new ebook", filter="EPub files (*.epub)")
        if not filePath:
            # Cancelled dialog: previously this tried to load "".
            return
        self.load_epub(filePath)

    def update_location(self):
        """Print the current text cursor position of the main text view."""
        print("Cursor position:", self.mainText.textCursor().position())

    def jump_to(self, urlStr):
        """Jump to an internal link. May refer to a separate page in the
        original epub, or to an anchor.

        If *urlStr* contains a '#', the part after the first '#' is used as
        the anchor name; otherwise the whole string is.  Afterwards the text
        cursor is moved to the top-left of the visible area.
        """
        if _debug:
            print("Jumping to", urlStr)
        splitUrl = urlStr.split('#')
        if len(splitUrl) > 1:
            self.mainText.scrollToAnchor(splitUrl[1])
            if _debug:
                print("ANCHORJUMP", splitUrl[1])
        else:
            if _debug:
                print("URLJUMP:", urlStr)
            self.mainText.scrollToAnchor(urlStr)
        # Update cursor position by moving the cursor to where we are
        # and getting its location.
        # or should I do that only when they click?
        # TODO: move TOC highlight to wherever I jumped to
        # (probably with a trigger for the scroll event, because
        # it should work for scrolling too)
        browserRect = self.mainText.rect()
        newCursor = self.mainText.cursorForPosition(browserRect.topLeft())
        self.mainText.setTextCursor(newCursor)
        if _debug:
            print("new cursor rect position:", self.mainText.cursorRect())
            print("Cursor position:", self.mainText.textCursor().position())

    def jump_to_tocitem(self, item):
        """Jump for a click in the Contents pane."""
        self.jump_to(item.text(self.HREF))

    def jump_to_qurl(self, url):
        """Follow a clicked link when it is relative; ignore other URLs."""
        # TODO: popup asking if want to open in browser
        if url.isRelative():
            self.jump_to(url.toString())

    def process_toc(self, toc_node, treenode):
        """Map the epub ToC structure to a Qt tree view.

        Each entry of *toc_node* is either an object with ``title`` and
        ``href`` attributes, or a pair of such an object and a list of
        sub-entries, which is processed recursively.

        Returns True if any leaf entry's href has no '#' part, i.e. the ToC
        links to whole files rather than to anchors.
        """
        filename_anchors = False
        for toc_entry in toc_node:
            if _debug:
                print(type(toc_entry))  # epub.Link or tuple
            if hasattr(toc_entry, 'title'):
                if _debug:
                    print(toc_entry.__dict__)
                newRow = qtw.QTreeWidgetItem(treenode)
                newRow.setText(self.SECTION, toc_entry.title)
                newRow.setText(self.HREF, _strip_xhtml_prefix(toc_entry.href))
                if '#' not in toc_entry.href:
                    filename_anchors = True
            else:
                # it's a pair of the top level and sub-entries, so recurse
                section, sub_entries = toc_entry[0], toc_entry[1]
                newRow = qtw.QTreeWidgetItem(treenode)
                if _debug:
                    print("tuple[0] is", type(section))
                    print(section.__dict__)
                newRow.setText(self.SECTION, section.title)
                newRow.setText(self.HREF, _strip_xhtml_prefix(section.href))
                filename_anchors |= self.process_toc(sub_entries, newRow)
                newRow.setExpanded(True)
        return filename_anchors

    def load_notes(self, bookfilename, author_last):
        """Compute the notes-file key for a book and print it.

        The key is the first three characters of *author_last*, an
        underscore, and the cb32 encoding of the first 10 bytes of the
        SHA-256 digest of the book file.  Loading of the notes themselves is
        not implemented here yet.
        """
        hasher = hashlib.sha256()
        # Hash in chunks so a large book is never held in memory at once,
        # and make sure the file is closed even if reading fails.
        with open(bookfilename, 'rb') as bookfile:
            for chunk in iter(lambda: bookfile.read(65536), b''):
                hasher.update(chunk)
        b32str = cb32encode(hasher.digest()[:10])
        print("hash encode: ", author_last[:3] + '_' + b32str)

    def load_epub(self, filename):
        """Load epub content, and also call out to generate the TOC and
        load the notes file.

        All spine documents are merged into the body of the first one and
        shown as a single HTML document.  When the ToC links to whole files,
        each document's content is wrapped in a ``<div id=...>`` named after
        the file so those links can be used as anchors.
        """
        the_epub = epub.read_epub(filename)
        self.mainText.set_epub(the_epub)  # have to set early
        if _debug:
            print(the_epub.spine)
        self.tocPane.clear()
        filename_anchors = self.process_toc(the_epub.toc, self.tocPane)
        if filename_anchors:
            print("epub has toc links with filename only")

        # suppress "html" namespace prefix.
        ETree.register_namespace('', _XHTML_NS)
        # TODO: loop to get the first item that's linear.
        first_item = the_epub.get_item_with_id(the_epub.spine[0][0])
        # merge all the HTML file bodies into one.
        first_text = first_item.get_content().decode('utf-8')
        doc_tree = ETree.fromstring(first_text)
        doc_body = doc_tree.find(_XHTML_BODY)
        # The loop below re-adds the first document as well (so it gets its
        # anchor div too); empty the base body first, otherwise the first
        # document shows up twice in the merged text.
        del doc_body[:]
        for uid, linear in the_epub.spine:
            # TODO: if it's not linear, put it at the end.
            if _debug:
                print("LOADITEM:", uid)
            the_item = the_epub.get_item_with_id(uid)
            text = the_item.get_content().decode('utf-8')
            body = ETree.fromstring(text).find(_XHTML_BODY)
            if filename_anchors:
                # create div elements with the name corresponding to the
                # file (tried 'name' and 'id')
                item_name = _strip_xhtml_prefix(the_item.get_name())
                toc_div = ETree.Element('div', {'id': item_name})
                if _debug:
                    print("ANCHOR ADDED:", list(toc_div.items()))
                toc_div.extend(body)
                doc_body.append(toc_div)
            else:
                doc_body.extend(body)
        fulltext = ETree.tostring(doc_tree, encoding='unicode')
        self.mainText.setHtml(fulltext)
        if _dumpHTML:
            print(fulltext)
        # TODO: have spinny until finished loading, so it won't be
        # unresponsive (see the Bible)
        # this lies, it says before finished loading images
        print("load finished.")

        # Books without creator metadata (or with a blank one) get an empty
        # author part instead of raising IndexError after the text has
        # already been loaded.
        creators = the_epub.get_metadata(_DC_NS, "creator")
        author_full = creators[0][0] if creators else ""
        name_parts = (author_full or "").split()
        author_last = name_parts[-1].lower() if name_parts else ""
        print(author_last)
        self.load_notes(filename, author_last)


# if __name__ == "__main__":
def main():
    """Create the application and main window; load a book given in argv."""
    app = qtw.QApplication(sys.argv)
    window = MainWindow()
    if len(sys.argv) > 1:
        bookFilename = sys.argv[1]
        window.load_epub(bookFilename)
    # this trick passes the app's exit code back to the OS....
BraveNewWordsScraper.py
Source:BraveNewWordsScraper.py
#!/usr/bin/env python
# coding: utf-8

# In[85]:

import requests
from requests_html import HTMLSession
import re
import csv

# A four-digit number starting with 1 or 2.  The original class "[1,2]" also
# accepted a comma as the first character, so text such as ",195" matched.
_YEAR_PATTERN = re.compile(r"[12][0-9]{3}")


def findYear(text):
    """Return the first year-like substring of *text*, or None.

    A year is four consecutive digits whose first digit is 1 or 2; the match
    is returned as a string.
    """
    year = _YEAR_PATTERN.search(text)
    if year:
        return year.group()
    return None


def scraper():
    """Scrape entry names and first-citation years from Oxford Reference.

    Walks browse pages 1-9 (100 entries per page), opens every entry page
    and looks for a year in the first quotation paragraph.

    Returns a dict mapping entry name to the year string; entries for which
    no year was found are left out.
    """
    url_A = "http://www.oxfordreference.com/browse?btog=chap&page="
    url_B = "&pageSize=100&sort=titlesort&source=%2F10.1093%2Facref%2F9780195305678.001.0001%2Facref-9780195305678"
    base_selector = "#abstract_link"
    quote_selector = ".prosequoteType > p:nth-child(1)"
    entry_dict = {}
    # One session for the whole run: connections are reused and the session
    # is closed at the end, instead of creating (and never closing) a new
    # session for every single request.
    with HTMLSession() as session:
        for page_num in range(1, 10):
            url = url_A + str(page_num) + url_B
            base_page = session.get(url)
            for j in range(1, 101):
                # Entry ids are numbered continuously across pages.
                entry_num = j + 100 * (page_num - 1)
                try:
                    selector = base_selector + str(entry_num)
                    entry = base_page.html.find(selector)[0]
                    entry_name = entry.text
                    entry_href = entry.attrs["href"]
                    entry_url = "http://www.oxfordreference.com" + entry_href
                    entry_page = session.get(entry_url)
                    try:
                        first_year = entry_page.html.find(quote_selector)[0].text
                    except Exception as e:
                        # No quotation paragraph (or it could not be read):
                        # report it and treat the entry as having no year.
                        print(e)
                        print("On Entry #" + str(entry_num))
                        first_year = ""
                    first_year = findYear(first_year)
                    if first_year:
                        entry_dict[entry_name] = str(first_year)
                    print("Entry #" + str(entry_num) + ": " + entry_name)
                    print("\t year= " + str(first_year))
                except Exception as e:
                    # Any failure (typically a missing entry link on a short
                    # page) ends this page and moves on to the next one.
                    print(e)
                    print("Exited inner loop at Entry #" + str(entry_num) + ", Page #" + str(page_num))
                    print("Beginning Page #" + str(page_num + 1))
                    break
    return entry_dict


data_dict = scraper()
# newline='' is what the csv module expects for files it writes to; without
# it, platforms that translate "\n" produce blank lines between rows.
with open('braveNewWords.csv', 'w', encoding="utf-8", newline='') as f:
    writer = csv.writer(f)
    for row in data_dict.items():
        ...  # NOTE(review): the loop body is cut off in the visible source
Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!