Join the 1-day Testing & QA Summit featuring 15+ Expert Speakers.Register for FREE! Join TestMu Conference

How to use entry_href method in autotest

Best Python code snippet using autotest_python

kgcRequest.py

Source:kgcRequest.py

1import json2import random3import re4import time5import execjs6import RequestUtil7import StrUtil8import UrlUtil9def get_page_sub_href(page):10    # è·åæäº¤è¯å·çå°å å¹¶è¿å11    return re.findall('<aid="putIn"href="javascript:void\(0\);"data="(.*?)"title=""',12                      StrUtil.formatting(page.text))[0]13class kgcRequest:14    session = RequestUtil.session({15        "http://tiku.kgc.cn/testing/error": "è¯¾å·¥åºé¡µé¢å¼å¸¸"16    })17    # kgcç½ç«ä¸çè·¯å¾18    urls = UrlUtil.regionUrl("http", "tiku.kgc.cn", "80", {19        "my_exam_js": "resources/V12.0.0.5/js/myexam.js"20    })21    # kgcç½ç«ä¸çè¯·æ±ç»æéå22    results = {23    }24    kgcData = {25        "kgc_index_path": "",26        "entry_name": "",27        "entry_href": "",28        "get_paper_href": "",29        "100%go_on": True,30        "auto_choice_get_paper_href": True,31        "do_time": "",32        "last_data": ["", ""]33    }34    def __init__(self, kgcData: dict):35        for dataKey in self.kgcData.keys():36            try:37                self.kgcData[dataKey] = kgcData[dataKey]38            except KeyError:39                kgcData[dataKey] = ""40        kgc_index = self.session.get(self.kgcData["kgc_index_path"])41        if kgc_index.status_code == 200:42            if 'å¯¹åºçpassportä¸åå¨' not in kgc_index.text:43                self.urls.put("index", self.kgcData["kgc_index_path"])44                self.results[self.urls.get("index")] = kgc_index45                # å è½½js46                self.load_js()47                # å è½½æ¥è¯¢å·é¢æ°çurl48                self.load_today_question_count_url(kgc_index)49                # å è½½æçåå²å°å50                self.load_history_href(kgc_index)51            else:52                raise RequestUtil.RequestUtilError53    def reload(self, kgc_index_path: str):54        self.kgcData["kgc_index_path"] = kgc_index_path55        self.__init__(self.kgcData)56    # å è½½å¥å£ è¿åå è½½å¥½çå¥å£å57    def load_entry(self, choiceEntry=None, kgc_index_result=None):58        if choiceEntry is None:59            choiceEntry = self.kgcData["entry_name"]60        if kgc_index_result is None:61            kgc_index_result = self.results[self.urls.get("index")]62        class entry:63            name = ""64            href = ""65            def __init__(self, entry_block):66                self.name = re.findall('<spanclass="test_list_name">(.*?)<imgsrc=', entry_block)[0]67                self.href = re.findall('<spanclass="test_list_go"><a.+href="(.*?)">è¿å¥</a>', entry_block)[0]68        print("æ£å¨å è½½å·é¢å¥å£ããã")69        # è·åææåæ°å¥å£å70        entry_blocks = re.findall('<divclass="test_listitem\\d">(.*?)</div>', StrUtil.formatting(kgc_index_result.text))71        # å¥å£å¯¹è±¡72        entryObjects = []73        for entryBlock in entry_blocks:74            entryObjects.append(entry(entryBlock))75            # å¦æå°åä¸å¨aé¾æ¥76            last_entryObject = entryObjects[len(entryObjects) - 1]77            if last_entryObject.href == "javascript:void(0);":78                last_entryObject.href = re.findall('<spanclass="test_list_go"><a.+data="(.*?)"', entryBlock)[0]79        # éåå¥å£åçææ²¡æåä¼ å¥å¥å£åä¸æ ·ç80        for entryObject in entryObjects:81            if entryObject.name == choiceEntry:82                self.kgcData["entry_name"] = entryObject.name83                self.kgcData["entry_href"] = entryObject.href84                return entryObject.name85        # å¾ªç¯å®äºè¿æ¯æ²¡æ86        print("è¯·éæ©å¥å£ï¼")87        print("åºå·\tå¥å£å")88        for index in range(len(entryObjects)):89            print(str(index + 1) + "\t" + entryObjects[index].name)90        while True:91            try:92                entryObject = entryObjects[int(input("è¯·éæ©ï¼")) - 1]93                self.kgcData["entry_name"] = entryObject.name94                self.kgcData["entry_href"] = entryObject.href95                return entryObject.name96            except ValueError:97                print("è¯·è¾å¥æ°åï¼")98            except IndexError:99                print("è¯·è¾å¥1-" + str(len(entryObjects)) + "èå´çæ°åï¼")100    # æ ¹æ®å¥å£å è½½è·åè¯å·çå°å101    def load_get_paper_href(self):102        # è·åè¯å·ä¹ååå¤ææ¯å¦å è½½å¥å£è¿103        if self.kgcData["entry_name"] == "" and self.kgcData["entry_href"] == "":104            print("éè¯¯ï¼ï¼æªå è½½å¥å£")105            self.load_entry()106        # è¯·æ±å¥å£107        entry_result = self.session.get(self.kgcData["entry_href"])108        if entry_result.url == '':109            raise IndexError("list index out of range")110        else:111            # å¤æå¥å£æ¯å¦ç´æ¥è¿å¥è¯å·(ç´æ¥è¿å¥è¯å·ä»£è¡¨æ¯æ¨¡æçé¢å)112            if len(re.findall('<p class="f14">èè¯å©ä½æ¶é´</p>', entry_result.text)) == 0:113                # å¥å£åç±»114                class entryBlock:115                    enter_fun_name = ""116                    enter_fun_arguments = []117                    data = []118                    a_id = 0119                    title = ""120                    def __init__(self, enter_fun_name, enter_fun_arguments, title=""):121                        self.enter_fun_name = enter_fun_name122                        self.enter_fun_arguments = enter_fun_arguments123                        self.title = title124                # è½¬ååçè¯·æ±å¥å£125                format_entry_result = StrUtil.formatting(entry_result.text)126                # è·åææä¸çº§å¥å£ è¿å¥æµè¯ æ§è¡çjsæ¹æ³127                do_js_funs = re.findall('javascript:(.*?);?"', format_entry_result)128                # è·åæ§è¡çæ¹æ³å129                do_js_fun_name = re.findall('(.*?)\(', do_js_funs[0])[0]130                # å¦æä¸ºä¸é¡¹æè½å è¿å¥æµè¯ æ§è¡çjsæ¹æ³ä¼å¤å¹éå°ä¸ä¸ªæ·»å äºçº§å¥å£æ¹æ³131                if do_js_fun_name == "percentAlert":132                    do_js_funs = do_js_funs[:-1]133                # è·åfunctionå134                functions = re.findall("function\s+.+\(.+\)\{[\s\S]*?\}", entry_result.text)135                # å®ä¹scriptå136                script = ""137                # ä½¿ç¨çæ¹æ³138                use_fun = [do_js_fun_name, "percentOutlineAlert", "percentChapterAlert"]139                # æé¤æªä½¿ç¨æ¹æ³140                index = 0141                length = len(functions)142                while index < length:143                    if re.findall('function\s*(.*?)\(', functions[index])[0] not in use_fun:144                        del functions[index]145                        index -= 1146                        length -= 1147                    index += 1148                for function in functions:149                    attr_data = re.findall('\$\("#(.*?)"\).attr\("(.*?)"\)', function)150                    if len(attr_data) > 0:151                        attr_data = attr_data[0]152                    func = function153                    if len(attr_data) > 0:154                        func = function.replace("){", "," + attr_data[1] + "){"). \155                            replace('$("#' + attr_data[0] + '").attr("' + attr_data[1] + '")', "data")156                    func = func.replace("window.location.href=", "return ")157                    func = func.replace("var data =", "return ")158                    script += "\n" + func159                function_names = re.findall('function\s+(.*?)\(', script)160                script = execjs.compile(script)161                # è·åæ¹æ³å¯è½æ§çæ¼æ¥åç¬¦ä¸²162                function_names_joint = ""163                for index in range(len(function_names)):164                    if index != 0:165                        function_names_joint += "|"166                    function_names_joint += function_names[index]167                # æ¥æ¾æææµè¯æ¹æ³å¥å£å¯¹è±¡ (å½ååå«ä¸çº§å¥å£)168                entryBlocks = []169                for do_js_fun in do_js_funs:170                    name = re.findall('(.*?)\(', do_js_fun)[0]171                    try:172                        arguments = re.findall('[' + function_names_joint + ']\((.*?)\)', do_js_fun)[0].split(",")173                    except re.error:174                        print()175                    entryBlocks.append(entryBlock(name, arguments))176                if do_js_fun_name == "unitExam":177                    # å è½½data178                    data_s = re.findall('data="(.*?)"title="è¿å¥æµè¯"', format_entry_result)179                    for index in range(len(data_s)):180                        entryBlocks[index].enter_fun_arguments.append(data_s[index])181                elif do_js_fun_name == "percentAlert":182                    # è·åææa_id data title percent183                    a_data_title_percent_s = re.findall(184                        ';position:relative"><aid="(.*?)"href="#"title=""class="no-sj"data="(.*?)"'185                        'style="padding-left:30px">(.*?)</a><p><spanstyle="width:(.*?)"></span>', format_entry_result)186                    # å°ææa_id å dataå å¥å£ç±»å187                    for index in range(len(a_data_title_percent_s)):188                        entryBlocks[index].a_id = a_data_title_percent_s[index][0]189                        entryBlocks[index].data = a_data_title_percent_s[index][1].split(",")190                        entryBlocks[index].title = a_data_title_percent_s[index][2] + "\t" + \191                                                   a_data_title_percent_s[index][3]192                # äºçº§å¥å£ç¹å»äºä»¶çjsä»£ç å193                skillList_click_js_block = ""194                # äºçº§å¥å£çæ¹æ³å195                skillList_fun_name = ""196                # å è½½è·åäºçº§å¥å£å°åçjsæ¹æ³197                def load_skill_list_click_js_block():198                    nonlocal skillList_click_js_block199                    nonlocal skillList_fun_name200                    print("æ£å¨å è½½ä¸é¡¹æè½åçäºçº§å¥å£ãããã")201                    # è·åå°äºçº§å¥å£ç¹å»äºä»¶çjsä»£ç å202                    skillList_click_js_block = re.findall('if\s?\(pos\s?!=\s?-1\)\s?\{'203                                                          '[\s\S]*'204                                                          '\}[\s\t\n\r]*'205                                                          'obj\.toggleClass\("yes-sj"\);[\s\t\n\r]*'206                                                          '\}[\s\t\n\r]*'207                                                          '\}[\s\t\n\r]*'208                                                          '\}\);[\s\t\n\r]*'209                                                          '[\s\t\n\r]*\}[\s\t\n\r]?else[\s\t\n\r]?(\{'210                                                          '[\s\S]*'211                                                          '\})[\s\t\n\r]*'212                                                          '\}[\s\t\n\r]*'213                                                          '\$\(this\)\.toggleClass\("yes-sj"\);',214                                                          entry_result.text)[0]215                    # äºæ¬¡å¤çäºçº§å¥å£ç¹å»äºä»¶çjsä»£ç 216                    # å°ä»£ç åè½¬åä¸ºæ¹æ³217                    skillList_click_js_block = "\nfunction click(a_id,data){\r" + skillList_click_js_block[218                                                                                  1:] + "\r"219                    # å°jsä»£ç æè§£ä¸ºä¸è¡ä¸è¡220                    skillList_click_js_block_lines = re.findall('\n(.*?)\r', skillList_click_js_block)221                    # å¨éåä¸å é¤ä¸å¯¹æå®åç¬¦222                    def remove_pair(char, list_, start_index):223                        char_dic = {224                            "{": "}",225                            "[": "]",226                            "<": ">",227                            "(": ")",228                        }229                        # å é¤ä¹ååå¤æå½åè¡æ¯å¦å¸¦ææå®åç¬¦230                        if char in list_[start_index]:231                            index = start_index + 1232                            deep = 0233                            while index < len(list_):234                                if char in list_[index] and char_dic[char] not in list_[index]:235                                    deep += 1236                                elif char_dic[char] in list_[index] and deep == 0 and True if list_[index].find(237                                        char) == -1 \238                                        else list_[index].find(char_dic[char]) < list_[index].find(char):239                                    result_index = [index]240                                    # å¦æè¦ç§»é¤çè¡åè¿ææå®åç¬¦241                                    if char in list_[index]:242                                        for result_index_ in remove_pair(char, list_, index):243                                            result_index.append(result_index_)244                                    # è¿åéè¦å é¤çä¸æ 245                                    return result_index246                                elif char_dic[char] in list_[index] and char not in list_[index]:247                                    deep -= 1248                                index += 1249                    # æé¤å¸¦æjqueryçé²æ¥é250                    skillList_click_js_block_lines_index = 0251                    skillList_click_js_block_lines_length = len(skillList_click_js_block_lines)252                    while skillList_click_js_block_lines_index < skillList_click_js_block_lines_length:253                        # å¦æå½åè¡å¸¦æ$.ajax(254                        if "$.ajax(" in skillList_click_js_block_lines[skillList_click_js_block_lines_index]:255                            del_index = remove_pair(256                                "(", skillList_click_js_block_lines, skillList_click_js_block_lines_index)257                            for i in del_index:258                                skillList_click_js_block_lines[i] = skillList_click_js_block_lines[i].replace(259                                    ")", "")260                            # å°$.ajax æ¿æ¢ä¸º return261                            skillList_click_js_block_lines[skillList_click_js_block_lines_index] = \262                                skillList_click_js_block_lines[skillList_click_js_block_lines_index].replace(263                                    "$.ajax(", "return")264                        # å¦æå½åè¡å¸¦æ$265                        elif "$" in skillList_click_js_block_lines[skillList_click_js_block_lines_index]:266                            if "{" in skillList_click_js_block_lines[skillList_click_js_block_lines_index]:267                                del_index = remove_pair("{", skillList_click_js_block_lines,268                                                        skillList_click_js_block_lines_index)269                                for index in range(len(del_index)):270                                    if index < len(del_index) - 1:271                                        # å é¤å½åè¡272                                        del skillList_click_js_block_lines[del_index[index]]273                                        skillList_click_js_block_lines_length -= 1274                                        skillList_click_js_block_lines_index -= 1275                                    skillList_click_js_block_lines[del_index[index]] = \276                                        skillList_click_js_block_lines[del_index[index]].replace("}", "")277                            # å é¤å½åè¡278                            del skillList_click_js_block_lines[skillList_click_js_block_lines_index]279                            skillList_click_js_block_lines_length -= 1280                            skillList_click_js_block_lines_index -= 1281                        # å¦æå½åè¡å¸¦æ//æ³¨é282                        elif StrUtil.formatting(283                                skillList_click_js_block_lines[284                                    skillList_click_js_block_lines_index]).startswith("//"):285                            # å é¤å½åè¡286                            del skillList_click_js_block_lines[skillList_click_js_block_lines_index]287                            skillList_click_js_block_lines_length -= 1288                            skillList_click_js_block_lines_index -= 1289                        skillList_click_js_block_lines_index += 1290                    # æ¥æ¾åºç³æçåé291                    skillList_click_js_block_variables = ["a_id", "data"]292                    for skillList_click_js_block_line in skillList_click_js_block_lines:293                        re_result = re.findall('var\s*(\w+)\s*=\s*',294                                               StrUtil.formatting(skillList_click_js_block_line))295                        if len(re_result) != 0:296                            skillList_click_js_block_variables.append(re_result[0])297                    # ç§»é¤ä½¿ç¨æªç³æåéçä»£ç è¡298                    skillList_click_js_block_lines_index = 0299                    skillList_click_js_block_lines_length = len(skillList_click_js_block_lines)300                    while skillList_click_js_block_lines_index < skillList_click_js_block_lines_length:301                        re_results = re.findall('(\w+)\s*[=].*;',302                                                StrUtil.formatting(skillList_click_js_block_lines[303                                                                       skillList_click_js_block_lines_index]))304                        if len(re_results) > 0:305                            for re_result in re_results:306                                if re_result not in skillList_click_js_block_variables and not re_result.startswith(307                                        "var"):308                                    # ç§»é¤å½åè¡309                                    del skillList_click_js_block_lines[skillList_click_js_block_lines_index]310                                    skillList_click_js_block_lines_index -= 1311                                    skillList_click_js_block_lines_length -= 1312                                    break313                        re_results = re.findall('(\w+)\s*[.\[].*;',314                                                StrUtil.formatting(skillList_click_js_block_lines[315                                                                       skillList_click_js_block_lines_index]))316                        if len(re_results) > 0:317                            for re_result in re_results:318                                if re_result not in skillList_click_js_block_variables:319                                    # ç§»é¤å½åè¡320                                    del skillList_click_js_block_lines[skillList_click_js_block_lines_index]321                                    skillList_click_js_block_lines_index -= 1322                                    skillList_click_js_block_lines_length -= 1323                        re_results = re.findall('[+]+(\w+)\s*.*;',324                                                StrUtil.formatting(skillList_click_js_block_lines[325                                                                       skillList_click_js_block_lines_index]))326                        skillList_click_js_block_lines_index += 1327                    # ä¸è¡ä¸è¡çjsä»£ç åå¹¶328                    skillList_click_js_block = ""329                    for skillList_click_js_block_line in skillList_click_js_block_lines:330                        skillList_click_js_block += skillList_click_js_block_line331                    skillList_click_js_block = execjs.compile(skillList_click_js_block)332                    # è·åäºçº§å¥å£çæ¹æ³å333                    skillList_fun_name = re.findall("html\+='<aid=\"unitexam\"href=\"javascript:(.*?)\(",334                                                    format_entry_result)[0]335                # æ ¹æ®å¥å£å¯¹è±¡è·åäºçº§å¥å£å¯¹åºçå¥å£å¯¹è±¡éå336                def get_skill_list_entry_block_s(entry_block):337                    # è·åè¯·æ±äºçº§å¥å£çè·¯å¾338                    url = skillList_click_js_block.call("click", entry_block.a_id,339                                                        entry_block.data)["url"]340                    # è¯·æ±äºçº§å¥å£341                    skillListResult = self.session.get(url)342                    skillList = json.loads(skillListResult.text)343                    skill_list_entry_block_s = []344                    for skill in skillList:345                        # æ ¹æ®äºçº§å¥å£åå»ºå¥å£å¯¹è±¡346                        skill_list_entry_block_s.append(entryBlock(skillList_fun_name,347                                                                   [skill['percent'], entry_block.data[0], skill["id"]],348                                                                   "\t" + skill["name"] + "\t" + str(349                                                                       skill["percent"]) + "%"))350                    return skill_list_entry_block_s351                # å¤ææ¯å¦èªå¨éæ©å¥å£å°å352                if self.kgcData["auto_choice_get_paper_href"]:353                    choose_entry_block = ""354                    # å¤æå¦æä¸ºè¯¾ç¨å¤ä¹ å æ ä¸ºä¸é¡¹æè½åä¸å°100%ç»§ç»å·355                    if do_js_fun_name == "unitExam" or (356                            do_js_fun_name == "percentAlert"357                            and self.kgcData["100%go_on"]):358                        choose_entry_block = entryBlocks[random.randint(0, len(entryBlocks) - 1)]359                    elif do_js_fun_name == "percentAlert" and not self.kgcData["100%go_on"]:360                        load_skill_list_click_js_block()361                        # æ¾å°ç¬¬ä¸ä¸ªä¸ä¸º100%ç362                        for entryBlock_ in entryBlocks:363                            if int(re.findall("\t(.*?)%", entryBlock_.title)[0]) != 100:364                                # å¤æå½åå¥å£çäºçº§å¥å£æ¯å¦æä¸ä¸º100%(å ä¸ºkgcæbug)365                                skill_list_entry_block_s = get_skill_list_entry_block_s(entryBlock_)366                                # æ¯å¦å¨ä¸º100%367                                is_all_100 = True368                                for skill_list_entry_block in skill_list_entry_block_s:369                                    if int(re.findall("\t.*\t(.*?)%", skill_list_entry_block.title)[0]) != 100:370                                        is_all_100 = False371                                        choose_entry_block = skill_list_entry_block372                                        break373                                if not is_all_100:374                                    choose_entry_block = entryBlock_375                                    break376                                # å¦æå¨ä¸º100%377                                else:378                                    choose_entry_block = entryBlocks[0]379                    try:380                        get_paper_href = script.call(choose_entry_block.enter_fun_name,381                                                     *choose_entry_block.enter_fun_arguments)382                    except AttributeError:383                        print()384                    self.kgcData["get_paper_href"] = get_paper_href385                    return self.kgcData["get_paper_href"]386                # å¦æä¸æ¯èªå¨éæ©å¥å£å°å387                else:388                    # å¤æå·é¢å¥å£å°åæ¯å¦æå¹éç389                    if self.kgcData["get_paper_href"] != "":390                        for entryBlock_ in entryBlocks:391                            get_paper_href = script.call(entryBlock_.enter_fun_name, *entryBlock_.enter_fun_arguments)392                            if self.kgcData["get_paper_href"] == get_paper_href:393                                self.kgcData["get_paper_href"] = get_paper_href394                                return self.kgcData["get_paper_href"]395                    # æ²¡æå¹éç396                    # å¤æå¦æä¸ºä¸é¡¹æè½å397                    if do_js_fun_name == "percentAlert":398                        load_skill_list_click_js_block()399                        # å¾ªç¯è¯·æ±äºçº§å¥å£400                        index = 0401                        length = len(entryBlocks)402                        while index < length:403                            skill_list_entry_block_s = get_skill_list_entry_block_s(entryBlocks[index])404                            for skill_list_entry_block in skill_list_entry_block_s:405                                # å¤æå·é¢å¥å£å°åæ¯å¦æå¹éç406                                if self.kgcData["get_paper_href"] != "":407                                    get_paper_href = script.call(skill_list_entry_block.enter_fun_name,408                                                                 *skill_list_entry_block.enter_fun_arguments)409                                    if self.kgcData["get_paper_href"] == get_paper_href:410                                        self.kgcData["get_paper_href"] = get_paper_href411                                        return self.kgcData["get_paper_href"]412                                # å°äºçº§å¥å£å äººå¥å£éå413                                entryBlocks.insert(index + 1, skill_list_entry_block)414                            length += len(skill_list_entry_block_s)415                            index += 1 + len(skill_list_entry_block_s)416                        print("å·é¢å·é¢å¥å£å°åå¤±æ")417                    # å¤æå¦æä¸ºè¯¾ç¨å¤ä¹ å418                    elif do_js_fun_name == "unitExam":419                        print("å·é¢å¥å£å°åå¤±æ")420                        # å è½½title421                        titles = re.findall('<li><span>(.*?)</span><aid="unitexam"', format_entry_result)422                        for index in range(len(titles)):423                            entryBlocks[index].title = titles[index]424                    else:425                        raise Exception("åºç°æªç¥è¯¾ç¨!è¯·èç³»ä½èæ·»å !")426                    print("è¯·éæ©è¯¾ç¨ï¼")427                    print("åºå·\tè¯¾ç¨å")428                    for index in range(len(entryBlocks)):429                        print(str(index + 1) + "\t" + entryBlocks[index].title)430                    while True:431                        try:432                            case = int(input("è¯·éæ©ï¼")) - 1433                            if 0 <= case < len(entryBlocks):434                                self.kgcData["get_paper_href"] = script.call(entryBlocks[case].enter_fun_name,435                                                                             *entryBlocks[case].enter_fun_arguments)436                            else:437                                raise IndexError438                            return self.kgcData["get_paper_href"]439                        except ValueError:440                            print("è¯·è¾å¥æ°åï¼")441                        except IndexError:442                            print("è¯·è¾å¥1-" + str(len(entryBlocks)) + "èå´çæ°åï¼")443            else:444                self.kgcData["get_paper_href"] = self.kgcData["entry_href"]445                return self.kgcData["get_paper_href"]446    # æ ¹æ®è·åè¯å·çå°åå è½½è¯å· è¿åè¯å·447    def load_test_paper(self, get_paper_href=None):448        if get_paper_href is None:449            get_paper_href = self.kgcData["get_paper_href"]450        # æµè¯è·åè¯å·çå°åæ¯å¦å¯ç¨451        test_paper = self.session.get(get_paper_href)452        if test_paper.status_code != 200:453            raise Exception("è·åè¯å·çå°åå¤±ææç½ç»éè¯¯ï¼")454        self.results["test_page"] = test_paper455        # è¿åè¯å·456        return test_paper457        # å°æäº¤å°åå å¥urls458        # self.urls.put("current_sub_href", sub_href)459        # return self.kgcData["current_page_code"]460    # å è½½è¯å·è¯é¢åæ461    def load_solutions_paper(self, sub_test_paper_result):462        if sub_test_paper_result["result"]:463            # è·åå°è¯å·æ¥åé¡µé¢464            report_page_result = self.session.get(self.urls.get("go_back_url"))465            # è·åå°è¯å·åæå°å466            solutions_href = re.findall('>æ¥çæ¥å</a><ahref="(.*?)"title=""[\s\S]*>æ¥çè§£æ</a>',467                                        StrUtil.formatting(report_page_result.text))[0]468            return self.session.get(solutions_href)469        else:470            raise Exception("æäº¤è¯å·,æªç¥éè¯¯ï¼")471    # å è½½çæ¡472    def load_answer(self, solutions_paper=None):473        if solutions_paper is None:474            solutions_paper = self.load_solutions_paper(self.sub_test_paper())475        # è·åææçæ¡å476        subjects = re.findall(r'<ulclass="sec2grays">(.*?)</ul><divclass="sec3reportfont-yaheif14strong">',477                              StrUtil.formatting(solutions_paper.text))478        # è·åææé¢ç®id479        # çæ¡åè·åæ¯éé¢çæ£ç¡®çæ¡å¹¶å¡«å¥é¢ç®ä¿¡æ¯æ°ç»480        correctAnswers = []481        for i, su in enumerate(subjects):482            questionAnswers = re.findall(483                r'<lic?l?a?s?s?=?"?g?r?e?e?n?"?><pre><span>([A-Z]):</span><imagestyle="vertical-align:middle"src="('484                r'.*?)"/></pre>',485                su)486            for index in range(len(questionAnswers)):487                questionAnswers[index] = list(questionAnswers[index])488            answer_li = re.findall(r'<liclass="green"><pre><span>[A-Z]:</span>', su)489            an = []490            # å¤éé¢çå¯è½æ§491            if len(answer_li) > 1:492                for answer in answer_li:493                    an.append(re.findall(r"<span>(.*):</span>", answer)[0])494                correctAnswers.append(an)495            # åéé¢çå¯è½æ§496            else:497                an.append(re.findall(r"<span>(.*):</span>", answer_li[0])[0])498                correctAnswers.append(an)499        # å°çæ¡è½¬æ01234500        answers_num_str = []501        for i in range(len(correctAnswers)):502            if len(correctAnswers[i]) == 1:503                answers_num_str.append(str(ord(correctAnswers[i][0].upper()) - 65))504            else:505                j = ""506                for anwer in correctAnswers[i]:507                    j += str(ord(anwer.upper()) - 65) + ","508                answers_num_str.append(j[:-1])509        return answers_num_str510    # é®é¢æ°æ®ç±»511    class question_data:512        paper_code = ""513        question_id = ""514        sub_question_id = ""515        question_index = ""516        question_type = ""517        def __init__(self, paper_code, question_id, sub_question_id,518                     question_index, question_type):519            self.paper_code = paper_code520            self.question_id = question_id521            self.sub_question_id = sub_question_id522            self.question_index = question_index523            self.question_type = question_type524    # å è½½é®é¢æ°æ®éå525    def load_question_data(self, test_paper=None):526        if test_paper is None:527            test_paper = self.load_test_paper()528        # è·åææé¢æ°æ®å529        data_s = list(set(re.findall(r'<dddata="(.*?)">', StrUtil.formatting(test_paper.text))))530        data = []531        # å¾ªç¯å°é¢åºæ°æ®åè½¬åä¸ºé®é¢æ°æ®å¯¹è±¡532        for index in range(len(data_s)):533            data = data_s[index].split(",")534            data_s[index] = self.question_data(data[1], data[2], data[3], data[4], data[5])535        # å°data_så¡«å¥last_data536        self.kgcData["last_data"][0] = data[0]537        self.kgcData["last_data"][1] = data[1]538        # æåº539        for index_ in range(len(data_s) - 1):540            for index in range(len(data_s) - index_ - 1):541                if int(data_s[index].question_index) > int(data_s[index + 1].question_index):542                    temp_data = data_s[index]543                    data_s[index] = data_s[index + 1]544                    data_s[index + 1] = temp_data545        return data_s546    # å è½½æ¥è¯¢å·é¢æ°çurl547    def load_today_question_count_url(self, kgc_index_result=None):548        if kgc_index_result is None:549            kgc_index_result = self.results[self.urls.get("index")]550        try:551            today_question_count_url = \552                re.findall('\.val\(\'\'\);\$\.ajax\(\{url:"(.*?)",', StrUtil.formatting(kgc_index_result.text))[0]553        except IndexError:554            print()555        if self.session.get(today_question_count_url).status_code == 200:556            self.urls.put("today_question_count_url", today_question_count_url)557        else:558            raise Exception("ç½ç»å¼å¸¸")559    # å è½½æçåå²å°å560    def load_history_href(self, kgc_index_result=None):561        if kgc_index_result is None:562            kgc_index_result = self.results[self.urls.get("index")]563        history_href = \564            re.findall('<liseq="2"><ahref="(.*?)"', StrUtil.formatting(kgc_index_result.text))[0]565        if self.session.get(history_href).status_code == 200:566            self.urls.put("history_href", history_href)567        else:568            raise Exception("ç½ç»å¼å¸¸")569    # å è½½æçåå²é¡µé¢570    def load_history_page(self):571        history_page_result = self.session.get(self.urls.get("history_href"))572        if history_page_result.status_code != 200:573            self.load_history_href()574            history_page_result = self.session.get(self.urls.get("history_href"))575        return history_page_result576    # è·åå¯¹åºè¯å·ååä¸éé¡µé¢577    def get_again_answer_page(self, test_paper=None):578        if test_paper is None:579            test_paper = self.load_test_paper()580        # å è½½æçåå²581        history_page_result = self.load_history_page()582        # è·åè¿å¥ååä¸éé¡µé¢çæ¹æ³å583        again_answer_fun_name = re.findall('æ¥çæ¥å</a><ahref="javascript:(.*?)\(',584                                           StrUtil.formatting(history_page_result.text))[0]585        # è·åè¿å¥ååä¸éé¡µé¢çæ¹æ³586        again_answer_fun = re.findall('function ' + again_answer_fun_name + "[\s\S]+?}", history_page_result.text)[0]587        again_answer_fun = again_answer_fun.replace("location.href =", "return ")588        again_answer_fun = execjs.compile(again_answer_fun)589        # è·åè¯å·id590        paper_id = re.findall('<aid="putIn"href="javascript:void\(0\);"'591                              'data=".*?/([0-9]*?)"title=""class="f14restacenterpater"'592                              '>æè¦äº¤å·</a>',593                              StrUtil.formatting(test_paper.text))[0]594        # è·åååä¸éé¡µé¢çå°å595        again_answer_href = again_answer_fun.call(again_answer_fun_name, *[paper_id, time.time()])596        again_answer_result = self.session.get(again_answer_href)597        if again_answer_result.status_code == 200:598            return again_answer_result599        else:600            raise Exception("ç½ç»éè¯¯ï¼")601    # è¾åºçé¢æ°åæ£ç¡®ç602    def print_today_question_count_url(self):603        print("æ£å¨æ¥è¯¢çé¢æ°åæ£ç¡®çããã")604        today_question_count_result = self.session.get(self.urls.get("today_question_count_url"))605        if today_question_count_result.status_code != 200:606            self.load_today_question_count_url()607            today_question_count_result = self.session.get(self.urls.get("today_question_count_url"))608        AnswerNum = json.loads(today_question_count_result.text)609        if AnswerNum["result"]:610            msgs = AnswerNum["msg"].split(";")611            print("ç´¯è®¡çé¢æ°ï¼" + msgs[0] + "\tå®éçé¢æ°ï¼" + msgs[1] + "\tæ£ç¡®çï¼" + msgs[2])612        else:613            print("ä»å¤©è¿æ²¡æåé¢å~å æ²¹ï¼")614    # æäº¤è¯å· è¿åè½¬ä¸ºjsonçç»æ615    def sub_test_paper(self, question_data_s, sub_data, test_paper=None):616        if test_paper is None:617            test_paper = self.load_test_paper()618        # å¦ææäº¤æ°æ®ä¸ä¸ºç©ºå°±éè¦æäº¤çæ¡619        if len(sub_data) > 0:620            # è·åæäº¤çæ¡çå°å621            sub_js = re.findall("\.length;(\$\.ajax\(\{url:[\s\S]+?),",622                                StrUtil.formatting(623                                    self.results[self.urls.get("my_exam_js")]624                                        .text))[0].replace("$.ajax({url:", "return ")625            sub_js = "var lastData=" + str(self.kgcData["last_data"]) + ";" + sub_js626            sub_js = sub_js.replace("\'", "\"")627            paramQuestionId = ""628            question_data_s_len = len(question_data_s)629            for index in range(question_data_s_len):630                paramQuestionId += question_data_s[index].question_id631                if index != question_data_s_len - 1:632                    paramQuestionId += ","633            sub_js = "var paramQuestionId=\"" + paramQuestionId + "\";" + sub_js634            sub_js = "function a(){" + sub_js635            sub_js += "}"636            sub_href = execjs.compile(sub_js).call("a")637            # æäº¤çæ¡638            self.session.post(sub_href, data=sub_data)639        # è·åæäº¤è¯å·å°å640        sub_href = get_page_sub_href(test_paper)641        # è·åè¯å·idå¹¶å¡«å¥data642        self.kgcData["current_page_code"] = sub_href.split("/")[-1].split("?")[0]643        # æäº¤è¯å·644        try:645            result = json.loads(self.session.post(sub_href).text)646        except json.decoder.JSONDecodeError as e:647            if "Expecting value: line" in str(e):648                raise Exception("éå¤æäº¤ï¼")649            else:650                raise e651        if result["result"]:652            self.urls.put("go_back_url", result["gobackUrl"])653            return result654        else:655            raise Exception("æäº¤è¯å·,æªç¥éè¯¯ï¼")656    # å·é¢ æ£ç¡®ç657    def do_test_paper(self, accuracy=1):658        # è·åè¯å·659        test_paper = self.load_test_paper()660        # æäº¤çæ°æ®661        sub_data = {662            "psqId": [],663            "time": [],664            "uAnswers": []665        }666        # è·åé®é¢æ°æ®667        question_data_s = self.load_question_data(test_paper)668        # æäº¤ç©ºå·669        sub_test_paper_result = self.sub_test_paper([], [], test_paper)670        # è·åè¯å·åæ671        solutions_paper = self.load_solutions_paper(sub_test_paper_result)672        # å è½½çæ¡673        answers = self.load_answer(solutions_paper)674        # é¢æ°675        question_num = len(answers)676        # è®¡ç®éé¢æ°677        F_num = int(question_num - accuracy * question_num)678        # å¾ªç¯åå¥éé¢679        for i in range(int(F_num)):680            while True:681                ran = str(random.randint(0, 3))682                if ran != answers[i]:683                    answers[i] = ran684                    break685        # å¤ææ¯éé¢å·é¢æ¶é´686        do_time = self.kgcData["do_time"]687        if do_time == "":688            do_time = str(random.randint(10, 20))689        # å°é®é¢æ°æ®å¡«å¥sub_data690        for index in range(len(question_data_s)):691            sub_data["psqId"].append(question_data_s[index].sub_question_id)692            sub_data["time"].append(do_time)693            try:694                sub_data["uAnswers"].append(answers[index])695            except IndexError as e:696                print()697        # è·åååä¸æ¬¡698        again_answer = self.get_again_answer_page(test_paper)699        # æäº¤çå·700        sub_test_paper_result = self.sub_test_paper(question_data_s, sub_data, again_answer)701        # è¿åæ¯å¦æå702        return sub_test_paper_result["result"]703    def load_my_exam_js(self):704        self.results[self.urls.get("my_exam_js")] = \705            self.session.get(self.urls.get("my_exam_js"))706        return self.results[self.urls.get("my_exam_js")]707    def load_js(self):708        print("æ£å¨å è½½js")...

studious.py

Source:studious.py

1#!/usr/bin/env python32## studious main3import sys4import hashlib5from base32c import cb32encode6# from Moore's book7from PySide2 import QtWidgets as qtw8from PySide2 import QtGui as qtg9from PySide2 import QtCore as qtc10# from PySide2 import QtWebEngineWidgets as qtwe11import ebooklib12from ebooklib import epub13import xml.etree.ElementTree as ETree14_debug = False15_dumpHTML = False16def unique_list(l):17    ulist = []18    for item in l:19        if item not in ulist:20            ulist.append(item)21    return ulist22class EPubTextBrowser(qtw.QTextBrowser):23    """Derived QTW class for the main text view."""24    def set_epub(self, the_epub):25        self.the_epub = the_epub26    def loadResource(self, restype, url):27        """Override to load images that are within the epub."""28        if restype == 2 and url.isRelative():29            if _debug:30                print("Image resource found: ", url.toDisplayString())31            # get file part of it. OR, load as path from zip?32            # for now, assume filename part of URL is the ID.33            imgHref = url.toDisplayString()34            if imgHref.startswith("../"):35                imgHref = imgHref[3:]36            image = self.the_epub.get_item_with_href(imgHref)37            # image = self.the_epub.get_item_with_id(url.fileName())38            if image:39                if _debug:40                    print("successfully loaded image of type", type(image))41                image = qtg.QImage.fromData(image.get_content())42                if image.width() > (self.width() * 0.8):43                    image = image.scaledToWidth(44                        self.width() * 0.8,45                        mode=qtc.Qt.TransformationMode.SmoothTransformation)46                # It accepts anything as the variant! Python!47                return image48            else:49                print("image load failed:", imgHref)50            # should we fetch external images? maybe not51        if _debug:52            print("loading non-image resource", url)53        super(EPubTextBrowser, self).loadResource(restype, url)54        55# Inheriting from QMainWindow broke the layouts.56# Should I make another class for the book itself?57class MainWindow(qtw.QMainWindow):58    """UI Class for the Studious epub reader"""59    SECTION = 0 # constants for the treeview60    HREF = 161   62    def __init__(self):63        """GUI Layout is built here."""64        # The book doesn't pass the class and object.65        super(MainWindow, self).__init__()66        # I won't call these if I set it up myself.67        # self.ui = Ui_MainWindow()68        # self.ui.setupUi(self)69        window = qtw.QWidget()70        self.setCentralWidget(window)71        72        self.setWindowTitle("Studious Reader")73        self.resize(960,600)74        ### Menu items75        openPixmap = getattr(qtw.QStyle, 'SP_DialogOpenButton')76        openIcon = self.style().standardIcon(openPixmap)77        openAction = qtw.QAction(openIcon, '&Open', self)78        openAction.setShortcut(qtg.QKeySequence("Ctrl+o"))79        openAction.triggered.connect(self.open_new_file)80        menuBar = qtw.QMenuBar(self)81        fileMenu = menuBar.addMenu("&File")82        fileMenu.addAction(openAction)83        self.setMenuBar(menuBar)84        ### The tricky layout to get the panes to work correctly.85        topLayout = qtw.QHBoxLayout()86        window.setLayout(topLayout)87        88        leftLayout = qtw.QVBoxLayout()89        topLayout.addLayout(leftLayout)90        leftSplitter = qtw.QSplitter(self) # or add to top hlayout?91        leftLayout.addWidget(leftSplitter)92        self.tocPane = qtw.QTreeWidget(self)93        self.tocPane.setColumnCount(2)94        self.tocPane.setHeaderLabels(["Section", "Link"])95        if not _debug:96            self.tocPane.hideColumn(1)97        self.tocPane.itemClicked.connect(self.jump_to_tocitem)98        #leftLayout.addWidget(self.tocPane)99        leftSplitter.addWidget(self.tocPane)100        ## 1. use a new hbox layout to have no splitter on the right101        rightFrame = qtw.QFrame(self)102        ## these didn't make any difference at all.103        # rightFrame.setFrameStyle(qtw.QFrame.NoFrame)104        # rightFrame.setContentsMargins(qtc.QMargins(0,0,0,0))105        leftSplitter.addWidget(rightFrame)106        rightHLayout = qtw.QHBoxLayout()107        rightFrame.setLayout(rightHLayout)108        ## 2. sub-splitter on the right109        # rightSplitter = qtw.QSplitter(self)110        # leftSplitter.addWidget(rightSplitter)111        ## 2a. to have the right splitter be separate112        # topLayout.addWidget(rightSplitter)113        ## 3. just 3 vboxes, no splitters114        # centerLayout = qtw.QVBoxLayout()115        # topLayout.addLayout(centerLayout)116        self.mainText = EPubTextBrowser(self) #qtw.QTextBrowser(self)117        # this isn't doing anything, is it reading the css instead?118        #self.mainText.style = """119        #  <style>body{ margin-left: 60px; margin-right: 60px; line-height: 130% }</style>120        #"""121        self.mainText.document().setDefaultStyleSheet(122            'body{ margin-left: 20px; margin-right: 20px; line-height: 110% }')123        mainText_font = qtg.QFont('Liberation Serif', 12)124        mainText_font.setStyleHint(qtg.QFont.Serif)125        self.mainText.setFont(mainText_font)126        self.mainText.setOpenLinks(False)127        self.mainText.anchorClicked.connect(self.jump_to_qurl)128        self.mainText.cursorPositionChanged.connect(self.update_location)129        rightHLayout.addWidget(self.mainText) # 1130        # rightSplitter.addWidget(self.mainText) # 2131        # centerLayout.addWidget(self.mainText) # 3132        # horizontal and vertical is flipped from what I thought.133        self.mainText.setFixedWidth(500)134        # this has no effect if fixedwidth is set.135        # and it doesn't stick to preferred if there's a splitter.136        self.mainText.setSizePolicy(qtw.QSizePolicy.Maximum,137                                     qtw.QSizePolicy.Preferred)138        # will need this vboxlayout if we add something below the notes.139        # rightLayout = qtw.QVBoxLayout()140        self.notesFrame = qtw.QTextEdit(self)141        # topLayout.addLayout(rightLayout)142        # rightHLayout.addLayout(rightLayout)   # 1143        rightHLayout.addWidget(self.notesFrame) # 1 no layout144        # rightSplitter.addWidget(self.notesFrame) # 2 145        # rightLayout.addWidget(self.notesFrame) # 3146        self.show()147    def open_new_file(self):148        filePath, _ = qtw.QFileDialog.getOpenFileName(149            self, caption="Load new ebook", filter="EPub files (*.epub)")150        self.load_epub(filePath)151        152    def update_location(self):153        print("Cursor position:", self.mainText.textCursor().position())154    #def create_toc_model(self, parent):155    #    model = qtg.QStandardItemModel(0, 1, parent)156    #    # can I just define constants like this?157    #    model.setHeaderData(self.SECTION, qtc.Qt.Horizontal, "Section")158    #    return model159    def jump_to(self, urlStr):160        """Jump to an internal link. May refer to a separate page in the 161        original epub, or to an anchor."""162        if _debug:163            print("Jumping to", urlStr)164        splitUrl = urlStr.split('#')165        href = splitUrl[0]166        # sectionText = self.the_epub.get_item_with_href(href).get_content()167        # self.mainText.setHtml(sectionText.decode('utf-8'))168        if len(splitUrl) > 1:169            self.mainText.scrollToAnchor(splitUrl[1])170            if _debug:171                print("ANCHORJUMP", splitUrl[1])172        else:173            if _debug:174                print("URLJUMP:", urlStr)175            self.mainText.scrollToAnchor(urlStr)176        # Update cursor position by moving the cursor to where we are177        # and getting its location.178        #  or should I do that only when they click?179        # TODO: move TOC highlight to wherever I jumped to180        #  (probably with a trigger for the scroll event, because181        #   it should work for scrolling too)182        browserRect = self.mainText.rect()183        newCursor = self.mainText.cursorForPosition(browserRect.topLeft())184        #newcursor = self.mainText.cursorForPosition(qtc.QPoint(0,0))185        self.mainText.setTextCursor(newCursor)186        if _debug:187            print("new cursor rect position:", self.mainText.cursorRect())188            print("Cursor position:", self.mainText.textCursor().position())189    190    def jump_to_tocitem(self, item):191        """Jump for a click in the Contents pane."""192        self.jump_to(item.text(1))193    def jump_to_qurl(self, url):194        # TODO: popup asking if want to open in browser195        if url.isRelative():196            self.jump_to(url.toString())197        198    def process_toc(self, toc_node, treenode):199        """ Map the epub ToC structure to a Qt tree view."""200        # rowCount = 0 # rows at each level201        # store and return hrefs for the correct ordering202        # hrefs = []203        filename_anchors = False204        for toc_entry in toc_node:205            if _debug:206                print(type(toc_entry)) # epub.Link or tuple207            if hasattr(toc_entry, 'title'):208                if _debug:209                    print(toc_entry.__dict__)210                newRow = qtw.QTreeWidgetItem(treenode)211                newRow.setText(self.SECTION, toc_entry.title)212                entry_href = toc_entry.href213                if entry_href.startswith('xhtml/'):214                    entry_href = entry_href[6:]215                newRow.setText(self.HREF, entry_href)216                if len(toc_entry.href.split('#')) < 2:217                    filename_anchors = True218                #newRow.setExpanded(True)219                ## leaving this here in case we ever need a custom model?220                #self.tocModel.insertRow(rowCount)221                #self.tocModel.setData(222                #    self.tocModel.index(level, rowCount), # self.SECTION),223                #    toc_entry.title)224                #rowCount += 1225            else: # it's a pair of the top level and sub-entries, so recurse226                newRow = qtw.QTreeWidgetItem(treenode)227                if _debug:228                    print("tuple[0] is", type(toc_entry[0]))229                    print(toc_entry[0].__dict__)230                newRow.setText(self.SECTION, toc_entry[0].title)231                entry_href = toc_entry[0].href232                if entry_href.startswith('xhtml/'):233                    entry_href = entry_href[6:]234                newRow.setText(self.HREF, entry_href)235                #newLevel = qtw.QTreeWidgetItem(treenode)236                # hrefs += self.process_toc(toc_entry[1], newRow) # newLevel237                filename_anchors |= self.process_toc(toc_entry[1], newRow)238                newRow.setExpanded(True)239        return filename_anchors240    def load_notes(self, bookfilename, author_last):241        '''Load the notes file corresponding to a book's hash into the242        notes pane.'''243        hasher = hashlib.sha256()244        bookfile = open(bookfilename, 'rb')245        hasher.update(bookfile.read())246        bookfile.close()247        b32str = cb32encode(hasher.digest()[:10])248        print("hash encode: ", author_last[:3] + '_' + b32str)249    250    251    def load_epub(self, filename):252        '''Load epub content, and also call out to generate the TOC and253        load the notes file.'''254        the_epub = epub.read_epub(filename)255        self.mainText.set_epub(the_epub) # have to set early256        #for k,v in the_epub.__dict__.items():257        #    print(k, ':', v)258            259        #doc_items = the_epub.get_items_of_type(ebooklib.ITEM_DOCUMENT)260        #doc_list = list(doc_items)261        all_items = the_epub.get_items()262        #for item in list(all_items):263            # will I have to make my own dictionary of these?264            # print("ITEM", item.file_name, item.get_id(), item.get_type())265        if _debug:266            print(the_epub.spine)267        self.tocPane.clear()268        filename_anchors = self.process_toc(the_epub.toc, self.tocPane)269        if filename_anchors:270            print("epub has toc links with filename only")271        # suppress "html" namespace prefix.272        ETree.register_namespace('', 'http://www.w3.org/1999/xhtml')273        # TODO: loop to get the first item that's linear.274        first_item = the_epub.get_item_with_id(the_epub.spine[0][0])275        # merge all the HTML file bodies into one.276        first_text = first_item.get_content().decode('utf-8')277        doc_tree = ETree.fromstring(first_text)278        doc_body = doc_tree.find('{http://www.w3.org/1999/xhtml}body')279        for uid, linear in the_epub.spine: # [1:] # file_hrefs[1:]:280            # TODO: if it's not linear, put it at the end.281            if _debug:282                print("LOADITEM:", uid)283            the_item = the_epub.get_item_with_id(uid)284            text = the_item.get_content().decode('utf-8')285            tree = ETree.fromstring(text)286            body = tree.find('{http://www.w3.org/1999/xhtml}body')287            # body.insert (anchor element, 0)?288            # create div elements with the name corresponding to the file289            if filename_anchors: 290                # tried 'name' and 'id'291                item_name = the_item.get_name()292                if item_name.startswith('xhtml/'):293                    item_name = item_name[6:]294                toc_div = ETree.Element('div', {'id': item_name})295                if _debug:296                    print("ANCHOR ADDED:", list(toc_div.items()))297                for child in body:298                    # why is 0 okay?299                    #toc_div.insert(0, child)300                    toc_div.append(child)301                    # print("CHILD ADDED")302                doc_body.append(toc_div)303            else:304                for child in body:305                    doc_body.append(child)306        fulltext = ETree.tostring(doc_tree, encoding='unicode')307        self.mainText.setHtml(fulltext)308        if _dumpHTML:309            print(fulltext)310        # TODO: have spinny until finished loading, so it won't be311        #  unresponsive (see the Bible)312        # this lies, it says before finished loading images313        print("load finished.") 314        author_full = the_epub.get_metadata("http://purl.org/dc/elements/1.1/", "creator")[0][0]315        author_last = author_full.split()[-1].lower()316        print(author_last)317        self.load_notes(filename, author_last)318# if __name__ == "__main__":319def main():320    app = qtw.QApplication(sys.argv)321    window = MainWindow()322    if len(sys.argv) > 1:323        bookFilename = sys.argv[1]324        window.load_epub(bookFilename)325    # this trick passes the app's exit code back to the OS....

BraveNewWordsScraper.py

Source:BraveNewWordsScraper.py

1#!/usr/bin/env python2# coding: utf-83# In[85]:4import requests5from requests_html import HTMLSession6import re7import csv8def findYear(text):9    yearPattern = re.compile("[1,2][0-9][0-9][0-9]")10    year = yearPattern.search(text)11    if year:12        return year.group()13    return None14def scraper():15    url_A = "http://www.oxfordreference.com/browse?btog=chap&page="16    url_B = "&pageSize=100&sort=titlesort&source=%2F10.1093%2Facref%2F9780195305678.001.0001%2Facref-9780195305678"17    base_selector = "#abstract_link"18    entry_dict = {}19    for i in range(1, 10):20        page_num = i21        url = url_A + str(page_num) + url_B22        base_page = HTMLSession().get(url)23        for j in range(1, 101):24            try:25                entry_num = j + 100*(i - 1)26                selector = base_selector + str(entry_num)27                entry = base_page.html.find(selector)[0]28                entry_name = entry.text29                entry_href = entry.attrs["href"]30                entry_url = "http://www.oxfordreference.com" + entry_href31                entry_page = HTMLSession().get(entry_url)32                sel = ".prosequoteType > p:nth-child(1)"33                try:34                    first_year = entry_page.html.find(sel)[0].text35                except Exception as e:36                    print(e)37                    print("On Entry #" + str(entry_num))38                    first_year = ""39                first_year = findYear(first_year)40                if first_year:41                    entry_dict[entry_name] = str(first_year)42                print("Entry #" + str(entry_num) + ": " + entry_name)43                print("\t year= " + str(first_year))44            except Exception as e:45                print(e)46                print("Exited inner loop at Entry #" + str(entry_num) + ", Page #" + str(page_num))47                print("Beginning Page #" + str(page_num + 1))48                break49    return entry_dict50data_dict = scraper()51with open('braveNewWords.csv', 'w', encoding="utf-8") as f:52    writer = csv.writer(f)53    for row in data_dict.items():...

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you be proficient with different test automation frameworks i.e. Selenium, Cypress, TestNG etc.