Best Python code snippet using autotest_python
DBPIA_CRAWLER.py
Source:DBPIA_CRAWLER.py
from time import sleep
import requests, re
from bs4 import BeautifulSoup
import random
from pymongo import MongoClient

'''
 @MongoDB connection URI / DB names
'''
client = MongoClient('mongodb://203.255.92.141:27017', authSource='admin')
db = client.DBPIA
pubDB = client.PUBLIC
Author = db.Author
Status = pubDB.DBPIA_CRAWLER
sl_start = 5.0

'''
University name table
'''
def isEnglishOrKorean(input_s):
    # Classify a string as Korean ("k") or English ("e") by counting
    # characters in the precomposed Hangul syllable block (가..힣)
    k_count = 0
    e_count = 0
    try:
        for c in input_s:
            if ord('가') <= ord(c) <= ord('힣'):
                k_count += 1
            elif ord('a') <= ord(c.lower()) <= ord('z'):
                e_count += 1
        return "k" if k_count > 1 else "e"
    except TypeError as e:
        print(input_s)
        return "e"

def check_college(univ0):
    # Universities with branch campuses that need disambiguation:
    # Sungkyunkwan, Konkuk, Hanyang
    branch_set = ['성균관대학교', '건국대학교', '한양대학교']
    univName = client['PUBLIC']['CollegeName']
    # Drop "industry-academic cooperation foundation" / "hospital" suffixes,
    # then put a space after "대학교" (university) so the name splits cleanly
    univ1 = re.sub("산학협력단|병원", "", univ0)
    univ2 = re.sub("대학교", "대학교 ", univ1)

    try:
        if univ0 == "":
            return univ0
        if isEnglishOrKorean(univ0) == 'e':
            univ0 = univ0.upper()
            univ0 = univ0.replace('.', ',')
            univ = univ0.split(', ')
        else:
            univ = univ2.replace(",", "").split()
            univ = list(set(univ))

        for uni in univ:
            if uni in branch_set:
                # Fixed: the original tested ("ERICA" or "에리카") in univ0, which
                # short-circuits to "ERICA" and never checks the Korean spelling
                if "ERICA" in univ0 or "에리카" in univ0:
                    univ[univ.index("한양대학교")] = "한양대학교(ERICA캠퍼스)"
                elif "글로컬" in univ0 or "GLOCAL" in univ0:  # same fix as above
                    if "건국대학교" in univ0:
                        univ[univ.index("건국대학교")] = "건국대학교 GLOCAL(글로컬)캠퍼스"
                    else:
                        univ[univ.index("성균관대학교")] = "성균관대학교"
                elif "자연과학캠퍼스" in univ0:
                    univ[univ.index("성균관대학교")] = "성균관대학교(자연과학캠퍼스)"
        # Builds a JSON string and eval()s it into a query filter;
        # see the dict-based sketch after this snippet
        univs = '{"$or": ['
        for u in range(len(univ)):
            if univ[-1] == univ[u]:
                univs += '{"inputName": "' + univ[u] + '"}'
            else:
                univs += '{"inputName": "' + univ[u] + '"}, '
        univs += ']}'
        univ_query = univName.find_one(eval(univs))
        if univ_query is None:
            return univ0
        else:
            return univ_query['originalName']

    except SyntaxError as e:
        return univ0

'''
 @Crawl author affiliations by DBPIA author ID (using BeautifulSoup)
'''
while True:
    soup = ""
    i = ""
    try:
        for doc in Author.find({"hasInst": False}).batch_size(1):
            print("DBPIA Inst. Crawler :", doc['name'], doc['_id'])
            i = int(doc['_id'])
            # If the URL changes, update this part
            url = 'https://www.dbpia.co.kr/author/authorDetail?ancId={}'.format(i)
            conn = requests.get(url, timeout=60).text
            soup = BeautifulSoup(conn, 'html.parser')
            # Parsed tag info (update if the page layout changes)
            division_extract = soup.select('dd')
            name_extract = soup.select('h2')
            test_extract = name_extract[0].text.strip().split("\n")
            division = division_extract[3].text.strip()
            department = division_extract[4].text.strip()
            new_name = test_extract[0].strip()
            print(new_name)
            # "논문수" (paper count) in the name slot means the page had no author name
            if '논문수' == new_name:
                Author.delete_one({"_id": doc['_id']})
                Status.find_one_and_update({"_id": 4865}, {"$inc": {"total": -1}})
                continue

            if division == '-' and department == '-':
                inst = ''
            elif department == '-':
                department = ''
                inst = division + department
            else:
                inst = division + ' ' + department
            if len(test_extract) < 8:
                if len(test_extract) == 3:
                    papers_count = test_extract[0].strip("논문수 ")
                    used_count = test_extract[2].strip("이용수 ")
                else:
                    papers_count = test_extract[3].strip("논문수 ")
                    used_count = test_extract[5].strip("이용수 ")
                citation_count = 0
            else:
                # Fixed: the original never set papers_count/used_count on this
                # path; indices 3 and 5 mirror the branch above
                papers_count = test_extract[3].strip("논문수 ")
                used_count = test_extract[5].strip("이용수 ")
                citation_count = test_extract[7].strip("피인용수 ")
            original_inst = check_college(inst)
            Author.update_one({"_id": doc['_id']}, {'$set': {"plusName": new_name, "inst": inst, "hasInst": True, "papers_count": papers_count.strip(), "used_count": used_count.strip(), 'citation_count': citation_count, 'originalName': original_inst}})
            Status.find_one_and_update({"_id": 4865}, {"$inc": {"crawled": 1}})
            requests.session().close()  # opens and closes a fresh session; effectively a no-op
            print("DBPIA Inst. Crawler :", doc['name'], ", Crawled, [inst , (paper, used, citation count)] : {} , ({}, {}, {})".format(inst.strip(), papers_count.strip(), used_count.strip(), citation_count))
            sleep(random.uniform(sl_start, sl_start + 5.0))
        Status.find_one_and_update({"_id": 4865}, {"$set": {"status": 0}})
        print("Inst All Crawled")
        break
    except Exception as e:
        # Back off: grow the sleep window by 10% after every failure
        sl_start = sl_start * 1.1

        try:
            # "불편" appears in DBPIA's "sorry for the inconvenience" error page title
            if '불편' in soup.select('.tit')[0].text:
                Author.update_one({"_id": doc['_id']}, {'$set': {"plusName": '', "inst": '', "hasInst": True, "papers_count": 0, "used_count": 0, 'citation_count': 0}})
                Status.find_one_and_update({"_id": 4865}, {"$inc": {"crawled": 1}})
                print('error no id')
        except Exception as e:
            Author.delete_one({"_id": doc['_id']})
            Status.find_one_and_update({"_id": 4865}, {"$inc": {"total": -1}})
            print('Another Error', e)
        print("Inst exception", e)
        print("Sleep Time Increased by ", sl_start)
...
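In check_college above, the $or filter is assembled as a JSON string and passed through eval, which is why a stray quote in a university name can raise the SyntaxError the function catches. pymongo accepts plain dicts as query filters, so the same lookup can be built without any string assembly. A minimal sketch, assuming the same CollegeName collection layout (inputName mapping to originalName); find_original_name is a hypothetical helper name, not from the original source:

# Minimal sketch: the same $or lookup built as a dict, no eval() required.
# `univName` is a pymongo collection; `candidates` is the list of split name parts.
def find_original_name(univName, candidates, fallback):
    query = {"$or": [{"inputName": u} for u in candidates]}
    doc = univName.find_one(query)
    return doc['originalName'] if doc is not None else fallback

Because the filter never round-trips through Python source text, quoting and escaping inside the names cannot break the query.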
extraction.py
Source:extraction.py
# Packet creation / manipulation
from scapy.all import *
# Turn payload into bytes
import binascii
# Hash equivalent of key
import hashlib

# steganograms, payloadA, payloadB and key are defined earlier in the
# script and are not shown in this snippet
test_extract = ""
for i in steganograms:
    temp_bytes = binascii.hexlify(bytes(i))
    payload_ctr = False
    for ctr in range(0, len(temp_bytes) - 2, 2):
        check_byte = temp_bytes[ctr:ctr+2]
        if check_byte == b'44' and temp_bytes[ctr+2:ctr+4] == b'04':
            if payload_ctr:
                temp_hex = temp_bytes[ctr + 6:ctr + 8]
                temp_bin = bin(int(temp_hex, 16))[2:]
                temp_bin = ("0" * (8 - len(temp_bin))) + temp_bin
                temp_bin = temp_bin[:4]
                test_extract += temp_bin
                print(temp_bin)
            else:
                payload_ctr = True
# Compare the binary payload and the binary extracted
print(payloadA, end="\n\n")
print(test_extract, end="\n\n")
# Fixed: the original wrote print("Checker:" + payloadA == test_extract),
# where '+' binds before '==', so it compared the concatenated string
print("Checker:", payloadA == test_extract, end="\n\n")
# Turn the binary into bytes
extracted_payload = bytes(int(test_extract[i : i + 8], 2) for i in range(0, len(test_extract), 8))
# Compare the hash value of payload and extracted payload
print(payloadB.digest())
print(hashlib.sha256(extracted_payload).digest(), end="\n\n")
print("Checker:", payloadB.digest() == hashlib.sha256(extracted_payload).digest(), end="\n\n")
# Compare the original key and the extracted payload
print(key)
print(bytes(extracted_payload))
...
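The loop above scans the hexlified packet two hex digits (one byte) at a time: the first 0x44 0x04 marker arms the extractor, and every later marker contributes the high nibble of the byte three positions past the marker start. A self-contained sketch of the same scan over a synthetic byte string (the marker positions and the payload byte 0x9C are made up for illustration, not taken from real captures):

import binascii

# Synthetic "packet": two 0x44 0x04 markers; the first only arms the
# extractor, the second yields the high nibble of packet[9] (0x9C).
packet = bytes([0x00, 0x44, 0x04, 0x00, 0x00, 0xA5, 0x44, 0x04, 0x00, 0x9C])
temp_bytes = binascii.hexlify(packet)

bits = ""
armed = False
for ctr in range(0, len(temp_bytes) - 2, 2):
    if temp_bytes[ctr:ctr + 2] == b'44' and temp_bytes[ctr + 2:ctr + 4] == b'04':
        if armed:
            byte_val = int(temp_bytes[ctr + 6:ctr + 8], 16)
            bits += format(byte_val, "08b")[:4]  # keep only the high nibble
        else:
            armed = True
print(bits)  # "1001", the top four bits of 0x9C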
test_utils.py
Source:test_utils.py
...
        if os.path.exists('/tmp/test_download.zip'):
            os.remove('/tmp/test_download.zip')  # fixed: shutil.rmtree() fails on a file
        download_zip('/tmp/test_download.zip')
        self.assertTrue(os.path.exists('/tmp/test_download.zip'))

    def test_extract(self):
        if os.path.isdir('/tmp/test_extract'):
            shutil.rmtree('/tmp/test_extract')
        if os.path.exists('/tmp/test_download.zip'):
            os.remove('/tmp/test_download.zip')
        download_zip('/tmp/test_extract.zip')
        extract_zip('/tmp/test_extract.zip')
...
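The fixture above calls two helpers, download_zip and extract_zip, that the snippet does not show. A hypothetical sketch of helpers matching those call signatures (the fixture URL and the convention of extracting next to the archive are assumptions, not from the original source):

import os
import urllib.request
import zipfile

# Assumed placeholder URL; the real fixture location is not in the snippet.
ZIP_URL = "https://example.com/fixture.zip"

def download_zip(dest_path):
    # Fetch the fixture archive and write it to dest_path.
    urllib.request.urlretrieve(ZIP_URL, dest_path)

def extract_zip(zip_path):
    # Unpack into a directory named after the archive,
    # e.g. /tmp/test_extract.zip -> /tmp/test_extract
    target = os.path.splitext(zip_path)[0]
    with zipfile.ZipFile(zip_path) as zf:
        zf.extractall(target)

Extracting into a directory derived from the archive name is what lets the test_extract test assert on /tmp/test_extract afterwards.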