Best Python code snippet using autotest_python
web_scraping.py
Source: web_scraping.py
# encoding: utf-8
import logging
import argparse
import threading
from queue import Queue
import time
import sys
import re
import urllib.parse
from connections import mysql_database, firefox
from constants import element_selectors as select  #, filter_words

def find_element(obj, find_by, selector):
    ## dispatch on a string tag to the matching Selenium locator call
    if find_by == 'css':
        return obj.find_elements_by_css_selector(selector)
    elif find_by == 'css singular':
        return obj.find_element_by_css_selector(selector)
    elif find_by == 'partial link':
        return obj.find_elements_by_partial_link_text(selector)

def get_page(search_term, queue):
    ## fetch the Weibo search page for one term, retrying up to three times
    get_success = False
    get_failure = 0
    try:
        search_term = search_term.encode('utf-8')
    except UnicodeError:
        sys.exit('cannot encode search term to utf-8.')
    quoted_search_term = urllib.parse.quote(search_term)
    while get_success is False and get_failure < 3:
        try:
            driver = firefox.start()
            logging.info('\ngetting the page')
            driver.get('http://s.weibo.com/weibo/{}&Refer=STopic_box'.format(quoted_search_term))
            get_success = True
        except Exception as e:
            logging.info('\nAn error occurred trying to visit the page {}'.format(e))
            get_success = False
            get_failure += 1
            driver.quit()
            time.sleep(5)
        else:
            get_success, get_failure = expand_content(driver, get_success, get_failure)
    if get_success is True:
        data = scrape(driver)
        queue.put(data)
        logging.info('\nscraping done')
    return

def expand_content(driver, get_success, get_failure):
    ## click every expand link, then verify one collapse link appeared per click
    expand_success = False
    expand_failure = 0
    while expand_success is False and expand_failure < 3:
        try:
            expand = find_element(driver, 'partial link', select.EXPAND_TEXT)
            clicked = 0
            logging.info('\nexpanding elements')
            for ele in expand:
                ele.click()
                ## allow time for the click to complete
                time.sleep(5)
                clicked += 1
        except Exception as e:
            logging.info('\nAn error occurred trying to expand element {}'.format(e))
            expand_success = False
            expand_failure += 1
            time.sleep(5)
        else:
            collapse = find_element(driver, 'partial link', select.COLLAPSE_TEXT)
            ## if not aligning, restart the driver and revisit the page again
            if clicked != len(collapse):
                logging.info('\nno. of expand and collapse do not match. Start again...')
                get_success = False
                get_failure += 1
                driver.quit()
                time.sleep(10)
                break
            else:
                expand_success = True
    return get_success, get_failure

def scrape(driver):
    full_content_div = find_element(driver, 'css', select.FULL_CONTENT)
    content = []
    url = []
    mid = []
    for ele in full_content_div:
        ## filter posts that contain media, retweets, and links
        if (not find_element(ele, 'css', select.MEDIA)) and \
                (not find_element(ele, 'css', select.REBLOG)) and \
                (not find_element(ele, 'css', select.REBLOG_2)) and \
                (not find_element(ele, 'partial link', select.LINK)) and \
                (not find_element(ele, 'css', select.VIDEO)):
            ## filter short posts that are blank or contain certain keywords
            if (find_element(ele, 'css', select.CONTENT)) and \
                    (find_element(ele, 'css singular', select.CONTENT).text != ''):  # and \
                # (not(any(word in find_element(ele, 'css singular', select.CONTENT).text for word in filter_words.words))):
                content.append(find_element(ele, 'css singular', select.CONTENT).text)
                mid.append(ele.get_attribute("mid"))
                if find_element(ele, 'css', select.URL):
                    url.append(find_element(ele, 'css singular', select.URL))
            ## filter long posts that are blank or contain certain keywords
            if (find_element(ele, 'css', select.EXPANDED_CONTENT)) and \
                    (find_element(ele, 'css singular', select.EXPANDED_CONTENT).text != ''):  # and \
                # (not(any(word in find_element(ele, 'css singular', select.EXPANDED_CONTENT).text for word in filter_words.words))):
                content.append(find_element(ele, 'css singular', select.EXPANDED_CONTENT).text)
                mid.append(ele.get_attribute("mid"))
                if find_element(ele, 'css', select.URL):
                    url.append(find_element(ele, 'css singular', select.URL))
    url = [ele.get_attribute("href") for ele in url]
    if len(url) != len(content):
        logging.info('\nurl len: ' + str(len(url)))
        logging.info('\ncontent len: ' + str(len(content)))
        driver.quit()
        sys.exit('\nscraped content not aligning')
    logging.info('\nno. of posts scraped: {}'.format(len(content)))
    ## strip query strings; split() avoids dropping the last character when no '?' is present
    simplified_url = [u.split('?')[0] for u in url]
    uid = [u.split('/')[-2] for u in simplified_url]
    pid = [u.split('/')[-1] for u in simplified_url]
    cleaned_content = []
    EMOJI = re.compile('[\U00010000-\U0010ffff]', flags=re.UNICODE)
    for c in content:
        c = re.sub(r'\n+', ' ', c)
        c = re.sub(EMOJI, ' ', c)
        c = re.sub(',', '，', c)  ## replace half-width commas with full-width ones
        cleaned_content.append(c)
    driver.quit()
    data = {'content': cleaned_content, 'url': simplified_url, 'uid': uid, 'pid': pid, 'mid': mid}
    print(data)
    return data

def save_to_db(table, queue):
    saved = 0
    skipped = 0
    conn, cur = mysql_database.connect()
    while not queue.empty():
        cur.execute('SELECT url from {}'.format(table))
        ## turn tuple of tuples into list of strings
        existing_urls = [''.join(ele) for urls in list(cur.fetchall()) for ele in urls]
        data = queue.get()
        for i in range(len(data["content"])):
            if data["url"][i] not in existing_urls:
                try:
                    cur.execute('''INSERT INTO {} (Post_Id, Content, url, uid, pid, mid, pubdate, tested, test_date, status)
                                   VALUES (NULL, %s, %s, %s, %s, %s, CURDATE(), DEFAULT, DEFAULT, DEFAULT)'''.format(table),
                                (data["content"][i], data["url"][i], data["uid"][i], data["pid"][i], data["mid"][i]))
                    logging.info('saved pid: {}'.format(data["pid"][i]))
                    saved += 1
                    cur.connection.commit()
                except Exception as e:
                    logging.info('\nunable to insert pid {} into table. {}'.format(data["pid"][i], e))
                    continue
            else:
                skipped += 1
    logging.info('\nsaved: {}; skipped: {}'.format(saved, skipped))
    cur.execute('SELECT COUNT(*) FROM {}'.format(table))
    no_of_rows = str(cur.fetchone()[0])
    logging.info('\nno. of rows in database: {}'.format(no_of_rows))
    mysql_database.disconnect(conn, cur)
    return

if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO, format='%(asctime)s:%(threadName)s:%(message)s')
    parser = argparse.ArgumentParser()
    parser.add_argument('terms', nargs='+')
    parser.add_argument('table', nargs=1)
    args = vars(parser.parse_args())
    terms = args["terms"]
    table = args["table"][0]
    queue = Queue()
    try:
        threads = [threading.Thread(target=get_page, args=(terms[i], queue)) for i in range(len(terms))]
    except Exception as e:
        logging.info('\nunable to start scraping threads: {}'.format(e))
    for thread in threads:
        thread.start()
    # wait for all threads to complete.
    for thread in threads:
        thread.join()
    try:
        threading.Thread(target=save_to_db, args=(table, queue)).start()
    except Exception as e:
        ...
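The `find_element` helper at the top of this snippet dispatches on a string tag to Selenium's legacy `find_element(s)_by_*` methods, which were removed in Selenium 4. A minimal sketch of the same dispatcher written against the current `By`-based locator API (assuming `selenium>=4`; nothing else in the scraper needs to change):

from selenium.webdriver.common.by import By

def find_element(obj, find_by, selector):
    ## same three lookup modes as above, expressed with By locators
    if find_by == 'css':
        return obj.find_elements(By.CSS_SELECTOR, selector)
    elif find_by == 'css singular':
        return obj.find_element(By.CSS_SELECTOR, selector)
    elif find_by == 'partial link':
        return obj.find_elements(By.PARTIAL_LINK_TEXT, selector)

Because `obj` can be either the driver or a WebElement, the one helper serves both page-level lookups (in `get_page` and `expand_content`) and per-post lookups (in `scrape`).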
test_config_processing.py
Source: test_config_processing.py
...

def test_get_group():
    match = re.match(r'abc:\s*(1)', 'abc: 1')
    assert get_pass(match) == (None, None)
    assert get_success(match)
    assert not get_failure(match)
    match = re.match(r'abc: (foo)?(?P<success>1)', 'abc: 1')
    assert get_pass(match) == (None, None)
    assert get_success(match)
    assert get_failure(match)  # failure looks at group 2
    assert regex_test('pass', match)
    assert regex_test('pass', match)[0] == 'success'
    match = re.match(r'abc: (foo)?(?P<failure>1)', 'abc: 1')
    assert get_pass(match) == (None, None)
    assert not get_success(match)
    assert get_failure(match)

def test_check_results():
    data = [(1, ('success', (0, 1)), 'bar', True)]
    assert check_result(data)
    data = [
        (1, ('success', (0, 1)), 'bar', True),
        (2, ('success', (1, 2)), 'bar', True)
    ]
    assert check_result(data)
    data = [
        (1, ('success', (0, 1)), 'bar', True),
        (2, ('failure', (1, 2)), 'bar', True)
    ]
    assert not check_result(data)
    data = [
...
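The helpers exercised here (`get_pass`, `get_success`, `get_failure`, `regex_test`) are not shown on this page, but the assertions turn on how Python's `re` numbers capture groups: a named group also occupies a positional slot, which is why the inline comment notes that failure "looks at group 2". A quick standalone illustration of that mechanic:

import re

match = re.match(r'abc: (foo)?(?P<success>1)', 'abc: 1')
print(match.groupdict())       # {'success': '1'}
print(match.group('success'))  # 1
print(match.group(2))          # 1 -- the named group is also positional group 2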
combination-of-fruits.py
Source: combination-of-fruits.py
from typing import List
import unittest

def get_failure(group):
    ## KMP failure function, treating "anything" as a wildcard that matches any fruit
    failure = [0] * len(group)
    j = 0
    for i in range(1, len(group)):
        fruit = group[i]
        while j > 0 and not (group[i] == group[j] or group[i] == "anything" or group[j] == "anything"):
            j = failure[j - 1]
        if group[i] == group[j] or group[i] == "anything" or group[j] == "anything":
            j += 1
        failure[i] = j
    return failure

def checkWinner(codeList, shoppingCart):
    failures = [get_failure(group) for group in codeList]
    # print(f"failures: {failures}")
    # failures = [[0, 1], [0, 1, 2]]
    group_idx = 0
    fruit_idx = 0
    group, failure = codeList[group_idx], failures[group_idx]
    for fruit in shoppingCart:
        # print()
        # print(f"fruit={fruit}")
        # print(f"group={group}, fruit_idx={fruit_idx}")
        while fruit_idx > 0 and not (fruit == group[fruit_idx] or group[fruit_idx] == "anything"):
            fruit_idx = failure[fruit_idx - 1]
            # print(f"failure: fruit_idx <- {fruit_idx}")
            # print(f"group={group}, fruit_idx={fruit_idx}")
        if fruit == group[fruit_idx] or group[fruit_idx] == "anything":
...
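`get_failure` is the standard KMP failure-function preprocessing, extended so the literal "anything" matches any fruit. A short usage example, reproducing the arrays noted in the snippet's own comment (`failures = [[0, 1], [0, 1, 2]]`):

print(get_failure(["apple", "apple"]))                # [0, 1]
print(get_failure(["banana", "anything", "banana"]))  # [0, 1, 2]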