test_postagger.py
Source: test_postagger.py
1"""2A series of tests to check that pos tagging for each package occurs as expected3"""4from source.tag_pos import _pos_tag_sentence, _read_tag_map, map_results_to_universal_tags5import pytest6from source.tokenizer_functions import tokenize7LIST_PACKAGES = ['nltk', 'stanza', 'spacy', 'flair', 'article']8@pytest.fixture9def documents():10 documents_list = [11 "Thats an approach I had not considered . I'd end up polling from the Javascript until something changes , but that might work .",12 "URL decoding in Javascript",13 "How to register a JavaScript callback in a Java Applet ?",14 "I would not disagree with this .",15 ]16 return documents_list17@pytest.fixture18def documents_pos_tagging_article():19 documents_list = [20 "Using JavaScript's escape / unescape function is almost always the wrong thing , it is incompatible with URL-encoding or any other "21 "standard encoding on the web . Non-ASCII characters are treated unexpectedly as well as spaces , and older browsers don't necessarily"22 " have the same behaviour . As mentioned by roenving , the method you want is decodeURIComponent() . This is a newer addition which you "23 "won't find on IE 5.0 , so if you need to support that browser ( let's hope not , nowadays ! ) you'd need to implement the function yourself ."24 " And for non-ASCII characters that means you need to implement a UTF-8 encoder . Code is available if you need it ."25 ]26 return documents_list27@pytest.fixture28def documents_pos_tagging_article2():29 documents_list = [30 "Thats an approach I had not considered . I'd end up polling from the Javascript until something changes , but that might work ."31 ]32 return documents_list33def test_pos_tagging_article(documents_pos_tagging_article: list):34 """35 The idea is to confirm that the document that is tagged is the same as the original tokenized document36 """37 # tokens are the same38 for doc in documents_pos_tagging_article:39 assert [i[1] for i in _pos_tag_sentence('article', doc)][1] == '^'40def test_pos_tagging_article2(documents_pos_tagging_article2: list):41 """42 The idea is to confirm that the document that is tagged is the same as the original tokenized document43 """44 # tokens are the same45 for doc in documents_pos_tagging_article2:46 assert [i[1] for i in _pos_tag_sentence('article', doc)][2] == 'V'47def test_tokens_for_each_packages(documents: list):48 """49 The idea is to confirm that the document that is tagged is the same as the original tokenized document50 """51 # tokens are the same52 for doc in documents:53 flatten_list_tokens = [item for sublist in tokenize(doc) for item in sublist]54 for lib in LIST_PACKAGES:55 assert [i[0] for i in _pos_tag_sentence(lib, doc)] == flatten_list_tokens56def test_each_package_returns_same_number_results(documents: list):57 """58 Some packages work on batches and others on individual sentences, make sure the resulted tagging has the correct number59 of tokens per document and that the tokens are the same60 """61 # tokens are the same62 for doc in documents:63 tokens_ref = [i[0] for i in _pos_tag_sentence(LIST_PACKAGES[0], doc)]64 for lib in LIST_PACKAGES:65 assert tokens_ref == [i[0] for i in _pos_tag_sentence(lib, doc)]66 # same number of tokens67 for doc in documents:68 length_ref = len(_pos_tag_sentence(LIST_PACKAGES[0], doc))69 for lib in LIST_PACKAGES:70 assert length_ref == len(_pos_tag_sentence(lib, doc))71def test_each_token_has_a_tag(documents: list):72 """73 Check that each tag is a valid key in the tag_map74 """75 mappings = _read_tag_map()76 keys = 
list(mappings['UNIV'].keys()) + list(mappings['PTB-UNIV'].keys()) + list(mappings['ARTICLE-UNIV'].keys())77 for doc in documents:78 for lib in LIST_PACKAGES:79 assert all(item in keys for item in [i[1] for i in _pos_tag_sentence(lib, doc)]) == True80def test_each_token_has_a_mapped_correct_tag(documents: list):81 """82 Check that each mapped tag is a valid value83 """84 mappings = _read_tag_map()85 values = list(mappings['UNIV'].values()) + list(mappings['PTB-UNIV'].values()) + list(86 mappings['ARTICLE-UNIV'].values())87 for doc in documents:88 for lib in LIST_PACKAGES:89 assert all(90 item in values for item in...
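Since `source.tag_pos` itself isn't shown on this page, here is a minimal sketch of the contract the assertions above rely on: `_pos_tag_sentence` apparently returns a list of (token, tag) pairs, which the tests index with `i[0]` and `i[1]`. `fake_pos_tag_sentence` and its 'NOUN' tags are hypothetical stand-ins, not the real tagger:

# Hypothetical stand-in for source.tag_pos._pos_tag_sentence, only to
# illustrate the (token, tag) shape that the tests above index into.
def fake_pos_tag_sentence(lib, doc):
    # a real tagger would run `lib` over `doc`; here everything is tagged 'NOUN'
    return [(token, 'NOUN') for token in doc.split()]

pairs = fake_pos_tag_sentence('nltk', "URL decoding in Javascript")
tokens = [i[0] for i in pairs]  # ['URL', 'decoding', 'in', 'Javascript']
tags = [i[1] for i in pairs]    # ['NOUN', 'NOUN', 'NOUN', 'NOUN']
assert tokens == "URL decoding in Javascript".split()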
test_tag_statistics.py
Source: test_tag_statistics.py
1"""2Sanity checks that the analysis produce valid results3"""4import pytest5from source.tag_pos import _pos_tag_sentence, map_results_to_universal_tags6from source.tag_statistics import *7LIST_PACKAGES = ['nltk', 'stanza', 'spacy', 'flair', 'article']8@pytest.fixture9def documents():10 documents_list = [11 "Thats an approach I had not considered . I'd end up polling from the Javascript until something changes , but that might work .",12 "URL decoding in Javascript",13 "How to register a JavaScript callback in a Java Applet ?",14 "I would not disagree with this .",15 ]16 return documents_list17def test_nb_votes(documents: list):18 """we check that we have the right number of votes for a given token19 """20 for doc in documents:21 iterables = [map_results_to_universal_tags(_pos_tag_sentence(lib, doc), lib) for lib in LIST_PACKAGES]22 for list_token_tags in zip(*iterables):23 assert len([i[1] for i in list_token_tags]) == len(LIST_PACKAGES)24 assert len(list(set([i[0] for i in list_token_tags]))) == 125def test_token_majority(documents: list):26 """we check that we have the token with the majority votes is indeed the one having most votes, and we check how many votes he gets27 """28 for doc in documents:29 iterables = [map_results_to_universal_tags(_pos_tag_sentence(lib, doc), lib) for lib in LIST_PACKAGES]30 for list_token_tags in zip(*iterables):31 number_votes_majority_token = return_number_votes_majority_token(list_token_tags)32 assert number_votes_majority_token <= len(list_token_tags)33 assert number_votes_majority_token >= 134 assert number_votes_majority_token == max([list_token_tags.count(i) for i in list_token_tags])35def test_unique_tokens_voted(documents: list):36 """we check that the number of unique tokens voted is the right number37 """38 for doc in documents:39 iterables = [map_results_to_universal_tags(_pos_tag_sentence(lib, doc), lib) for lib in LIST_PACKAGES]40 for list_token_tags in zip(*iterables):41 nb_unique_tokens = return_unique_tokens(list_token_tags)42 number_votes_majority_token = return_number_votes_majority_token(list_token_tags)43 assert nb_unique_tokens <= len(list_token_tags)44 assert nb_unique_tokens >= 145 assert nb_unique_tokens <= len(list_token_tags) - number_votes_majority_token + 146def test_wether_majority_token_equals_gt(documents: list):47 """we check whether the comparison between majority and GT is correct48 """49 for doc in documents:50 iterables = [map_results_to_universal_tags(_pos_tag_sentence(lib, doc), lib) for lib in LIST_PACKAGES]51 for list_token_tags in zip(*iterables):52 majority_token = return_majority_token(list_token_tags)53 bool = return_wether_majority_token_equals_gt(list_token_tags)54 assert bool in [True, False]...
Scraper.py
Source: Scraper.py
import requests
from bs4 import BeautifulSoup


def scrap_n_top_packages_per_day(n):
    """Scrape the n most-downloaded PyPI packages of the day from pypistats.org."""
    response = requests.get("https://pypistats.org/top")
    soup = BeautifulSoup(response.content, 'html.parser')
    all_a = [el.text for el in soup.find_all('a')]
    # the first anchors are navigation links; the package list starts at index 4
    start = 4
    end = start + n
    top_n_packages_per_day = all_a[start:end]
    return top_n_packages_per_day


list_packages = scrap_n_top_packages_per_day(n=10)
print(list_packages)
print('')


def scrap_requirements_for_package_list(list_packages):
    """Find the GitHub address of each package from its PyPI project page."""
    github_addresses = []
    for package_name in list_packages:
        response = requests.get("https://pypi.org/project/" + str(package_name))
        soup = BeautifulSoup(response.content, 'html.parser')
        for el in soup.find_all('a', href=True):
            if 'github' in el['href'] and el['href'].endswith(str(package_name)):
                github_addresses.append(el['href'])
                break
    return github_addresses


ga = scrap_requirements_for_package_list(list_packages)
print(ga)
print('')


def finding_requirement_address_for_packages(list_packages, github_addresses):
    """On each GitHub page, find the link to the requirements file."""
    requirements = {}
    for index, address in enumerate(github_addresses):
        response = requests.get(str(address))
        soup = BeautifulSoup(response.content, 'html.parser')
        for el in soup.find_all('a', href=True):
            if 'requirements' in el['href']:
                requirements[list_packages[index]] = el['href']
                break
    return requirements


requirement_address_dict = finding_requirement_address_for_packages(list_packages, ga)
print(requirement_address_dict)
print('')


def get_requirements_for_package(requirement_address_dict):
    """Fetch each requirements file and keep only the package names."""
    requirements = {}
    for package_name, address in requirement_address_dict.items():
        # hrefs scraped from GitHub pages are relative paths, so prefix the domain
        response = requests.get('https://github.com/' + str(address))
        soup = BeautifulSoup(response.content, 'html.parser')
        requirements_package = []
        for el in soup.find_all('tr'):
            requirements_package.append(el.text.split("=")[0].strip().rstrip('>'))
        requirements[package_name] = requirements_package
    return requirements


requirements_dict = get_requirements_for_package(requirement_address_dict)


def plot_requirements_Graph_for_package(requirements_dict):
    """Print each package followed by a tree of its requirements."""
    for package_name, requirement_list in requirements_dict.items():
        print(f'{package_name}')
        for el in requirement_list:
            # the original listing's tree character was mojibake ("âââ")
            print(f'├── {el}')
        print('')
    return None
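One detail worth calling out: `get_requirements_for_package` strips version specifiers with `el.text.split("=")[0].strip().rstrip('>')`. On typical requirement lines this behaves as follows (illustrative inputs, not scraped data):

# How the version-specifier stripping behaves on typical requirement lines.
# "requests>=2.25.0" -> split on "=" gives "requests>" -> rstrip('>') gives "requests"
for line in ["requests>=2.25.0", "numpy==1.21.0", "flask"]:
    print(line.split("=")[0].strip().rstrip('>'))
# prints: requests, numpy, flask (one per line)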