Best Python code snippet using selene_python
read_data.py
Source: read_data.py
import data_prep


def read_dataset(dataset_key):
    """Read the dataset identified by *dataset_key* from the res/ directory.

    Supported keys: "KDD_train+", "KDD_test+", "iris_train", "iris_test",
    "NB15_train", "NB15_test".

    Returns:
        (dataset, value_space_dict) where *dataset* is a list of
        {attribute_name: value} dicts (numeric columns converted to float)
        and *value_space_dict* maps each nominal attribute to the list of
        values it may take. Returns None for an unknown key.
    """
    if dataset_key in ("KDD_train+", "KDD_test+"):
        # NSL-KDD: fixed attribute schema, comma-separated text files.
        # (Bug fix: the scraped paths contained mojibake characters.)
        if dataset_key == "KDD_train+":
            file_path = "res/mod_NSL/KDDTrain+.txt"
        else:
            file_path = "res/mod_NSL/KDDTest+.txt"
        dataset = []
        attr_names = [
            'duration', 'protocol_type', 'service', 'flag', 'src_bytes',
            'dst_bytes', 'land', 'wrong_fragment', 'urgent', 'hot',
            'num_failed_logins', 'logged_in', 'num_compromised', 'root_shell',
            'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
            'num_access_files', 'num_outbound_cmds', 'is_host_login',
            'is_guest_login', 'count', 'srv_count', 'serror_rate',
            'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
            'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate',
            'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate',
            'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate',
            'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
            'dst_host_srv_serror_rate', 'dst_host_rerror_rate',
            'dst_host_srv_rerror_rate', 'class'
        ]
        # Known value spaces for the nominal attributes; 'class' is filled
        # in while reading because the label set differs between files.
        value_space_dict = {'protocol_type': ['tcp', 'udp', 'icmp'],
                            'service': ['aol', 'auth', 'bgp', 'courier', 'csnet_ns', 'ctf', 'daytime', 'discard',
                                        'domain', 'domain_u', 'echo', 'eco_i', 'ecr_i', 'efs', 'exec', 'finger', 'ftp',
                                        'ftp_data', 'gopher', 'harvest', 'hostnames', 'http', 'http_2784', 'http_443',
                                        'http_8001', 'imap4', 'IRC', 'iso_tsap', 'klogin', 'kshell', 'ldap', 'link',
                                        'login', 'mtp', 'name', 'netbios_dgm', 'netbios_ns', 'netbios_ssn', 'netstat',
                                        'nnsp', 'nntp', 'ntp_u', 'other', 'pm_dump', 'pop_2', 'pop_3', 'printer',
                                        'private', 'red_i', 'remote_job', 'rje', 'shell', 'smtp', 'sql_net', 'ssh',
                                        'sunrpc', 'supdup', 'systat', 'telnet', 'tftp_u', 'tim_i', 'time', 'urh_i',
                                        'urp_i', 'uucp', 'uucp_path', 'vmnet', 'whois', 'X11', 'Z39_50'],
                            'flag': ['OTH', 'REJ', 'RSTO', 'RSTOS0', 'RSTR', 'S0', 'S1', 'S2', 'S3', 'SF', 'SH'],
                            'class': []}
        with open(file_path, 'r') as file:
            for line in file:
                attribute_values = line.split(',')
                # Strip the trailing newline from the last field.
                # (Bug fix: the original sliced off the last character
                # unconditionally, which corrupts a final line that has no
                # trailing newline; rstrip('\n') is safe either way.)
                attribute_values[-1] = attribute_values[-1].rstrip('\n')
                # Drop the last (extra) column in each row; it is not part
                # of the attribute schema above.
                attribute_values.pop()
                # Convert the numeric columns to float (columns 1-3 are
                # nominal: protocol_type, service, flag).
                attribute_values[0] = float(attribute_values[0])
                for index in range(4, 41):
                    attribute_values[index] = float(attribute_values[index])
                data_row = dict(zip(attr_names, attribute_values))
                if data_row['class'] not in value_space_dict['class']:
                    value_space_dict['class'].append(data_row['class'])
                dataset.append(data_row)
        return dataset, value_space_dict
    elif dataset_key in ("iris_train", "iris_test"):
        if dataset_key == "iris_train":
            file_path = "res/iris_data/iris_train.data"
        else:
            file_path = "res/iris_data/iris_test.data"
        attr_names = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
        dataset = []
        value_space_dict = {'class': []}
        with open(file_path, 'r') as file:
            for line in file:
                # Skip blank/short lines (the iris files end with one).
                if len(line) < 4:
                    continue
                attribute_values = line.split(',')
                # Strip the trailing newline from the class label (see the
                # KDD branch for why rstrip is used instead of slicing).
                attribute_values[-1] = attribute_values[-1].rstrip('\n')
                # The first four columns are numeric measurements.
                for index in range(0, 4):
                    attribute_values[index] = float(attribute_values[index])
                data_row = dict(zip(attr_names, attribute_values))
                if data_row['class'] not in value_space_dict['class']:
                    value_space_dict['class'].append(data_row['class'])
                dataset.append(data_row)
        return dataset, value_space_dict
    elif dataset_key in ("NB15_train", "NB15_test"):
        if dataset_key == "NB15_train":
            file_path = "res/UNSW-NB15/UNSW_NB15_training-set.csv"
        else:
            file_path = "res/UNSW-NB15/UNSW_NB15_testing-set.csv"
        attr_names = ["dur", "proto", "service", "state", "spkts", "dpkts", "sbytes", "dbytes", "rate", "sttl",
                      "dttl", "sload", "dload", "sloss", "dloss", "sinpkt", "dinpkt", "sjit", "djit", "swin",
                      "stcpb", "dtcpb", "dwin", "tcprtt", "synack", "ackdat", "smean", "dmean", "trans_depth",
                      "response_body_len", "ct_srv_src", "ct_state_ttl", "ct_dst_ltm", "ct_src_dport_ltm",
                      "ct_dst_sport_ltm", "ct_dst_src_ltm", "is_ftp_login", "ct_ftp_cmd", "ct_flw_http_mthd",
                      "ct_src_ltm", "ct_srv_dst", "is_sm_ips_ports", "class"]
        dataset = []
        # Binary classification: every non-"Normal" attack category is
        # collapsed into "Intrusion" below, so the value space is fixed.
        value_space_dict = {'class': ["Normal", "Intrusion"]}
        with open(file_path, 'r') as file:
            for line_num, line in enumerate(file):
                # Skip the CSV header row.
                if line_num == 0:
                    continue
                # Skip blank/short lines.
                if len(line) < 4:
                    continue
                attribute_values = line.split(',')
                # Drop the leading "id" column and the trailing label
                # column; the remaining last column is the attack
                # category, mapped to 'class'. The popped label carried
                # the newline, so no extra stripping is needed.
                attribute_values.pop(0)
                attribute_values.pop()
                # Convert the numeric columns to float (columns 1-3 are
                # nominal: proto, service, state).
                attribute_values[0] = float(attribute_values[0])
                # NOTE(review): the original also leaves the last two kept
                # columns unconverted; preserved as-is here.
                for index in range(4, len(attribute_values) - 2):
                    attribute_values[index] = float(attribute_values[index])
                data_row = dict(zip(attr_names, attribute_values))
                # Collapse all attack categories into a single label.
                if data_row['class'] != "Normal":
                    data_row['class'] = "Intrusion"
                dataset.append(data_row)
        return dataset, value_space_dict


def calculate_intervals(dataset_key, dataset, value_space_dict):
    """Compute MDLP discretization cuts for every float attribute.

    Mutates *value_space_dict*: each float attribute's entry is replaced
    by its list of interval cut points.

    Returns:
        {attribute_name: interval_cuts} for supported training keys
        ("KDD_train+", "NB15_train"); None (after printing an error)
        otherwise.
    """
    if dataset_key == "KDD_train+" or dataset_key == "NB15_train":
        print("Interval calculations has begun.")
        # Collect a [value, class] pair list for every float-typed
        # attribute, using the first row to detect the float columns.
        float_attr = {}
        for attr in dataset[0]:
            if isinstance(dataset[0][attr], float):
                float_attr[attr] = []
        print("Floats initiated.")
        for data_row in dataset:
            for attr in float_attr:
                float_attr[attr].append([data_row[attr], data_row['class']])
        print("Floats parsed to lists")
        # Calculate the interval cuts attribute by attribute (MDLP is the
        # expensive step, hence the progress print per attribute).
        interval_cuts = {}
        count_finished_attr = 0
        for attr in float_attr:
            interval_cuts[attr] = data_prep.mdlp(float_attr[attr])
            value_space_dict[attr] = interval_cuts[attr]
            count_finished_attr += 1
            print("Calculate intervals status: {} of {} attribute intervals created.".format(count_finished_attr, len(float_attr)))
        return interval_cuts
    else:
        # Typo fix: "Unkown" -> "Unknown".
        print("Error: Unknown dataset key {} for interval calculation.".format(dataset_key))


def find_interval_value(data_value, interval_cuts):
    """Return the first cut >= *data_value* as a string, or "-1" when the
    value lies above every cut (interval_cuts must be in ascending order)."""
    for cut in interval_cuts:
        if data_value <= cut:
            return str(cut)
    return str(-1)


def discretize_to_intervals(dataset, intervals):
    """Replace float attribute values with their interval labels in place.

    NOTE(review): the source was truncated at this point; only the loop
    headers are visible, so the body is left as a placeholder.
    """
    for data_row in dataset:
        for attr_name in intervals:
            ...
alibaba_scrapper.py
Source: alibaba_scrapper.py
import requests
from bs4 import BeautifulSoup
import bs4
import re
import pandas as pd


def init_attribute_holders(attributes_for_selection, attribute_values):
    """Create an empty value list in *attribute_values* for every attribute."""
    for attribute in attributes_for_selection:
        attribute_values[attribute] = []


def update_attributes(soup_element, attributes_for_selection, attribute_values):
    """Record the attribute held by *soup_element* if it is a selected one.

    Returns the attribute name found in the element, whether or not it
    was one of *attributes_for_selection* (the caller uses this to track
    which attributes were present on the page).
    """
    soup_attribute_name = soup_element.find('span')['title']
    for attribute_name in attributes_for_selection:
        if attribute_name == soup_attribute_name:
            soup_value = soup_element.find('div')['title']
            attribute_values[soup_attribute_name].append(soup_value)
    return soup_attribute_name


def fill_in_not_updated_attributes(not_updated_attributes, attribute_values):
    """Append None for attributes absent from a page so that every value
    list keeps the same length for the postprocessing step."""
    for attribute in not_updated_attributes:
        attribute_values[attribute].append(None)


def scrap_item(link, attributes_for_selection, attribute_values):
    """Scrape a single product page and append its values to *attribute_values*."""
    page = requests.get(link)
    # check the page status; if success then it should be 200
    if page.status_code != 200:
        return
    soup = BeautifulSoup(page.content, 'html.parser')
    # this selector gets the tag where the main image is located
    main_image_element = soup.select_one('img#J-dcv-image-trigger')
    # skip pages without a main image (some pages have video instead)
    if main_image_element is None:
        return
    attribute_values["image_link"].append(main_image_element["data-src"])
    # the following element contains the product details list
    details = soup.select_one('div.do-entry-list')
    if details is None:
        # Robustness fix: the original crashed with AttributeError when a
        # page had no details list. Pad every attribute with None so the
        # value lists stay aligned with the image link appended above.
        fill_in_not_updated_attributes(attributes_for_selection, attribute_values)
        return
    not_updated_attributes = set(attributes_for_selection)
    for detail in details:
        # the details list also contains bare strings; only parse real tags
        if isinstance(detail, bs4.element.Tag):
            updated_attribute = update_attributes(detail, attributes_for_selection, attribute_values)
            if updated_attribute in not_updated_attributes:
                not_updated_attributes.remove(updated_attribute)
    # attributes not present on this page still get a placeholder value
    fill_in_not_updated_attributes(not_updated_attributes, attribute_values)
    return


def get_next_page_link(soup, current_page_number, seller_main_page):
    """Return the absolute link to the next item-list page, or None when
    the current page is the last one."""
    pagination_list = soup.select_one('div.next-pagination-list')
    displayed_pages = pagination_list.select('a.next-pagination-item')
    for page in displayed_pages:
        # page numbers are displayed in ascending order, so the first one
        # greater than the current page number is the next page
        if int(page.text) > current_page_number:
            return seller_main_page + page["href"]


def scrap_items(soup, attributes_for_selection, attribute_values, seller_main_page):
    """Scrape every product listed on one item-list page."""
    product_list = soup.select_one('div.component-product-list')
    items = product_list.select('div.product-info')
    # collect absolute product links first, then visit each one
    item_links = [seller_main_page + item.select_one('a.title-link')['href'] for item in items]
    for item_link in item_links:
        scrap_item(item_link, attributes_for_selection, attribute_values)


def scrap_items_page(item_list_link, seller_main_page, current_page_number, attributes_for_selection, attribute_values):
    """Scrape one item-list page; return the next page's link (or None)."""
    page = requests.get(item_list_link)
    # check the page status; if success then it should be 200
    if page.status_code != 200:
        return
    soup = BeautifulSoup(page.content, 'html.parser')
    scrap_items(soup, attributes_for_selection, attribute_values, seller_main_page)
    next_page_link = get_next_page_link(soup, current_page_number, seller_main_page)
    return next_page_link


def scrap_init_seller_page(init_link, attributes_for_selection, attribute_values):
    """Walk all item-list pages of one seller, starting from *init_link*."""
    page = requests.get(init_link)
    # Bug fix: the scraped pattern contained mojibake in "https://"; also
    # use a raw string so "\w" is a regex escape, not a string escape.
    seller_main_page = re.search(r"https://[\w\.]+", init_link).group(0)
    # check the page status; if success then it should be 200
    if page.status_code != 200:
        return
    # the initial page is the first page with items
    next_seller_page = init_link
    # initial page is 1
    current_page_number = 1
    while next_seller_page is not None:
        next_seller_page = scrap_items_page(next_seller_page, seller_main_page, current_page_number,
                                            attributes_for_selection, attribute_values)
        current_page_number += 1


def scrap_sellers(seller_links, tags_for_selection):
    """Scrape every seller in *seller_links*, collecting *tags_for_selection*.

    NOTE(review): the source was truncated right after the loop below.
    """
    # to prevent the image link from being scrapped as a text attribute,
    # an additional holder for the link is created
    all_attributes = tags_for_selection + ["image_link"]
    attribute_values = {}
    init_attribute_holders(all_attributes, attribute_values)
    # tags for selection don't include the image; it is processed separately
    for link in seller_links:
        scrap_init_seller_page(link, tags_for_selection, attribute_values)
Check out the latest blogs from LambdaTest on this topic:
There are times when developers get stuck with a problem that has to do with version changes. Trying to run the code or test without upgrading the package can result in unexpected errors.
Automating testing is a crucial step in the development pipeline of a software product. In an agile development environment, where there is continuous development, deployment, and maintenance of software products, automation testing ensures that the end software products delivered are error-free.
Web applications continue to evolve at an unbelievable pace, and the architecture surrounding web apps gets more complicated all the time. With the growth in complexity of the web application and the development process, web application testing also needs to keep pace with the ever-changing demands.
"Test frequently and early." If you've been following my testing agenda, you're probably sick of hearing me repeat that. However, it makes sense that if your tests detect an issue soon after it occurs, it will be easier to resolve. This is one of the guiding concepts that makes continuous integration such an effective method. I've encountered several teams who have a lot of automated tests but don't use them as part of a continuous integration approach. There are frequently various reasons why the team believes these tests cannot be used with continuous integration. Perhaps the tests take too long to run, or they are not dependable enough to provide correct results on their own, necessitating human interpretation.
Hey LambdaTesters! We've got something special for you this week.
Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios, LambdaTest Learning Hub compiles a list of step-by-step guides to help you be proficient with different test automation frameworks, i.e., Selenium, Cypress, TestNG, etc.
You could also refer to video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.
Get 100 minutes of automation test minutes FREE!!