Best Python code snippet using selene_python
read_data.py
Source: read_data.py
import data_prep


def read_dataset(dataset_key):
    """Read the dataset identified by *dataset_key* from the res/ directory.

    Supported keys: "KDD_train+", "KDD_test+", "iris_train", "iris_test",
    "NB15_train", "NB15_test".

    Returns:
        (dataset, value_space_dict) where *dataset* is a list of
        {attribute_name: value} dicts (numeric columns converted to float)
        and *value_space_dict* maps each nominal attribute to the list of
        values it may take. Returns None for an unknown key.
    """
    if dataset_key in ("KDD_train+", "KDD_test+"):
        # NSL-KDD: fixed attribute schema, comma-separated text files.
        # (Bug fix: the scraped paths contained mojibake characters.)
        if dataset_key == "KDD_train+":
            file_path = "res/mod_NSL/KDDTrain+.txt"
        else:
            file_path = "res/mod_NSL/KDDTest+.txt"
        dataset = []
        attr_names = [
            'duration', 'protocol_type', 'service', 'flag', 'src_bytes',
            'dst_bytes', 'land', 'wrong_fragment', 'urgent', 'hot',
            'num_failed_logins', 'logged_in', 'num_compromised', 'root_shell',
            'su_attempted', 'num_root', 'num_file_creations', 'num_shells',
            'num_access_files', 'num_outbound_cmds', 'is_host_login',
            'is_guest_login', 'count', 'srv_count', 'serror_rate',
            'srv_serror_rate', 'rerror_rate', 'srv_rerror_rate',
            'same_srv_rate', 'diff_srv_rate', 'srv_diff_host_rate',
            'dst_host_count', 'dst_host_srv_count', 'dst_host_same_srv_rate',
            'dst_host_diff_srv_rate', 'dst_host_same_src_port_rate',
            'dst_host_srv_diff_host_rate', 'dst_host_serror_rate',
            'dst_host_srv_serror_rate', 'dst_host_rerror_rate',
            'dst_host_srv_rerror_rate', 'class'
        ]
        # Known value spaces for the nominal attributes; 'class' is filled
        # in while reading because the label set differs between files.
        value_space_dict = {'protocol_type': ['tcp', 'udp', 'icmp'],
                            'service': ['aol', 'auth', 'bgp', 'courier', 'csnet_ns', 'ctf', 'daytime', 'discard',
                                        'domain', 'domain_u', 'echo', 'eco_i', 'ecr_i', 'efs', 'exec', 'finger', 'ftp',
                                        'ftp_data', 'gopher', 'harvest', 'hostnames', 'http', 'http_2784', 'http_443',
                                        'http_8001', 'imap4', 'IRC', 'iso_tsap', 'klogin', 'kshell', 'ldap', 'link',
                                        'login', 'mtp', 'name', 'netbios_dgm', 'netbios_ns', 'netbios_ssn', 'netstat',
                                        'nnsp', 'nntp', 'ntp_u', 'other', 'pm_dump', 'pop_2', 'pop_3', 'printer',
                                        'private', 'red_i', 'remote_job', 'rje', 'shell', 'smtp', 'sql_net', 'ssh',
                                        'sunrpc', 'supdup', 'systat', 'telnet', 'tftp_u', 'tim_i', 'time', 'urh_i',
                                        'urp_i', 'uucp', 'uucp_path', 'vmnet', 'whois', 'X11', 'Z39_50'],
                            'flag': ['OTH', 'REJ', 'RSTO', 'RSTOS0', 'RSTR', 'S0', 'S1', 'S2', 'S3', 'SF', 'SH'],
                            'class': []}
        with open(file_path, 'r') as file:
            for line in file:
                attribute_values = line.split(',')
                # Strip the trailing newline from the last field.
                # (Bug fix: the original sliced off the last character
                # unconditionally, which corrupts a final line that has no
                # trailing newline; rstrip('\n') is safe either way.)
                attribute_values[-1] = attribute_values[-1].rstrip('\n')
                # Drop the last (extra) column in each row; it is not part
                # of the attribute schema above.
                attribute_values.pop()
                # Convert the numeric columns to float (columns 1-3 are
                # nominal: protocol_type, service, flag).
                attribute_values[0] = float(attribute_values[0])
                for index in range(4, 41):
                    attribute_values[index] = float(attribute_values[index])
                data_row = dict(zip(attr_names, attribute_values))
                if data_row['class'] not in value_space_dict['class']:
                    value_space_dict['class'].append(data_row['class'])
                dataset.append(data_row)
        return dataset, value_space_dict
    elif dataset_key in ("iris_train", "iris_test"):
        if dataset_key == "iris_train":
            file_path = "res/iris_data/iris_train.data"
        else:
            file_path = "res/iris_data/iris_test.data"
        attr_names = ["sepal_length", "sepal_width", "petal_length", "petal_width", "class"]
        dataset = []
        value_space_dict = {'class': []}
        with open(file_path, 'r') as file:
            for line in file:
                # Skip blank/short lines (the iris files end with one).
                if len(line) < 4:
                    continue
                attribute_values = line.split(',')
                # Strip the trailing newline from the class label (see the
                # KDD branch for why rstrip is used instead of slicing).
                attribute_values[-1] = attribute_values[-1].rstrip('\n')
                # The first four columns are numeric measurements.
                for index in range(0, 4):
                    attribute_values[index] = float(attribute_values[index])
                data_row = dict(zip(attr_names, attribute_values))
                if data_row['class'] not in value_space_dict['class']:
                    value_space_dict['class'].append(data_row['class'])
                dataset.append(data_row)
        return dataset, value_space_dict
    elif dataset_key in ("NB15_train", "NB15_test"):
        if dataset_key == "NB15_train":
            file_path = "res/UNSW-NB15/UNSW_NB15_training-set.csv"
        else:
            file_path = "res/UNSW-NB15/UNSW_NB15_testing-set.csv"
        attr_names = ["dur", "proto", "service", "state", "spkts", "dpkts", "sbytes", "dbytes", "rate", "sttl",
                      "dttl", "sload", "dload", "sloss", "dloss", "sinpkt", "dinpkt", "sjit", "djit", "swin",
                      "stcpb", "dtcpb", "dwin", "tcprtt", "synack", "ackdat", "smean", "dmean", "trans_depth",
                      "response_body_len", "ct_srv_src", "ct_state_ttl", "ct_dst_ltm", "ct_src_dport_ltm",
                      "ct_dst_sport_ltm", "ct_dst_src_ltm", "is_ftp_login", "ct_ftp_cmd", "ct_flw_http_mthd",
                      "ct_src_ltm", "ct_srv_dst", "is_sm_ips_ports", "class"]
        dataset = []
        # Binary classification: every non-"Normal" attack category is
        # collapsed into "Intrusion" below, so the value space is fixed.
        value_space_dict = {'class': ["Normal", "Intrusion"]}
        with open(file_path, 'r') as file:
            for line_num, line in enumerate(file):
                # Skip the CSV header row.
                if line_num == 0:
                    continue
                # Skip blank/short lines.
                if len(line) < 4:
                    continue
                attribute_values = line.split(',')
                # Drop the leading "id" column and the trailing label
                # column; the remaining last column is the attack
                # category, mapped to 'class'. The popped label carried
                # the newline, so no extra stripping is needed.
                attribute_values.pop(0)
                attribute_values.pop()
                # Convert the numeric columns to float (columns 1-3 are
                # nominal: proto, service, state).
                attribute_values[0] = float(attribute_values[0])
                # NOTE(review): the original also leaves the last two kept
                # columns unconverted; preserved as-is here.
                for index in range(4, len(attribute_values) - 2):
                    attribute_values[index] = float(attribute_values[index])
                data_row = dict(zip(attr_names, attribute_values))
                # Collapse all attack categories into a single label.
                if data_row['class'] != "Normal":
                    data_row['class'] = "Intrusion"
                dataset.append(data_row)
        return dataset, value_space_dict


def calculate_intervals(dataset_key, dataset, value_space_dict):
    """Compute MDLP discretization cuts for every float attribute.

    Mutates *value_space_dict*: each float attribute's entry is replaced
    by its list of interval cut points.

    Returns:
        {attribute_name: interval_cuts} for supported training keys
        ("KDD_train+", "NB15_train"); None (after printing an error)
        otherwise.
    """
    if dataset_key == "KDD_train+" or dataset_key == "NB15_train":
        print("Interval calculations has begun.")
        # Collect a [value, class] pair list for every float-typed
        # attribute, using the first row to detect the float columns.
        float_attr = {}
        for attr in dataset[0]:
            if isinstance(dataset[0][attr], float):
                float_attr[attr] = []
        print("Floats initiated.")
        for data_row in dataset:
            for attr in float_attr:
                float_attr[attr].append([data_row[attr], data_row['class']])
        print("Floats parsed to lists")
        # Calculate the interval cuts attribute by attribute (MDLP is the
        # expensive step, hence the progress print per attribute).
        interval_cuts = {}
        count_finished_attr = 0
        for attr in float_attr:
            interval_cuts[attr] = data_prep.mdlp(float_attr[attr])
            value_space_dict[attr] = interval_cuts[attr]
            count_finished_attr += 1
            print("Calculate intervals status: {} of {} attribute intervals created.".format(count_finished_attr, len(float_attr)))
        return interval_cuts
    else:
        # Typo fix: "Unkown" -> "Unknown".
        print("Error: Unknown dataset key {} for interval calculation.".format(dataset_key))


def find_interval_value(data_value, interval_cuts):
    """Return the first cut >= *data_value* as a string, or "-1" when the
    value lies above every cut (interval_cuts must be in ascending order)."""
    for cut in interval_cuts:
        if data_value <= cut:
            return str(cut)
    return str(-1)


def discretize_to_intervals(dataset, intervals):
    """Replace float attribute values with their interval labels in place.

    NOTE(review): the source was truncated at this point; only the loop
    headers are visible, so the body is left as a placeholder.
    """
    for data_row in dataset:
        for attr_name in intervals:
            ...
alibaba_scrapper.py
Source: alibaba_scrapper.py
import requests
from bs4 import BeautifulSoup
import bs4
import re
import pandas as pd


def init_attribute_holders(attributes_for_selection, attribute_values):
    """Create an empty value list in *attribute_values* for every attribute."""
    for attribute in attributes_for_selection:
        attribute_values[attribute] = []


def update_attributes(soup_element, attributes_for_selection, attribute_values):
    """Record the attribute held by *soup_element* if it is a selected one.

    Returns the attribute name found in the element, whether or not it
    was one of *attributes_for_selection* (the caller uses this to track
    which attributes were present on the page).
    """
    soup_attribute_name = soup_element.find('span')['title']
    for attribute_name in attributes_for_selection:
        if attribute_name == soup_attribute_name:
            soup_value = soup_element.find('div')['title']
            attribute_values[soup_attribute_name].append(soup_value)
    return soup_attribute_name


def fill_in_not_updated_attributes(not_updated_attributes, attribute_values):
    """Append None for attributes absent from a page so that every value
    list keeps the same length for the postprocessing step."""
    for attribute in not_updated_attributes:
        attribute_values[attribute].append(None)


def scrap_item(link, attributes_for_selection, attribute_values):
    """Scrape a single product page and append its values to *attribute_values*."""
    page = requests.get(link)
    # check the page status; if success then it should be 200
    if page.status_code != 200:
        return
    soup = BeautifulSoup(page.content, 'html.parser')
    # this selector gets the tag where the main image is located
    main_image_element = soup.select_one('img#J-dcv-image-trigger')
    # skip pages without a main image (some pages have video instead)
    if main_image_element is None:
        return
    attribute_values["image_link"].append(main_image_element["data-src"])
    # the following element contains the product details list
    details = soup.select_one('div.do-entry-list')
    if details is None:
        # Robustness fix: the original crashed with AttributeError when a
        # page had no details list. Pad every attribute with None so the
        # value lists stay aligned with the image link appended above.
        fill_in_not_updated_attributes(attributes_for_selection, attribute_values)
        return
    not_updated_attributes = set(attributes_for_selection)
    for detail in details:
        # the details list also contains bare strings; only parse real tags
        if isinstance(detail, bs4.element.Tag):
            updated_attribute = update_attributes(detail, attributes_for_selection, attribute_values)
            if updated_attribute in not_updated_attributes:
                not_updated_attributes.remove(updated_attribute)
    # attributes not present on this page still get a placeholder value
    fill_in_not_updated_attributes(not_updated_attributes, attribute_values)
    return


def get_next_page_link(soup, current_page_number, seller_main_page):
    """Return the absolute link to the next item-list page, or None when
    the current page is the last one."""
    pagination_list = soup.select_one('div.next-pagination-list')
    displayed_pages = pagination_list.select('a.next-pagination-item')
    for page in displayed_pages:
        # page numbers are displayed in ascending order, so the first one
        # greater than the current page number is the next page
        if int(page.text) > current_page_number:
            return seller_main_page + page["href"]


def scrap_items(soup, attributes_for_selection, attribute_values, seller_main_page):
    """Scrape every product listed on one item-list page."""
    product_list = soup.select_one('div.component-product-list')
    items = product_list.select('div.product-info')
    # collect absolute product links first, then visit each one
    item_links = [seller_main_page + item.select_one('a.title-link')['href'] for item in items]
    for item_link in item_links:
        scrap_item(item_link, attributes_for_selection, attribute_values)


def scrap_items_page(item_list_link, seller_main_page, current_page_number, attributes_for_selection, attribute_values):
    """Scrape one item-list page; return the next page's link (or None)."""
    page = requests.get(item_list_link)
    # check the page status; if success then it should be 200
    if page.status_code != 200:
        return
    soup = BeautifulSoup(page.content, 'html.parser')
    scrap_items(soup, attributes_for_selection, attribute_values, seller_main_page)
    next_page_link = get_next_page_link(soup, current_page_number, seller_main_page)
    return next_page_link


def scrap_init_seller_page(init_link, attributes_for_selection, attribute_values):
    """Walk all item-list pages of one seller, starting from *init_link*."""
    page = requests.get(init_link)
    # Bug fix: the scraped pattern contained mojibake in "https://"; also
    # use a raw string so "\w" is a regex escape, not a string escape.
    seller_main_page = re.search(r"https://[\w\.]+", init_link).group(0)
    # check the page status; if success then it should be 200
    if page.status_code != 200:
        return
    # the initial page is the first page with items
    next_seller_page = init_link
    # initial page is 1
    current_page_number = 1
    while next_seller_page is not None:
        next_seller_page = scrap_items_page(next_seller_page, seller_main_page, current_page_number,
                                            attributes_for_selection, attribute_values)
        current_page_number += 1


def scrap_sellers(seller_links, tags_for_selection):
    """Scrape every seller in *seller_links*, collecting *tags_for_selection*.

    NOTE(review): the source was truncated right after the loop below.
    """
    # to prevent the image link from being scrapped as a text attribute,
    # an additional holder for the link is created
    all_attributes = tags_for_selection + ["image_link"]
    attribute_values = {}
    init_attribute_holders(all_attributes, attribute_values)
    # tags for selection don't include the image; it is processed separately
    for link in seller_links:
        scrap_init_seller_page(link, tags_for_selection, attribute_values)
Check out the latest blogs from LambdaTest on this topic:
There are times when developers get stuck with a problem that has to do with version changes. Trying to run the code or test without upgrading the package can result in unexpected errors.
Automating testing is a crucial step in the development pipeline of a software product. In an agile development environment, where there is continuous development, deployment, and maintenance of software products, automation testing ensures that the end software products delivered are error-free.
Web applications continue to evolve at an unbelievable pace, and the architecture surrounding web apps gets more complicated all the time. With the growth in complexity of the web application and the development process, web application testing also needs to keep pace with the ever-changing demands.
"Test frequently and early." If you've been following my testing agenda, you're probably sick of hearing me repeat that. However, it makes sense that if your tests detect an issue soon after it occurs, it will be easier to resolve. This is one of the guiding concepts that makes continuous integration such an effective method. I've encountered several teams who have a lot of automated tests but don't use them as part of a continuous integration approach. There are frequently various reasons why the team believes these tests cannot be used with continuous integration. Perhaps the tests take too long to run, or they are not dependable enough to provide correct results on their own, necessitating human interpretation.
Hey LambdaTesters! We've got something special for you this week.
Learn to execute automation testing from scratch with LambdaTest Learning Hub. Right from setting up the prerequisites to run your first automation test, to following best practices and diving deeper into advanced test scenarios, LambdaTest Learning Hub compiles a list of step-by-step guides to help you be proficient with different test automation frameworks, i.e., Selenium, Cypress, TestNG, etc.
You could also refer to video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.
Get 100 minutes of automation test minutes FREE!!