Best Python code snippet using playwright-python
table_preprocess.py
Source:table_preprocess.py
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Script: table preprocessing.

Reads category-sorted HTML files, collects their <tbody> tables and
(partially) parses titles/headers into numpy arrays.  Several stages are
unfinished in the original and are flagged with TODO(review) below.

Created on 2018-06-13
@author:David Yisun
@group:data
"""
import os
from bs4 import BeautifulSoup
import codecs
import re
import itertools
import numpy as np


def read_html():
    """Read the HTML files and keep them as dicts keyed by file name.

    :return: {file_name: {'classify': category, 'h': BeautifulSoup tree}}
    """
    # NOTE(review): category directory names were mojibake in the scraped
    # source and are copied verbatim -- confirm against the real data dirs.
    classify = ['é大åå', 'å¢åæ', 'å®å¢']
    file_list = []
    for _classify in classify:
        path = './data/round2_adjust/{0}/html/'.format(_classify)
        files_name = os.listdir(path)
        file_list = [{'file_name': i, 'path': path + i, 'classify': _classify} for i in files_name] + file_list
    html_dict = {}
    text_dict = {}
    for i, _file in enumerate(file_list):
        with codecs.open(_file['path'], 'r', 'utf8') as f:
            data = f.read()
        print('read {0}'.format(_file['file_name']))
        # Drop newlines and inter-tag whitespace so the parser sees one
        # continuous string (avoids spurious whitespace text nodes).
        data = re.sub(re.compile('>\n* *<'), '><', data)
        data = re.sub(re.compile('\n'), '', data)
        _html = BeautifulSoup(data, 'lxml', from_encoding='utf-8')
        html_dict[_file['file_name']] = {'classify': _file['classify'], 'h': _html}
        text_dict[_file['file_name']] = {'classify': _file['classify'], 't': data}
    return html_dict


# Sample output: html_dict['100103.html']['h']
#     {'100103.html': {'classify': ..., 'h': <html><head></head><body>...}}


def get_all_tables():
    """Collect every non-empty <tbody> from every document.

    :return: list of (file_name, tbody_tag) pairs, with heavily
             annotated tables filtered out.
    """
    tables_tag = []
    html_dict = read_html()
    for index in html_dict:
        print(index)
        t = html_dict[index]
        # Remove "tr"/"tbody" elements that carry no text at all.
        for j in t['h'].find_all('tr'):
            if j.find_all(text=True) == []:
                j.decompose()
        for j in t['h'].find_all('tbody'):
            if j.find_all(text=True) == []:
                j.decompose()
        _tables_tag = t['h'].find_all('tbody')
        # Skip documents that contain no table.
        if not _tables_tag:
            continue
        # Pair every table with the file it came from.
        tables_tag = tables_tag + list(itertools.zip_longest([index], _tables_tag, fillvalue=index))
    # Drop tables dominated by annotation cells (regex copied verbatim
    # from the mojibake source -- it matches a single annotation char).
    tables_tag_new = []
    for i, t in enumerate(tables_tag):
        annotation = t[1].find_all(text=re.compile('^ *æ *$'))
        if len(annotation) > 10:
            continue
        tables_tag_new.append(t)
    return tables_tag_new


def text_type(s):
    """Classify the data type of a cell value.

    TODO(review): only the 'string' branch is implemented in the original.
    """
    res = 'string'
    return res


def td_processing(td):
    """Extract one table cell.

    :param td: bs4 <td> tag
    :return: dict with keys td_content, td_type (both (rowspan x colspan)
             ndarrays so merged cells tile their value), td_colspan, td_rowspan
    """
    td_rowspan = 1
    td_colspan = 1
    if td.has_attr('rowspan'):
        td_rowspan = int(td['rowspan'])
    if td.has_attr('colspan'):
        td_colspan = int(td['colspan'])
    # Intended to fill empty cells with a placeholder.
    # BUG(review): when td.text == '' there are no stripped_strings, so
    # this loop never runs; behavior kept, flagged for a later fix.
    if td.text == '':
        for t_child in td.stripped_strings:
            t_child.replace_with('---')
    td_type = np.array([[text_type(td.text)] * td_colspan] * td_rowspan).reshape(td_rowspan, td_colspan)
    td_content = np.array([[td.text] * td_colspan] * td_rowspan).reshape(td_rowspan, td_colspan)
    res = {'td_content': td_content,
           'td_type': td_type,
           'td_colspan': td_colspan,
           'td_rowspan': td_rowspan}
    return res


def tr_processing(tr):
    """Summarize one table row.

    :param tr: bs4 <tr> tag
    :return: dict with span maxima, cell count, per-cell content/type
             arrays, total column width, and whether every cell spans
             multiple columns.
    """
    tr_most_rowspan = 1   # largest rowspan in the row
    tr_most_colspan = 1   # largest colspan in the row
    tds = tr.find_all('td')
    n_tds = len(tds)      # number of cells in the row
    tr_content = []       # per-cell content arrays
    tr_type = []          # per-cell type arrays
    tr_cols = 0           # total column width of the row
    count_tr_colspan = 0  # number of cells with colspan > 1
    for td in tds:
        data = td_processing(td)
        if data['td_colspan'] > tr_most_colspan:
            tr_most_colspan = data['td_colspan']
        if data['td_rowspan'] > tr_most_rowspan:
            tr_most_rowspan = data['td_rowspan']
        tr_content.append(data['td_content'])
        tr_type.append(data['td_type'])
        tr_cols = tr_cols + data['td_colspan']
        if data['td_colspan'] > 1:
            count_tr_colspan += 1
    # True when every cell in the row spans multiple columns.
    all_has_multi_colspan = count_tr_colspan == n_tds
    res = {'tr_most_colspan': tr_most_colspan,
           'tr_most_rowspan': tr_most_rowspan,
           'n_tds': n_tds,
           'all_has_multi_colspan': all_has_multi_colspan,
           'tr_content': tr_content,
           'tr_cols': tr_cols}
    return res


def find_title(tr):
    """Look for a table title row.

    :param tr: bs4 <tr> tag
    :return: the row text when the row is a single-cell title row, else -1
    """
    data = tr_processing(tr)
    if data['n_tds'] == 1:
        return data['tr_content'][0][0][0]
    return -1


def check_headers(tr):
    """Inspect a candidate header row.

    :param tr: bs4 <tr> tag
    :return: dict with a 'type' tag and a pre-allocated 'headers_array'
    """
    data = tr_processing(tr)
    headers_array = np.empty(shape=(data['tr_most_rowspan'], data['tr_cols']), dtype='object')
    res = {'type': '',
           'headers_array': headers_array}
    # Header split into several independent sub-headers: unhandled (example 10).
    if data['all_has_multi_colspan']:
        res['type'] = 'multi_sub_tables'
        return res
    # Continuation row: unhandled.
    if data['n_tds'] == 1:
        res['type'] = 'continous_rows'
        return res
    # Single-row header with no merged cells: extract directly.
    # TODO(review): extraction, the rowspan-only layout (example 8) and the
    # colspan-only layout are unimplemented in the original.
    if data['tr_most_colspan'] == 1 and data['tr_most_rowspan'] == 1:
        pass
    # BUG FIX: the original fell off the end and returned None here even
    # though it had built `res`; return it so all paths yield the same shape.
    return res


def complete_headers(tr, headers_array):
    """Complete a partially-filled header.  TODO(review): unimplemented."""
    return


def table_processing(tbody):
    """Process one table body.

    :param tbody: bs4 <tbody> tag
    :return: currently always None -- the parse below is unfinished.
             Intended types per the original notes:
             ['df', 'df_no_title', 'no_parse', 'part_df_content', 'only_one']
    """
    trs = tbody.find_all('tr', recursive=False)
    n_row = len(trs)                  # row count (placeholder, unused yet)
    title = None                      # table title
    headers_type = ''                 # header layout tag
    headers = []                      # header cells (placeholder)
    headers_array = np.array([None])  # header matrix
    fields_type = []                  # column data types (placeholder)
    table_type = ''                   # renamed from `type` (shadowed builtin)
    # --- walk the rows ---
    for i, tr in enumerate(trs):
        # --- check title ---
        # NOTE(review): the original's indentation was lost in extraction;
        # this pairing (else on the outer if) matches its comments: first
        # row sets the title, later single-cell rows mark nested sub-tables.
        if title is None:
            title = find_title(tr)
            if title != -1:
                # Table carries an internal title; move to the next row.
                continue
        else:
            sub_title = find_title(tr)
            if sub_title != -1:
                # Nested sub-tables (several tables merged): tag, unhandled.
                table_type = 'multi-tables'
        # --- check headers ---
        if headers_type == '':
            headers_array = check_headers(tr)
            continue
        if headers_type == 'part_headers':
            # Header is incomplete: try to extend it with this row.
            headers_array = complete_headers(tr, headers_array)


if __name__ == '__main__':
    tables = get_all_tables()
    data_list = []
    for i, t in enumerate(tables):
        text = t[0]
        print('{0}:{1}'.format(i, text))
        d = table_processing(t[1])
        if d is None:
            # table_processing currently always returns None, so the
            # filter below is not reachable yet.
            continue
        # NOTE(review): comparing an int span count to True means == 1;
        # kept as in the original (dead code today).
        if d['all_has_multi_colspan'] and d['tr_most_rowspan'] == True:
            data_list.append(t)
decisionTrees.py
Source:decisionTrees.py
1" Created by Ecem Balıkçı on 1/11/2021 at 7:16 AM (Contact: balikci8ecem@gmail.com) "2import csv3import numpy as np4import matplotlib.pyplot as plt5from sklearn.tree import export_text6from sklearn.tree import DecisionTreeRegressor78exp_list = np.array([])9salary_list = np.array([])10age_list = np.array([])11pow_list = np.array([])12headers_arr = np.array([])13headers_array = np.array([])14with open("team_big.csv", encoding='Latin-1') as f:15 csv_list = list(csv.reader(f))16 for a in csv_list:17 if a == csv_list[0]:18 headers_arr = np.append(exp_list, csv_list[0])19 headers_array = np.append(headers_array, headers_arr[4])20 headers_array = np.append(headers_array, headers_arr[6])21 headers_array = np.append(headers_array, headers_arr[7])2223 if a != csv_list[0]:24 exp_list = np.append(exp_list, int(a[6]))25 salary_list = np.append(salary_list, int(a[8]))26 age_list = np.append(age_list, int(a[4]))27 pow_list = np.append(pow_list, float(a[7]))2829X = np.column_stack((age_list, exp_list, pow_list))30y = salary_list3132x_train = X[:30]33y_train = y[:30]34x_test = X[30:]35y_test = y[30:]3637reg_1 = DecisionTreeRegressor(random_state=0, max_depth=1)38reg_1.fit(x_train, y_train)39y_hat = reg_1.predict(x_test)40mse = np.mean(np.square(y_hat - y_test))41print("âââââââââ<<<<<<<<<<Results for Decision Tree 1>>>>>>>>>>ââââââââ")42print("MSE: ", mse)43print("The feature importances: ", reg_1.feature_importances_)44print()45titles = export_text(reg_1, feature_names=[headers_array[0], headers_array[1], headers_array[2]])46print(titles)4748reg_2 = DecisionTreeRegressor(random_state=0, max_depth=3)49reg_2.fit(x_train, y_train)50y_hat_2 = reg_2.predict(x_test)51mse = np.mean(np.square(y_hat_2 - y_test))52print("âââââââââ<<<<<<<<<<Results for Decision Tree 2>>>>>>>>>>ââââââââ")53print("MSE: ", mse)54print("The feature importances: ", reg_2.feature_importances_)55print()56titles = export_text(reg_2, feature_names=[headers_array[0], headers_array[1], 
headers_array[2]])57print(titles)58# I don't know if its about the python/pycharm version but mine doesn't have feature_name.59# it has feature_names and doesn't accept headers_array directly, so I had to do it that way6061reg_3 = DecisionTreeRegressor(random_state=0, max_depth=None)62reg_3.fit(x_train, y_train)63y_hat_3 = reg_3.predict(x_test)64mse = np.mean(np.square(y_hat_3 - y_test))65print("âââââââââ<<<<<<<<<<Results for Decision Tree 3>>>>>>>>>>ââââââââ")66print("MSE: ", mse)67print("The feature importances: ", reg_3.feature_importances_)68print()69titles = export_text(reg_3, feature_names=[headers_array[0], headers_array[1], headers_array[2]])70print(titles)717273plt.plot([1, 15000, 25000], [1, 15000, 25000], c="lavender")74plt.scatter(y_test, y_hat, c="mediumpurple")75plt.scatter(y_test, y_hat_2, c="palevioletred")76plt.scatter(y_test, y_hat_3, c="mediumturquoise")77plt.title("Decision Trees: Predictions vs. Actual Values")78plt.xlabel("Actual Salary Values for Test Data")79plt.ylabel("Salary Predictions for Test Data")80plt.legend(["No-error line", "Decision Tree 1(Max depth: 1)",81 "Decision Tree 2(Max depth: 3)", "Decision Tree 3(Max depth: None)"])
...
httpclient.py
Source:httpclient.py
#! /usr/bin/env python
# Dzmitry Kuzmitch
#
# Minimal HTTP/1.1 client with a file-based cache: sends If-Modified-Since
# when a .cache file exists and stores 200-responses back into it.
# Usage: httpclient.py host:port/filename

import sys
import socket
import struct
import random
import datetime, time
import os.path

link = sys.argv[1]

# =================== Getting link info and setting connection
link = link.split('/')
host = link[0].split(':')[0]
port = link[0].split(':')[1]
filename = link[1]
server_address = (host, int(port))

# =================== Setting headers
message = 'GET /' + filename + ' HTTP/1.1\r\n'
message += 'Host: ' + link[0] + '\r\n'

cache_name = filename.split('.')[0] + '.cache'
# =================== If cache exists, ask for changes since its mtime
if os.path.isfile(cache_name):
    secs = os.path.getmtime(cache_name)
    tg = time.gmtime(secs)
    last_mod_time = time.strftime("%a, %d %b %Y %H:%M:%S GMT\r\n", tg)
    message += 'If-Modified-Since: ' + last_mod_time + '\r\n'

message += '\r\n'

# =================== Trying to connect
sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
# BUG FIX: without a timeout the socket.timeout handler below was unreachable.
sock.settimeout(10)

print("")
print("")
print("")
print(message)

try:
    sock.connect(server_address)
    sock.sendall(message.encode())
    # =============== Reading data from buffer until the server closes
    data = b''
    while True:
        buf = sock.recv(1024)
        if not buf:
            break
        data += buf
    data = data.decode()

    # BUG FIX: the original assumed exactly six header lines and split the
    # response at headers_array[:6] / [6:], corrupting output for any other
    # server.  Split headers from body at the first blank line instead.
    head, sep, body = data.partition('\r\n\r\n')
    if not sep:
        head, _, body = data.partition('\n\n')
    headers_array = head.split('\n')

    headers = ''
    for item in headers_array:
        headers += item.rstrip('\r')
        headers += '\n'
    print(headers)

    # Status code is the second token of the status line.
    status = headers_array[0].split(' ')[1]
    content = ''
    if status == '200':
        content = body
        # Refresh the cache with the new body.
        with open(cache_name, 'w+') as f:
            f.write(content)
    elif status == '304':
        # Not modified: the original left the cached copy unread on purpose.
        content = ''
    elif status == '404':
        content = '404 Not Found'

    print(content)

except socket.timeout as e:
    print('\nRequest attempt timed out')

except OSError as e:
    print('\nRequest attempt timed out (with an error)')

finally:
    # BUG FIX: close exactly once, on every path (original closed twice
    # on success and leaked the socket on error).
    sock.close()
...
Statistic.py
Source:Statistic.py
import numpy as np
from scipy.stats import ttest_ind, rankdata, ranksums


def t_student(headers_array, scores, alfa=.05):
    """Pairwise Student's t-test between classifiers.

    :param headers_array: sequence of classifier names (defines matrix size)
    :param scores: per-classifier score samples; scores[i] vs scores[j]
    :param alfa: significance level
    :return: k x k 0/1 float matrix; [i, j] == 1 when classifier i scores
             higher than j AND the difference is statistically significant
    """
    k = len(headers_array)  # hoisted: original recomputed len() repeatedly
    t_statistic = np.zeros((k, k))
    p_value = np.zeros((k, k))
    # t statistic and p-value for every pair of classifiers
    for i in range(k):
        for j in range(k):
            t_statistic[i, j], p_value[i, j] = ttest_ind(scores[i], scores[j])
    # advantage: 1 where classifier i scored higher than classifier j
    advantage = np.zeros((k, k))
    advantage[t_statistic > 0] = 1
    # significance: 1 where the difference is statistically significant
    significance = np.zeros((k, k))
    significance[p_value <= alfa] = 1
    # element-wise product: significant AND better
    stat_better = significance * advantage
    return stat_better


def wilcoxon(headers_array, scores, alpha=.05):
    """Wilcoxon rank-sum test on per-dataset mean ranks.

    :param headers_array: sequence of classifier names (defines matrix size)
    :param scores: array-like of shape (n_classifiers, n_datasets, n_folds)
    :param alpha: significance level
    :return: k x k 0/1 float matrix; [i, j] == 1 when classifier i ranks
             higher than j AND the difference is statistically significant
    """
    # Mean score over folds, transposed to (n_datasets, n_classifiers).
    mean_scores = np.mean(scores, axis=2).T
    # Rank the classifiers 1..k within each dataset; ties get the average rank.
    ranks = np.array([rankdata(ms).tolist() for ms in mean_scores])
    # (removed the original's unused `mean_ranks` computation)
    k = len(headers_array)
    w_statistic = np.zeros((k, k))
    p_value = np.zeros((k, k))
    # Rank-sum statistic and p-value for every pair of classifiers.
    for i in range(k):
        for j in range(k):
            w_statistic[i, j], p_value[i, j] = ranksums(ranks.T[i], ranks.T[j])
    advantage = np.zeros((k, k))
    advantage[w_statistic > 0] = 1
    significance = np.zeros((k, k))
    significance[p_value <= alpha] = 1
    # element-wise product: significant AND better
    stat_better = significance * advantage
    return stat_better
LambdaTest's Playwright tutorial will give you a broader idea about the Playwright automation framework, its unique features, and use cases with examples to deepen your understanding of Playwright testing. This tutorial provides A-to-Z guidance, from installing the Playwright framework to best practices and advanced concepts.
Get 100 minutes of automation testing for free!