Best Python code snippet using playwright-python
table_preprocess.py
Source: table_preprocess.py
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
Script: table preprocessing.

Reads announcement HTML files, collects their non-empty ``tbody`` tags and
provides helpers that turn ``td`` / ``tr`` tags into numpy arrays.

Created on 2018-06-13
@author: David Yisun
@group: data
"""
import codecs
import itertools
import os
import re

import numpy as np

# Announcement categories; each one names a sub-directory of
# ./data/round2_adjust/ (major contract, shareholding change, private placement).
CLASSIFY = ('重大合同', '增减持', '定增')

# Whitespace/newlines between two adjacent tags.
_BETWEEN_TAGS = re.compile('>\n* *<')
_NEWLINE = re.compile('\n')
# A cell that holds only the character "指" ("refers to"); glossary tables
# contain many of these.
_GLOSSARY_MARK = re.compile('^ *指 *$')
# Placeholder written into cells that carry no text.
EMPTY_CELL = '---'


def read_html():
    """
    Read every html file of every category and parse it.

    :return: dict keyed by file name, e.g.
             {'100103.html': {'classify': '定增', 'h': <BeautifulSoup document>}}
    """
    # Imported here so the parsing helpers below stay importable without bs4.
    from bs4 import BeautifulSoup

    file_list = []
    for _classify in CLASSIFY:
        path = './data/round2_adjust/{0}/html/'.format(_classify)
        found = [{'file_name': name, 'path': path + name, 'classify': _classify}
                 for name in os.listdir(path)]
        # Newly found files are prepended, as in the original ordering.
        file_list = found + file_list

    html_dict = {}
    for _file in file_list:
        with codecs.open(_file['path'], 'r', 'utf8') as f:
            data = f.read()
        print('read {0}'.format(_file['file_name']))
        # Drop line breaks so that they do not show up as text nodes.
        data = _BETWEEN_TAGS.sub('><', data)
        data = _NEWLINE.sub('', data)
        # ``data`` is already decoded text, so no ``from_encoding`` is passed.
        _html = BeautifulSoup(data, 'lxml')
        html_dict[_file['file_name']] = {'classify': _file['classify'], 'h': _html}
    return html_dict


def get_all_tables():
    """
    Collect all tables of all documents for analysis.

    :return: list of (file_name, tbody_tag) tuples; tables with more than ten
             glossary marker cells are left out
    """
    tables_tag = []
    html_dict = read_html()
    for index in html_dict:
        print(index)
        soup = html_dict[index]['h']
        # Remove "tr" and then "tbody" tags that contain no text node at all.
        for tag_name in ('tr', 'tbody'):
            for tag in soup.find_all(tag_name):
                if not tag.find_all(string=True):
                    tag.decompose()
        # Documents without any table simply contribute nothing.
        tables_tag.extend((index, tbody) for tbody in soup.find_all('tbody'))

    # --- drop glossary ("释义") tables ---
    return [t for t in tables_tag
            if len(t[1].find_all(string=_GLOSSARY_MARK)) <= 10]


def text_type(s):
    """
    Determine the data type of a cell text.

    :param s: cell text
    :return: currently always 'string' (type detection is not implemented)
    """
    res = 'string'
    return res


def td_processing(td):
    """
    Extract the information of one cell.

    :param td: td tag
    :return: dict with
             td_content: ndarray (rowspan x colspan) filled with the cell text,
                         or with '---' when the cell has no text
             td_type:    ndarray (rowspan x colspan) filled with the text type
             td_colspan: number of columns spanned
             td_rowspan: number of rows spanned
    """
    td_rowspan = int(td['rowspan']) if td.has_attr('rowspan') else 1
    td_colspan = int(td['colspan']) if td.has_attr('colspan') else 1
    # Fill empty cells. The placeholder is applied to the extracted text
    # directly: an empty cell has no string children that could be replaced.
    cell_text = td.text if td.text != '' else EMPTY_CELL
    shape = (td_rowspan, td_colspan)
    return {'td_content': np.full(shape, cell_text),
            'td_type': np.full(shape, text_type(cell_text)),
            'td_colspan': td_colspan,
            'td_rowspan': td_rowspan}


def tr_processing(tr):
    """
    Extract the information of one row.

    :param tr: tr tag
    :return: dict with
             tr_most_colspan: largest colspan among the cells (at least 1)
             tr_most_rowspan: largest rowspan among the cells (at least 1)
             n_tds: number of td cells in the row
             all_has_multi_colspan: True when every cell has colspan > 1
                                    (also True for a row without td cells)
             tr_content: list with the td_content array of every cell
             tr_cols: row length, i.e. the sum of all colspans
    """
    cells = [td_processing(td) for td in tr.find_all('td')]
    colspans = [cell['td_colspan'] for cell in cells]
    rowspans = [cell['td_rowspan'] for cell in cells]
    return {'tr_most_colspan': max(colspans + [1]),
            'tr_most_rowspan': max(rowspans + [1]),
            'n_tds': len(cells),
            'all_has_multi_colspan': all(span > 1 for span in colspans),
            'tr_content': [cell['td_content'] for cell in cells],
            'tr_cols': sum(colspans)}


def find_title(tr):
    """
    Look for the table title in a row.

    :param tr: tr tag
    :return: the text of the cell when the row has exactly one cell, else -1
    """
    data = tr_processing(tr)
    if data['n_tds'] == 1:
        return data['tr_content'][0][0][0]
    return -1


def check_headers(tr):
    """
    Inspect a row as table header.

    :param tr: tr tag
    :return: dict with
             type: 'multi_sub_tables' when every cell spans several columns,
                   'continous_rows' when the row has a single cell,
                   '' for every other layout (not handled yet)
             headers_array: empty object ndarray of shape
                            (largest rowspan, row length)
    """
    data = tr_processing(tr)
    res = {'type': '',
           'headers_array': np.empty(shape=(data['tr_most_rowspan'], data['tr_cols']),
                                     dtype='object')}
    if data['all_has_multi_colspan']:
        # The header is split into several independent sub headers: not handled.
        res['type'] = 'multi_sub_tables'
    elif data['n_tds'] == 1:
        # Consecutive full-width rows: not handled.
        res['type'] = 'continous_rows'
    # TODO: single row with several plain columns (extract directly), cells
    # with colspan only, and cells with rowspan only are not implemented.
    # Every path returns the same dict so callers see one result shape.
    return res


def complete_headers(tr, headers_array):
    """
    Complete a partial header (not implemented).

    :param tr: tr tag
    :param headers_array: header array built so far
    :return: None
    """
    return


def table_processing(tbody):
    """
    Process one table row by row.

    The planned result kinds are 'df' (fully parsed ndarray), 'df_no_title'
    (ndarray without title), 'no_parse' (tbody that cannot be parsed),
    'part_df_content' (ndarray plus unparsed rows) and 'only_one' (single row).

    :param tbody: tbody tag
    :return: None -- the function is unfinished and builds no result yet
    """
    trs = tbody.find_all('tr', recursive=False)
    title = None                       # table title; -1 when the first row has none
    headers_type = ''                  # header type; NOTE: never updated below
    headers_array = np.array([None])   # header matrix
    table_type = ''
    for tr in trs:
        # --- check title ---
        if title is None:
            title = find_title(tr)
            if title != -1:
                # The table carries its own title: go on with the next row.
                continue
        elif find_title(tr) != -1:
            # A further single-cell row is taken as a sign of nested tables.
            table_type = 'multi-tables'
        # --- check headers ---
        if headers_type == '':
            # No header found so far.
            headers_array = check_headers(tr)
            continue
        if headers_type == 'part_headers':
            # Incomplete header.
            headers_array = complete_headers(tr, headers_array)


if __name__ == '__main__':
    tables = get_all_tables()
    data_list = []
    for i, t in enumerate(tables):
        text = t[0]
        print('{0}:{1}'.format(i, text))
        d = table_processing(t[1])
        if d is None:
            continue
        # ``== True`` on an int is the same test as ``== 1``.
        if d['all_has_multi_colspan'] and d['tr_most_rowspan'] == 1:
            data_list.append(t)
decisionTrees.py
Source: decisionTrees.py
"""Created by Ecem Balıkçı on 1/11/2021 at 7:16 AM (Contact: balikci8ecem@gmail.com)

Fits three decision-tree regressors of different depth on team_big.csv
(age, experience and power as features, salary as target) and compares
their test-set predictions.
"""
import csv

import matplotlib.pyplot as plt
import numpy as np
from sklearn.tree import DecisionTreeRegressor
from sklearn.tree import export_text

# Column positions inside team_big.csv.
AGE_COL, EXP_COL, POW_COL, SALARY_COL = 4, 6, 7, 8
# The first TRAIN_SIZE rows are used for training, the rest for testing.
TRAIN_SIZE = 30
# (max_depth, scatter colour) of each tree, in reporting order.
TREES = ((1, "mediumpurple"), (3, "palevioletred"), (None, "mediumturquoise"))

with open("team_big.csv", encoding='Latin-1') as f:
    header, *rows = csv.reader(f)
# Rows that repeat the header line are not data.
rows = [row for row in rows if row != header]

# export_text expects a plain list of str, in the same order as the
# feature columns of X.
feature_names = [header[AGE_COL], header[EXP_COL], header[POW_COL]]

X = np.array(
    [[int(row[AGE_COL]), int(row[EXP_COL]), float(row[POW_COL])] for row in rows],
    dtype=float,
).reshape(-1, 3)
y = np.array([int(row[SALARY_COL]) for row in rows], dtype=float)

x_train, x_test = X[:TRAIN_SIZE], X[TRAIN_SIZE:]
y_train, y_test = y[:TRAIN_SIZE], y[TRAIN_SIZE:]

plt.plot([1, 15000, 25000], [1, 15000, 25000], c="lavender")
for number, (depth, colour) in enumerate(TREES, start=1):
    reg = DecisionTreeRegressor(random_state=0, max_depth=depth)
    reg.fit(x_train, y_train)
    y_hat = reg.predict(x_test)
    mse = np.mean(np.square(y_hat - y_test))
    # NOTE(review): the rule characters of this banner look mis-encoded; the
    # literal is kept unchanged because the intended characters are unknown.
    print("âââââââââ<<<<<<<<<<Results for Decision Tree {0}>>>>>>>>>>ââââââââ".format(number))
    print("MSE: ", mse)
    print("The feature importances: ", reg.feature_importances_)
    print()
    print(export_text(reg, feature_names=feature_names))
    plt.scatter(y_test, y_hat, c=colour)

plt.title("Decision Trees: Predictions vs. Actual Values")
plt.xlabel("Actual Salary Values for Test Data")
plt.ylabel("Salary Predictions for Test Data")
plt.legend(["No-error line", "Decision Tree 1(Max depth: 1)",
            "Decision Tree 2(Max depth: 3)", "Decision Tree 3(Max depth: None)"])
...
httpclient.py
Source: httpclient.py
#! /usr/bin/env python
# Dzmitry Kuzmitch
"""Minimal caching HTTP/1.1 GET client.

Usage: httpclient.py host[:port]/path

A successful (200) response body is stored in ``<name>.cache``; when that
file exists its modification time is sent as ``If-Modified-Since``.
"""

import datetime
import os.path
import random
import socket
import struct
import sys
import time
from email.utils import formatdate

DEFAULT_PORT = 80
# Without a timeout the ``socket.timeout`` handler in main() can never fire.
TIMEOUT_SECONDS = 10
HEADER_END = b'\r\n\r\n'


def parse_link(link):
    """Split ``host[:port]/path`` into ``(host, port, filename)``.

    The port defaults to 80 and the filename to '' when they are missing.
    """
    netloc, _, filename = link.partition('/')
    host, _, port = netloc.partition(':')
    return host, int(port) if port else DEFAULT_PORT, filename


def cache_path(filename):
    """Return the cache file name that belongs to a requested filename."""
    return os.path.basename(filename).split('.')[0] + '.cache'


def build_request(host_header, filename, cache_mtime=None):
    """Return the GET request text.

    :param host_header: value of the Host header
    :param filename: request path without the leading slash
    :param cache_mtime: modification time (seconds since the epoch) of the
        cached copy, or None when there is no cache
    """
    lines = [
        'GET /' + filename + ' HTTP/1.1',
        'Host: ' + host_header,
        # The response is read until EOF, so the server must close the
        # connection after answering.
        'Connection: close',
    ]
    if cache_mtime is not None:
        lines.append('If-Modified-Since: ' + formatdate(cache_mtime, usegmt=True))
    # Exactly one empty line terminates the header block.
    return '\r\n'.join(lines) + '\r\n\r\n'


def split_response(data):
    """Split raw response bytes into ``(status, header_text, body_bytes)``.

    ``status`` is the status code as a string, or '' when the status line
    is missing or malformed.
    """
    head, _, body = data.partition(HEADER_END)
    header_text = head.decode('iso-8859-1')
    status_line = header_text.split('\r\n', 1)[0].split(' ')
    status = status_line[1] if len(status_line) > 1 else ''
    return status, header_text, body


def fetch(server_address, message):
    """Send ``message`` to ``server_address`` and return all bytes received."""
    chunks = []
    with socket.create_connection(server_address, timeout=TIMEOUT_SECONDS) as sock:
        sock.sendall(message.encode())
        while True:
            buf = sock.recv(1024)
            if not buf:
                break
            chunks.append(buf)
    return b''.join(chunks)


def main(link):
    """Request ``link``, print the request, headers and content, update the cache."""
    host, port, filename = parse_link(link)
    cache_file = cache_path(filename)
    cache_mtime = os.path.getmtime(cache_file) if os.path.isfile(cache_file) else None
    message = build_request('{0}:{1}'.format(host, port), filename, cache_mtime)

    print("")
    print("")
    print("")
    print(message)

    try:
        status, headers, body = split_response(fetch((host, port), message))
        print(headers)

        content = ""
        if status == "200":
            content = body.decode(errors='replace')
            # Store the payload byte for byte.
            with open(cache_file, "wb") as f:
                f.write(body)
        elif status == "404":
            content = "404 Not Found"
        # On 304 the cached copy is still valid; as before, nothing is printed.

        print(content)

    except socket.timeout:
        print('\nRequest attempt timed out')

    except OSError:
        print('\nRequest attempt timed out (with an error)')


if __name__ == '__main__':
    if len(sys.argv) != 2:
        sys.exit('usage: httpclient.py host[:port]/path')
    main(sys.argv[1])
...
Statistic.py
Source: Statistic.py
import numpy as np
from scipy.stats import ttest_ind, rankdata, ranksums


def t_student(headers_array, scores, alfa=.05):
    """Pairwise independent two-sample t-tests between algorithms.

    :param headers_array: sequence with one entry per algorithm; only its
        length is used
    :param scores: ``scores[i]`` holds the samples of algorithm ``i``
    :param alfa: significance level
    :return: (n, n) float array; entry [i, j] is 1 when the t statistic of
        algorithm i against j is positive and the p-value is <= ``alfa``,
        otherwise 0
    """
    n = len(headers_array)
    t_statistic = np.zeros((n, n))
    p_value = np.zeros((n, n))
    # t statistic and p-value for every ordered pair.
    for i in range(n):
        for j in range(n):
            t_statistic[i, j], p_value[i, j] = ttest_ind(scores[i], scores[j])
    # Advantage (t > 0) combined with statistical significance (p <= alfa).
    stat_better = ((t_statistic > 0) & (p_value <= alfa)).astype(float)
    return stat_better


def wilcoxon(headers_array, scores, alpha=.05):
    """Pairwise Wilcoxon rank-sum tests on the ranks of the mean scores.

    :param headers_array: sequence with one entry per algorithm; only its
        length is used
    :param scores: 3-D array-like; it is averaged over axis 2 and the first
        axis is indexed by algorithm
        (presumably (algorithms, datasets, folds) -- confirm against caller)
    :param alpha: significance level
    :return: (n, n) float array; entry [i, j] is 1 when the rank-sum
        statistic of algorithm i against j is positive and the p-value is
        <= ``alpha``, otherwise 0
    """
    n = len(headers_array)
    # Mean over axis 2, transposed so that every row lists all algorithms.
    mean_scores = np.mean(scores, axis=2).T
    # Ranks from 1 to the number of algorithms per row; ties share the
    # average rank.
    ranks = np.array([rankdata(row).tolist() for row in mean_scores])
    w_statistic = np.zeros((n, n))
    p_value = np.zeros((n, n))
    for i in range(n):
        for j in range(n):
            w_statistic[i, j], p_value[i, j] = ranksums(ranks.T[i], ranks.T[j])
    # Advantage (statistic > 0) combined with significance (p <= alpha).
    stat_better = ((w_statistic > 0) & (p_value <= alpha)).astype(float)
    return stat_better
Playwright error connection refused in docker
playwright-python advanced setup
How to select an input according to a parent sibling label
Error when installing Microsoft Playwright
Trouble waiting for changes to complete that are triggered by Python Playwright `select_option`
Capturing and Storing Request Data Using Playwright for Python
Can Playwright be used to launch a browser instance
Trouble in Clicking on Log in Google Button of Pop Up Menu Playwright Python
Scrapy Playwright get date by clicking button
React locator example
I solved my problem. My Docker container for the frontend is named "app", which is therefore also the domain name of the frontend application. My application runs locally over HTTP, but the Chromium and Gecko drivers force an HTTPS connection for some domain names, one of which is "app". So I had to rename the Docker container that contains the frontend application.
Check out the latest blogs from LambdaTest on this topic:
The sky’s the limit (and even beyond that) when you want to run test automation. Technology has developed so much that you can reduce time and stay more productive than you used to 10 years ago. You needn’t put up with the limitations brought to you by Selenium if that’s your go-to automation testing tool. Instead, you can pick from various test automation frameworks and tools to write effective test cases and run them successfully.
When it comes to web automation testing, there are a number of frameworks like Selenium, Cypress, PlayWright, Puppeteer, etc., that make it to the ‘preferred list’ of frameworks. The choice of test automation framework depends on a range of parameters like type, complexity, scale, along with the framework expertise available within the team. However, it’s no surprise that Selenium is still the most preferred framework among developers and QAs.
Playwright is a framework that I’ve always heard great things about but never had a chance to pick up until earlier this year. And since then, it’s become one of my favorite test automation frameworks to use when building a new automation project. It’s easy to set up, feature-packed, and one of the fastest, most reliable frameworks I’ve worked with.
The speed at which tests are executed and the “dearth of smartness” in testing are the two major problems developers and testers encounter.
With technology evolving rapidly to meet ever-increasing demand, digital security has become a major concern for the software industry. There are various ways in which digital security can be achieved, Captcha being one of them. Captcha is easy for humans to solve but hard for “bots” and other malicious software to figure out. However, Captcha has always been tricky for testers to automate, as many of them don’t know how to handle Captcha in Selenium or in any other test automation framework.
LambdaTest’s Playwright tutorial will give you a broader idea of the Playwright automation framework, its unique features, and its use cases, with examples to deepen your understanding of Playwright testing. This tutorial offers A to Z guidance, from installing the Playwright framework to best practices and advanced concepts.
Get 100 minutes of automation testing FREE!!