Best Python code snippet using playwright-python
downloadermiddlewares.py
Source:downloadermiddlewares.py
...80 playwright_installed = is_playwright_installed()81 if not playwright_installed:82 logger.info(83 'playwright libraries not installed, start to install')84 install_playwright()85 else:86 logger.info('playwright libraries already installed')87 # init settings88 cls.window_width = settings.get(89 'GERAPY_PLAYWRIGHT_WINDOW_WIDTH', GERAPY_PLAYWRIGHT_WINDOW_WIDTH)90 cls.window_height = settings.get(91 'GERAPY_PLAYWRIGHT_WINDOW_HEIGHT', GERAPY_PLAYWRIGHT_WINDOW_HEIGHT)92 cls.default_user_agent = settings.get('GERAPY_PLAYWRIGHT_DEFAULT_USER_AGENT',93 GERAPY_PLAYWRIGHT_DEFAULT_USER_AGENT)94 cls.headless = settings.get(95 'GERAPY_PLAYWRIGHT_HEADLESS', GERAPY_PLAYWRIGHT_HEADLESS)96 cls.channel = settings.get(97 'GERAPY_PLAYWRIGHT_CHANNEL', GERAPY_PLAYWRIGHT_CHANNEL)98 cls.slow_mo = settings.get(99 'GERAPY_PLAYWRIGHT_SLOW_MO', GERAPY_PLAYWRIGHT_SLOW_MO)100 # cls.ignore_default_args = settings.get('GERAPY_PLAYWRIGHT_IGNORE_DEFAULT_ARGS',101 # GERAPY_PLAYWRIGHT_IGNORE_DEFAULT_ARGS)102 # cls.handle_sigint = settings.get(103 # 'GERAPY_PLAYWRIGHT_HANDLE_SIGINT', GERAPY_PLAYWRIGHT_HANDLE_SIGINT)104 # cls.handle_sigterm = settings.get(105 # 'GERAPY_PLAYWRIGHT_HANDLE_SIGTERM', GERAPY_PLAYWRIGHT_HANDLE_SIGTERM)106 # cls.handle_sighup = settings.get(107 # 'GERAPY_PLAYWRIGHT_HANDLE_SIGHUP', GERAPY_PLAYWRIGHT_HANDLE_SIGHUP)108 cls.devtools = settings.get(109 'GERAPY_PLAYWRIGHT_DEVTOOLS', GERAPY_PLAYWRIGHT_DEVTOOLS)110 cls.executable_path = settings.get(111 'GERAPY_PLAYWRIGHT_EXECUTABLE_PATH', GERAPY_PLAYWRIGHT_EXECUTABLE_PATH)112 cls.disable_extensions = settings.get('GERAPY_PLAYWRIGHT_DISABLE_EXTENSIONS',113 GERAPY_PLAYWRIGHT_DISABLE_EXTENSIONS)114 cls.hide_scrollbars = settings.get(115 'GERAPY_PLAYWRIGHT_HIDE_SCROLLBARS', GERAPY_PLAYWRIGHT_HIDE_SCROLLBARS)116 cls.mute_audio = settings.get(117 'GERAPY_PLAYWRIGHT_MUTE_AUDIO', GERAPY_PLAYWRIGHT_MUTE_AUDIO)118 cls.no_sandbox = settings.get(119 'GERAPY_PLAYWRIGHT_NO_SANDBOX', GERAPY_PLAYWRIGHT_NO_SANDBOX)120 cls.disable_setuid_sandbox 
= settings.get('GERAPY_PLAYWRIGHT_DISABLE_SETUID_SANDBOX',121 GERAPY_PLAYWRIGHT_DISABLE_SETUID_SANDBOX)122 cls.disable_gpu = settings.get(123 'GERAPY_PLAYWRIGHT_DISABLE_GPU', GERAPY_PLAYWRIGHT_DISABLE_GPU)124 cls.download_timeout = settings.get('GERAPY_PLAYWRIGHT_DOWNLOAD_TIMEOUT',125 settings.get('DOWNLOAD_TIMEOUT', GERAPY_PLAYWRIGHT_DOWNLOAD_TIMEOUT))126 # cls.ignore_resource_types = settings.get('GERAPY_PLAYWRIGHT_IGNORE_RESOURCE_TYPES',127 # GERAPY_PLAYWRIGHT_IGNORE_RESOURCE_TYPES)128 cls.screenshot = settings.get(129 'GERAPY_PLAYWRIGHT_SCREENSHOT', GERAPY_PLAYWRIGHT_SCREENSHOT)130 cls.pretend = settings.get(131 'GERAPY_PLAYWRIGHT_PRETEND', GERAPY_PLAYWRIGHT_PRETEND)132 cls.sleep = settings.get(133 'GERAPY_PLAYWRIGHT_SLEEP', GERAPY_PLAYWRIGHT_SLEEP)134 # cls.enable_request_interception = settings.getbool('GERAPY_ENABLE_REQUEST_INTERCEPTION',135 # GERAPY_ENABLE_REQUEST_INTERCEPTION)136 cls.retry_enabled = settings.getbool('RETRY_ENABLED')137 cls.max_retry_times = settings.getint('RETRY_TIMES')138 cls.retry_http_codes = set(int(x)139 for x in settings.getlist('RETRY_HTTP_CODES'))140 cls.priority_adjust = settings.getint('RETRY_PRIORITY_ADJUST')141 cls.proxy = settings.get('GERAPY_PLAYWRIGHT_PROXY')142 cls.proxy_credential = settings.get(143 'GERAPY_PLAYWRIGHT_PROXY_CREDENTIAL')144 return cls()145 async def _process_request(self, request, spider):146 """147 use playwright to process spider148 :param request:149 :param spider:150 :return:151 """152 # get playwright meta153 playwright_meta = request.meta.get('playwright') or {}154 logger.debug('playwright_meta %s', playwright_meta)155 if not isinstance(playwright_meta, dict) or len(playwright_meta.keys()) == 0:156 return157 options = {158 'headless': self.headless,159 'args': [],160 }161 if self.executable_path is not None:162 options['executablePath'] = self.executable_path163 if self.slow_mo is not None:164 options['slowMo'] = self.slow_mo165 if self.devtools is not None:166 options['devtools'] = self.devtools167 
if self.channel is not None:168 options['channel'] = self.channel169 # if self.ignore_default_args is not None:170 # options['ignoreDefaultArgs'] = self.ignore_default_args171 # if self.handle_sigint:172 # options['handleSIGINT'] = self.handle_sigint173 # if self.handle_sigterm:174 # options['handleSIGTERM'] = self.handle_sigterm175 # if self.handle_sighup:176 # options['handleSIGHUP'] = self.handle_sighup177 if self.disable_extensions is not None:178 options['args'].append('--disable-extensions')179 if self.hide_scrollbars is not None:180 options['args'].append('--hide-scrollbars')181 if self.mute_audio is not None:182 options['args'].append('--mute-audio')183 if self.no_sandbox is not None:184 options['args'].append('--no-sandbox')185 if self.disable_setuid_sandbox is not None:186 options['args'].append('--disable-setuid-sandbox')187 if self.disable_gpu is not None:188 options['args'].append('--disable-gpu')189 # pretend as normal browser190 _pretend = self.pretend # get global pretend setting191 if playwright_meta.get('pretend') is not None:192 # get local pretend setting to overwrite global193 _pretend = playwright_meta.get('pretend')194 # set proxy and proxy credential195 _proxy = self.proxy196 if playwright_meta.get('proxy') is not None:197 _proxy = playwright_meta.get('proxy')198 if _proxy:199 options['proxy'] = {200 'server': _proxy201 }202 _proxy_credential = self.proxy_credential203 if playwright_meta.get('proxy_credential') is not None:204 _proxy_credential = playwright_meta.get('proxy_credential')205 if _proxy_credential and _proxy:206 options['proxy'].update(_proxy_credential)207 logger.debug('set options %s', options)208 # set default user_agent209 _user_agent = self.default_user_agent210 # get Scrapy request ua, exclude default('Scrapy/2.5.0 (+https://scrapy.org)')211 if 'Scrapy' not in request.headers.get('User-Agent').decode():212 _user_agent = request.headers.get(213 'User-Agent').decode()214 async with async_playwright() as playwright:215 browser 
= await playwright.chromium.launch(**options)216 context = await browser.new_context(217 viewport={'width': self.window_width,218 'height': self.window_height},219 user_agent=_user_agent220 )221 # set cookies222 parse_result = urllib.parse.urlsplit(request.url)223 domain = parse_result.hostname224 _cookies = []225 if isinstance(request.cookies, dict):226 _cookies = [{'name': k, 'value': v, 'domain': domain, 'path': '/'}227 for k, v in request.cookies.items()]228 else:...
utils.py
Source:utils.py
...85 target: Optional[str] = None,86 operation: Optional[Callable[[Page], Awaitable[None]]] = None,87 ) -> Optional[bytes]:88 async with self.lock:89 async with async_playwright() as playwright:90 self.browser = await self.get_browser(playwright)91 self._inter_log("open browser")92 if viewport:93 constext = await self.browser.new_context(94 viewport={95 "width": viewport["width"],96 "height": viewport["height"],97 },98 device_scale_factor=viewport.get("deviceScaleFactor", 1),99 )100 page = await constext.new_page()101 else:102 page = await self.browser.new_page()103 if operation:...
request.py
Source:request.py
1from scrapy import Request2import copy3class PlaywrightRequest(Request):4 """5 Scrapy ``Request`` subclass providing additional arguments6 """7 def __init__(self, url, callback=None, wait_until=None, wait_for=None, script=None, actions=None, proxy=None,8 proxy_credential=None, sleep=None, timeout=None, ignore_resource_types=None, pretend=None, screenshot=None, meta=None,9 *args, **kwargs):10 """11 :param url: request url12 :param callback: callback13 :param wait_until: one of "load", "domcontentloaded", "networkidle".14 see https://playwright.dev/python/docs/api/class-page#page-wait-for-load-state, default is `domcontentloaded`15 :param wait_for: wait for some element to load, also supports dict16 :param script: script to execute17 :param actions: actions defined for execution of Page object18 :param proxy: use proxy for this time, like `http://x.x.x.x:x`19 :param proxy_credential: the proxy credential, like `{'username': 'xxxx', 'password': 'xxxx'}`20 :param sleep: time to sleep after loaded, override `GERAPY_PLAYWRIGHT_SLEEP`21 :param timeout: load timeout, override `GERAPY_PLAYWRIGHT_DOWNLOAD_TIMEOUT`22 :param ignore_resource_types: ignored resource types, override `GERAPY_PLAYWRIGHT_IGNORE_RESOURCE_TYPES`23 :param pretend: pretend as normal browser, override `GERAPY_PLAYWRIGHT_PRETEND`24 :param screenshot: ignored resource types, see25 https://playwright.dev/python/docs/api/class-page#page-screenshot,26 override `GERAPY_PLAYWRIGHT_SCREENSHOT`27 :param args:28 :param kwargs:29 """30 # use meta info to save args31 meta = copy.deepcopy(meta) or {}32 playwright_meta = meta.get('playwright') or {}33 self.wait_until = playwright_meta.get('wait_until') if playwright_meta.get(34 'wait_until') is not None else (wait_until or 'domcontentloaded')35 self.wait_for = playwright_meta.get('wait_for') if playwright_meta.get(36 'wait_for') is not None else wait_for37 self.script = playwright_meta.get('script') if playwright_meta.get(38 'script') is not None else script39 
self.actions = playwright_meta.get('actions') if playwright_meta.get(40 'actions') is not None else actions41 self.sleep = playwright_meta.get('sleep') if playwright_meta.get(42 'sleep') is not None else sleep43 self.proxy = playwright_meta.get('proxy') if playwright_meta.get(44 'proxy') is not None else proxy45 self.proxy_credential = playwright_meta.get('proxy_credential') if playwright_meta.get(46 'proxy_credential') is not None else proxy_credential47 self.pretend = playwright_meta.get('pretend') if playwright_meta.get(48 'pretend') is not None else pretend49 self.timeout = playwright_meta.get('timeout') if playwright_meta.get(50 'timeout') is not None else timeout51 # self.ignore_resource_types = playwright_meta.get('ignore_resource_types') if playwright_meta.get(52 # 'ignore_resource_types') is not None else ignore_resource_types53 self.screenshot = playwright_meta.get('screenshot') if playwright_meta.get(54 'screenshot') is not None else screenshot55 playwright_meta = meta.setdefault('playwright', {})56 playwright_meta['wait_until'] = self.wait_until57 playwright_meta['wait_for'] = self.wait_for58 playwright_meta['script'] = self.script59 playwright_meta['actions'] = self.actions60 playwright_meta['sleep'] = self.sleep61 playwright_meta['proxy'] = self.proxy62 playwright_meta['proxy_credential'] = self.proxy_credential63 playwright_meta['pretend'] = self.pretend64 playwright_meta['timeout'] = self.timeout65 playwright_meta['screenshot'] = self.screenshot66 # playwright_meta['ignore_resource_types'] = self.ignore_resource_types...
sync_playwright_remote.py
Source:sync_playwright_remote.py
import asyncio
import importlib.metadata
import re
from typing import Any, Tuple

from greenlet import greenlet
from playwright._impl._api_types import Error
from playwright._impl._connection import Connection
from playwright._impl._object_factory import create_remote_object
from playwright._impl._playwright import Playwright
from playwright._impl._transport import WebSocketTransport
from playwright.sync_api._generated import Playwright as SyncPlaywright


def _version_tuple(version: str) -> Tuple[int, ...]:
    """Return the leading numeric release of *version* as a tuple of ints.

    Only the dotted-digits prefix is used, so ``"1.15.0a1"`` yields
    ``(1, 15, 0)``. The result is zero-padded to at least three components
    so that ``"1.15"`` compares equal to ``"1.15.0"``. A string with no
    numeric prefix yields ``(0, 0, 0)``.
    """
    match = re.match(r'[0-9]+(?:\.[0-9]+)*', version)
    parts = [int(piece) for piece in match.group().split('.')] if match else []
    parts += [0] * (3 - len(parts))
    return tuple(parts)


class SyncPlaywrightRemoteContextManager:
    """Context manager that yields a sync Playwright object over a WebSocket.

    The connection is made to ``ws_endpoint`` through ``WebSocketTransport``
    instead of a locally spawned driver process.
    """

    def __init__(self, ws_endpoint: str) -> None:
        """Store the WebSocket endpoint; no connection is opened here."""
        # Declared only; assigned by the callback installed in __enter__.
        self._playwright: SyncPlaywright
        self._ws_endpoint = ws_endpoint

    def _make_connection(self, dispatcher_fiber, object_factory, transport, loop) -> Connection:
        """Build a ``Connection`` with the argument list the installed
        playwright version expects.

        Versions before 1.15.0 are called without ``loop``; later versions
        receive it as a fourth argument.

        :param dispatcher_fiber: greenlet that runs the connection
        :param object_factory: accepted for signature compatibility; the
            module-level ``create_remote_object`` is always what gets passed
        :param transport: transport handed to the connection
        :param loop: event loop, forwarded only for playwright >= 1.15.0
        :return: the constructed ``Connection``
        """
        # Compare numerically: as plain strings '1.9.0' < '1.15.0' is False,
        # which would misclassify 1.2.x-1.9.x as newer than 1.15.0.
        installed = _version_tuple(importlib.metadata.version('playwright'))
        if installed < (1, 15, 0):
            return Connection(
                dispatcher_fiber,
                create_remote_object,
                transport)
        return Connection(
            dispatcher_fiber,
            create_remote_object,
            transport,
            loop)

    def __enter__(self) -> SyncPlaywright:
        """Connect to the endpoint and return the sync Playwright object.

        :raises Error: if an asyncio event loop is already running in the
            calling thread
        """
        loop: asyncio.AbstractEventLoop
        own_loop = None
        try:
            loop = asyncio.get_running_loop()
        except RuntimeError:
            # No running loop: create one and remember that we own it so
            # greenlet_main can close it afterwards.
            loop = asyncio.new_event_loop()
            own_loop = loop
        if loop.is_running():
            raise Error(
                """It looks like you are using Playwright Sync API inside the asyncio loop.
Please use the Async API instead."""
            )

        def greenlet_main() -> None:
            loop.run_until_complete(self._connection.run_as_sync())
            if own_loop:
                loop.run_until_complete(loop.shutdown_asyncgens())
                loop.close()

        dispatcher_fiber = greenlet(greenlet_main)
        self._connection = self._make_connection(
            dispatcher_fiber,
            create_remote_object,
            WebSocketTransport(loop, self._ws_endpoint),
            loop)

        g_self = greenlet.getcurrent()

        def callback_wrapper(playwright_impl: Playwright) -> None:
            # Store the wrapped object, then hand control back to the
            # greenlet that called __enter__.
            self._playwright = SyncPlaywright(playwright_impl)
            g_self.switch()

        self._connection.call_on_object_with_known_name(
            "Playwright", callback_wrapper)

        # Run the dispatcher until callback_wrapper switches back here.
        dispatcher_fiber.switch()
        playwright = self._playwright
        # Let callers end the session with playwright.stop().
        playwright.stop = self.__exit__  # type: ignore
        return playwright

    def start(self) -> SyncPlaywright:
        """Connect without a ``with`` block; same result as ``__enter__``."""
        return self.__enter__()

    def __exit__(self, *args: Any) -> None:
        """Stop the underlying connection; arguments are ignored."""
        self._connection.stop_sync()


def sync_playwright_remote(ws_endpoint: str) -> SyncPlaywrightRemoteContextManager:...
LambdaTest’s Playwright tutorial gives you a broad overview of the Playwright automation framework, its unique features, and its use cases, with examples to deepen your understanding of Playwright testing. The tutorial offers A-to-Z guidance, from installing the Playwright framework to best practices and advanced concepts.
Get 100 minutes of automation testing FREE!!