Best Python code snippet using SeleniumBase
ReadPDFFileV2_test.py
Source:ReadPDFFileV2_test.py
...65 assert expected.items() <= metadata.items()66def test_get_pdf_text_with_encrypted(mocker, tmp_path):67 mocker.patch.object(demisto, 'args', return_value={'userPassword': '1234'})68 from ReadPDFFileV2 import get_pdf_text69 text = get_pdf_text(f'{CWD}/encrypted.pdf', f'{tmp_path}/encrypted.txt')70 expected = "XSL FO Sample Copyright © 2002-2005 Antenna House, Inc. All rights reserved.\n\n" \71 "Links in PDF\nPDF link is classified into two parts, link to the specified position in the PDF " \72 "document, and link to the external document.\n" \73 "The internal-destination property of fo:basic-link indicates to link to the position in the same" \74 " document. The externaldestination property indicates to link to external document. " \75 "Below shows the example.\n\nExample of a link to internal destination\nRefer to Purchasing " \76 "Assistance to get more information.\nExample of a link to external destination\nRefer to Purchasing " \77 "Assistance to get more information."78 assert text.startswith(expected)79def test_get_pdf_text_without_encrypted(tmp_path):80 from ReadPDFFileV2 import get_pdf_text81 try:82 get_pdf_text(f'{CWD}/encrypted.pdf', f'{tmp_path}/encrypted.txt')83 raise Exception("Incorrect password exception should've been thrown")84 except ShellException as e:85 assert 'Incorrect password' in str(e)86 assert 'error code: 1' in str(e)87 text = get_pdf_text(f'{CWD}/text-only.pdf', f'{tmp_path}/text-only.txt')88 expected = "×¢×ר×ת"89 assert expected in text90 assert text.startswith('This is a pdf document with a text line within it.')91 text = get_pdf_text(f'{CWD}/text-with-images.pdf', f'{tmp_path}/text-with-images.txt')92 expected = 'Create an ETD Using Adobe Acrobat'93 assert text.startswith(expected)94 text = get_pdf_text(f'{CWD}/scanned.pdf', f'{tmp_path}/scanned.txt')95 expected = '\x0c'96 assert expected == text97def test_get_pdf_htmls_content_with_encrypted(mocker, tmp_path):98 mocker.patch.object(demisto, 'args', return_value={'userPassword': 
'1234'})99 from ReadPDFFileV2 import get_pdf_htmls_content100 from ReadPDFFileV2 import get_images_paths_in_path101 html_text = get_pdf_htmls_content(f'{CWD}/encrypted.pdf', tmp_path)102 expected = 'If you are end user who wishes to use XSL Formatter yourself, you may purchase ' \103 'from our Reseller or direct from Antenna<br/>House.<br/>'104 assert len(get_images_paths_in_path(tmp_path)) != 0, 'Failed to get images from html'105 assert expected in html_text106def test_get_pdf_htmls_content_without_encrypted(tmp_path):107 from ReadPDFFileV2 import get_pdf_htmls_content108 from ReadPDFFileV2 import get_images_paths_in_path...
pdf_read.py
Source:pdf_read.py
# -*- coding: utf-8 -*-
import pdfplumber

# Sample input consumed by the script entry point at the bottom of the file.
f_pdf = "/Users/zhaoxuyang/Library/Containers/com.tencent.xinWeChat/Data/Library/Application Support/com.tencent.xinWeChat/2.0b4.0.9/f4ebfddae17bab807e6cfc88fcc1aa87/Message/MessageTemp/9e20f478899dc29eb19741386f9343c8/File/å°è¯´/å°è¯´/ç§å¯è±æç.pdf"


def get_pdf_text(f_pdf):
    """Return the text of every page of the PDF at *f_pdf* as one string.

    Each page is read with ``page.extract_text()``; pages that yield no
    text are left out. The per-page strings are joined with no separator.
    """
    collected = []
    with pdfplumber.open(f_pdf) as document:
        for page in document.pages:
            page_text = page.extract_text()
            # Skip pages with nothing to extract (None or empty string).
            if not page_text:
                continue
            # NOTE(review): both replacements read as space -> space in this
            # copy of the code; the originals presumably targeted special
            # whitespace characters that were lost in extraction -- confirm.
            page_text = page_text.replace(" ", " ").replace(" ", " ")
            collected.append(page_text)
    return "".join(collected)


if __name__ == "__main__":
    text = get_pdf_text(f_pdf)
    # ... (the remainder of this snippet is truncated in the source listing)
Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!