Best Python code snippet using fMBT_python
duplicates.py
Source:duplicates.py
1import hashlib2import logging3import os4from pathlib import Path5"""6This file is a python file that processes duplicate files. This file is called from the main file PDF_conversion_tool.py, def convert_files.7Author: Joana Cardoso8"""9def remove_duplicates(process_dir, out_dir, progress, style, file_log, remove_duplicates):10 """11 Identifies duplicates and if set_remove_duplicates is set to True it moves duplicates to another folder.12 Parameters13 ----------14 process_dir: str 15 The path to the initial directory to convert16 out_dir: str 17 The output directory where converted files will be placed18 progress: Progressbar 19 The progress bar that is updated while running this file20 style: Style21 The progress bar style22 file_log: list 23 The logging table24 remove_duplicates: bool25 Remove_duplicates is set to True or False, depending whether duplicates are to be removed or not26 Returns27 -------28 out_duplicates: str29 The path to the duplicates directory30 duplicates: set 31 The duplicate files32 pdf_duplos: int33 The number of duplicates that are pdf's34 """35 pdf_duplos = 036 md5_files = []37 out_duplicates = os.path.join(os.path.dirname(38 process_dir), "Duplicates_" + os.path.basename(process_dir))39 logging.info(f'Duplicates directory: {out_duplicates}')40 for root, dirs, files in os.walk(out_dir):41 for name in files:42 md5_files.append(os.path.join(root, name))43 # List duplicates44 md5_dict = {}45 for f in md5_files:46 md5_dict[f] = hashlib.md5(Path(f).read_bytes()).hexdigest()47 result = {}48 for key, value in md5_dict.items():49 if value not in result.values():50 result[key] = value51 keys_a = set(md5_dict.keys())52 keys_b = set(result.keys())53 duplicates = keys_a ^ keys_b54 # Remove duplicates55 logging.info(f'Duplicate files to remove: {len(duplicates)}')56 if len(duplicates) > 0:57 for rmfile in duplicates:58 src = rmfile59 target = rmfile.replace(out_dir, out_duplicates)60 base_dir = os.path.dirname(target)61 if remove_duplicates == True:62 
try:63 Path(base_dir).mkdir(parents=True, exist_ok=True)64 logging.debug(f'Creating duplicates directory: {base_dir}')65 except:66 logging.error(67 f'Failed to create duplicates directory: {base_dir}')68 try:69 os.rename(src, target)70 logging.debug(71 f'Moving duplicate file to duplicates directory: {rmfile}')72 except:73 logging.error(74 f'Failed to move duplicate file to duplicates directory: {rmfile}')75 # set progress bar max value76 progress['value'] += 99 / \77 len(duplicates) if len(duplicates) > 0 else 9978 progress.update()79 progress.update_idletasks()80 style.configure(81 'text.Horizontal.TProgressbar',82 text='{:.0f} %'.format(progress['value'])83 )84 # update log table85 try:86 d = next(item for item in file_log if87 os.path.join(os.path.dirname(item['File']), item['File name']) == rmfile)88 d['Duplicate'] = True89 d['Combined'] = False90 logging.debug(91 f'Updating log table for duplicate: {rmfile}')92 except:93 logging.error(94 f'Failed to update log table for duplicate: {rmfile}')95 else:96 # update log table97 try:98 d = next(item for item in file_log if99 os.path.join(os.path.dirname(item['File']), item['File name']) == rmfile)100 d['Duplicate'] = True101 logging.debug(102 f'Updating log table for duplicate: {rmfile}')103 except:104 logging.error(105 f'Failed to update log table for duplicate: {rmfile}')106 if rmfile.endswith('.pdf'):107 pdf_duplos = + 1108 else:109 progress['value'] += 99110 progress.update()111 progress.update_idletasks()112 style.configure(113 'text.Horizontal.TProgressbar',114 text='{:.0f} %'.format(progress['value'])115 )...
test_xlcompare.py
Source:test_xlcompare.py
#!/usr/bin/env python3
"""Command-line tests for ``xlcompare``.

Every test launches the ``xlcompare`` executable as a subprocess against
workbooks stored under ``inputs/`` next to this file and inspects the text
the tool prints, its exit status, and whether the diff workbook was written.
"""
import os
import pytest
import subprocess

TESTDIR = os.path.dirname(os.path.realpath(__file__))

# Input workbooks, resolved relative to this test file.
OLD_XLS = os.path.join(TESTDIR, 'inputs', 'old.xls')
NEW_XLS = os.path.join(TESTDIR, 'inputs', 'new.xls')
OLD_XLSX = os.path.join(TESTDIR, 'inputs', 'old.xlsx')
NEW_XLSX = os.path.join(TESTDIR, 'inputs', 'new.xlsx')
OLD_COLS_CHG = os.path.join(TESTDIR, 'inputs', 'old_columns_change.xlsx')
NEW_COLS_CHG = os.path.join(TESTDIR, 'inputs', 'new_columns_change.xlsx')
OLD_IDTEST = os.path.join(TESTDIR, 'inputs', 'old_idname.xlsx')
NEW_IDTEST = os.path.join(TESTDIR, 'inputs', 'new_idname.xlsx')
# EXPECTED = os.path.join(TESTDIR, 'expected', 'diffxls.xlsx')

# Output workbook passed to ``-o``, and the copies kept for manual inspection.
OUTDIFF = os.path.join(TESTDIR, 'diff.xlsx')
SAVEDIFF1 = os.path.join(TESTDIR, 'save1.xlsx')
SAVEDIFF2 = os.path.join(TESTDIR, 'save2.xlsx')


def rmfile(filepath):
    """Remove file if it exists."""
    if os.path.isfile(filepath):
        os.remove(filepath)


def verify_common(cmd):
    """Run *cmd* and assert the checks shared by every successful comparison.

    Parameters
    ----------
    cmd : list of str
        Argument vector handed to ``subprocess.run``.

    Returns
    -------
    subprocess.CompletedProcess
        The finished process with ``stdout``/``stderr`` captured as text, so
        the caller can make further assertions on the output.
    """
    result = subprocess.run(cmd, capture_output=True, text=True)
    assert result.stderr == ''
    assert 'Generated' in result.stdout
    assert 'Done.' in result.stdout
    assert os.path.isfile(OUTDIFF)
    return result


# Test basic entry point to xlcompare
def test_entrypoint():
    """``xlcompare --help`` exits with status 0."""
    # Argument-list form, matching how every other test launches the tool;
    # no shell is involved and ``returncode`` is the real exit status.
    completed = subprocess.run(['xlcompare', '--help'])
    assert completed.returncode == 0


# Test comparison of .xls vs .xls
def test_xls_vs_xls():
    """Compare old.xls with new.xls and keep the diff as save1.xlsx."""
    rmfile(OUTDIFF)
    cmd = ['xlcompare', OLD_XLS, NEW_XLS, '-o', OUTDIFF]
    result = verify_common(cmd)
    assert 'Deleted rows: 2' in result.stdout
    assert 'Modified rows: 3' in result.stdout
    # save for visual check, no easy way to automate
    rmfile(SAVEDIFF1)
    os.rename(OUTDIFF, SAVEDIFF1)
    rmfile(OUTDIFF)


# Test comparison of .xlsx vs .xlsx
def test_xlsx_vs_xlsx():
    """Compare old.xlsx with new.xlsx and keep the diff as save2.xlsx."""
    rmfile(OUTDIFF)
    cmd = ['xlcompare', OLD_XLSX, NEW_XLSX, '-o', OUTDIFF]
    result = verify_common(cmd)
    assert 'Deleted rows: 2' in result.stdout
    assert 'Modified rows: 3' in result.stdout
    # save for visual check, no easy way to automate
    rmfile(SAVEDIFF2)
    os.rename(OUTDIFF, SAVEDIFF2)
    rmfile(OUTDIFF)


# Test comparison of .xls vs .xlsx
def test_xls_vs_xlsx():
    """Compare old.xls with new.xlsx."""
    rmfile(OUTDIFF)
    cmd = ['xlcompare', OLD_XLS, NEW_XLSX, '-o', OUTDIFF]
    result = verify_common(cmd)
    assert 'Deleted rows: 2' in result.stdout
    assert 'Modified rows: 3' in result.stdout
    rmfile(OUTDIFF)


# Test comparison of .xlsx vs .xls
def test_xlsx_vs_xls():
    """Compare old.xlsx with new.xls."""
    rmfile(OUTDIFF)
    cmd = ['xlcompare', OLD_XLSX, NEW_XLS, '-o', OUTDIFF]
    result = verify_common(cmd)
    assert 'Deleted rows: 2' in result.stdout
    assert 'Modified rows: 3' in result.stdout
    rmfile(OUTDIFF)


# Test comparison of column insertion/deletion
def test_col_change():
    """Workbooks that differ only in their columns report the column sets."""
    rmfile(OUTDIFF)
    cmd = ['xlcompare', OLD_COLS_CHG, NEW_COLS_CHG, '-o', OUTDIFF]
    result = verify_common(cmd)
    assert 'No differences in common columns found' in result.stdout
    # Verify deleted columns - column output not ordered
    assert "Columns in old but not new:" in result.stdout
    assert "'Delete1'" in result.stdout
    assert "'Delete2'" in result.stdout
    assert "'Delete3'" in result.stdout
    # Verify inserted column
    assert "Columns in new but not old: {'Insert 1'}" in result.stdout
    rmfile(OUTDIFF)


# Tests with bad IDs
def test_bad_id_xls():
    """An unknown ``--id`` column on .xls inputs is an error with no output file."""
    rmfile(OUTDIFF)
    cmd = ['xlcompare', OLD_XLS, NEW_XLS, '-o', OUTDIFF, '--id', 'BAD_ID']
    result = subprocess.run(cmd, capture_output=True, text=True)
    assert 'ERROR: Column BAD_ID not found in' in result.stdout
    assert result.returncode == 1
    assert not os.path.isfile(OUTDIFF)


def test_bad_id_xlsx():
    """An unknown ``--id`` column on .xlsx inputs is an error with no output file."""
    rmfile(OUTDIFF)
    cmd = ['xlcompare', OLD_XLSX, NEW_XLSX, '-o', OUTDIFF, '--id', 'BAD_ID']
    result = subprocess.run(cmd, capture_output=True, text=True)
    assert 'ERROR: Column BAD_ID not found in' in result.stdout
    assert result.returncode == 1
    assert not os.path.isfile(OUTDIFF)


def test_non_existent_id():
    """Without ``--id``, the idname workbooks fail on the missing ``ID`` column."""
    rmfile(OUTDIFF)
    cmd = ['xlcompare', OLD_IDTEST, NEW_IDTEST, '-o', OUTDIFF]
    result = subprocess.run(cmd, capture_output=True, text=True)
    assert 'ERROR: Column ID not found in' in result.stdout
    assert result.returncode == 1
    assert not os.path.isfile(OUTDIFF)


# Test with non-standard good ID
def test_non_standard_good_id():
    """Passing ``--id REQID`` lets the idname workbooks compare successfully."""
    rmfile(OUTDIFF)
    cmd = ['xlcompare', OLD_IDTEST, NEW_IDTEST, '-o', OUTDIFF, '--id', 'REQID']
    result = verify_common(cmd)
    assert 'Deleted rows: 2' in result.stdout
    assert 'Modified rows: 3' in result.stdout
    # NOTE(review): the listing this was recovered from is cut off ("...")
    # right after the assertion above; any trailing cleanup is not visible
    # and has deliberately not been invented here - confirm against the
    # full file.
clearn_deploy_target.py
Source:clearn_deploy_target.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Delete a fixed set of directories and files from ``_docs_repo``.

All paths are relative to the current working directory. Entries that are
missing are skipped silently, so the script can be run repeatedly. The
clean-up runs at module level, i.e. as soon as the file is executed or
imported.
"""

import shutil
import os

print("clearn _docs_repo...")


def safe_rmtree(path):
    """Remove the directory tree at *path*.

    Does nothing unless ``os.path.isdir(path)`` is true, so a missing path
    or a plain file is left alone.
    """
    if os.path.isdir(path):
        shutil.rmtree(path)


def safe_rmfile(path):
    """Remove the file at *path*.

    Does nothing unless ``os.path.isfile(path)`` is true, so a missing path
    or a directory is left alone.
    """
    if os.path.isfile(path):
        os.remove(path)


# Directories removed first, in this order.
_TARGET_DIRS = (
    "_docs_repo/api",
    "_docs_repo/articles",
    "_docs_repo/fonts",
    "_docs_repo/img",
    "_docs_repo/styles",
)

# Individual files removed afterwards, in this order.
_TARGET_FILES = (
    "_docs_repo/logo.svg",
    "_docs_repo/manifest.json",
    "_docs_repo/Readme.html",
    "_docs_repo/search-stopwords.json",
    "_docs_repo/toc.html",
    "_docs_repo/xrefmap.yml",
    "_docs_repo/favicon.ico",
    "_docs_repo/index.html",
)

for _target in _TARGET_DIRS:
    safe_rmtree(_target)

for _target in _TARGET_FILES:
    safe_rmfile(_target)
...
Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks such as Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!