Best Python code snippet using avocado_python
hash_compute.py
Source:hash_compute.py
import csv
import hashlib
import os
from collections import defaultdict, namedtuple
from typing import Dict, List, Set, Tuple

# One row of the checksum CSV, keyed elsewhere by the input file path.
FileChecksum = namedtuple(
    "FileChecksum",
    # checksum, source_file and source_checksum are optional.
    field_names=("checksum", "source", "source_checksum"),
)

# A file whose checksum differs from the recorded one (or was recomputed).
ModifiedFiles = namedtuple(
    "ModifiedFiles",
    field_names=["filename", "is_source", "checksum"]
)

FileList = List[str]


def md5sum(filename: str, blocksize: int = 65536) -> str:
    """ Compute the MD5 checksum of {filename}, reading it in chunks of {blocksize} bytes

    :param filename: Filename
    :param blocksize: Size (in bytes) of each chunk read from disk [Avoid memory error]
    :return: Checksum as a hexadecimal string

    >>> md5sum("tests/test_checksum/file1.txt")
    'de0fa52e7c33b285257b47ff21e7b2f8'
    >>> md5sum("tests/test_checksum/file2.txt", blocksize=128)
    '7cbcc25fca397efe06506a1dc0d55a9e'
    """
    _hash = hashlib.md5()
    with open(filename, "rb") as f:
        # iter() with a sentinel stops as soon as read() returns b"" (end of file)
        for block in iter(lambda: f.read(blocksize), b""):
            _hash.update(block)
    return _hash.hexdigest()


def check_md5sum(filenames: Dict[str, str], blocksize: int = 65536) -> Dict[str, Tuple[str, bool]]:
    """ Recompute the checksum of each file and compare it with the recorded one

    Files that do not exist are reported on stdout and left out of the result.

    :param filenames: Dictionary of filenames -> Recorded checksum
    :param blocksize: Size (in bytes) of each chunk read from disk [Avoid memory error]
    :return: Dictionary(Filepath -> (New Hash, True if the new hash equals the recorded one))

    >>> check_md5sum({"tests/test_checksum/file1.txt": 'de0fa52e7c33b285257b47ff21e7b2f8'})
    {'tests/test_checksum/file1.txt': ('de0fa52e7c33b285257b47ff21e7b2f8', True)}
    >>> check_md5sum({"tests/test_checksum/file2.txt": "not_the_right_checksum"})
    {'tests/test_checksum/file2.txt': ('7cbcc25fca397efe06506a1dc0d55a9e', False)}
    """
    out = {}
    for file, _hash in filenames.items():
        if not os.path.exists(file):
            print(f"File not found : {file}")
            continue
        new_hash = md5sum(file, blocksize=blocksize)
        out[file] = (new_hash, new_hash == _hash)
    return out


def read_checksum_csv(filepath: str) -> Dict[str, FileChecksum]:
    """ Read the file at {filepath} and parse its content.

    Rows whose `input` file does not exist are reported on stdout and skipped.

    :param filepath: CSV containing at least a key `input` but additionaly checksum, source and source_checksum
    :return: Parsed content as a dict with FileChecksum information

    >>> checksums = read_checksum_csv("tests/test_checksum/checksums.csv")
    >>> checksums["tests/test_checksum/file1.txt"] == FileChecksum(
    ...     checksum="de0fa52e7c33b285257b47ff21e7b2f8", source=None, source_checksum=None)
    True
    >>> checksums["tests/test_checksum/file2.txt"] == FileChecksum(
    ...     checksum="fake_checksum",
    ...     source="tests/test_checksum/source2.txt", source_checksum="c7289364c9c6fcd3c5e6974edfd3abb7")
    True
    """
    out = {}
    # newline="" lets the csv module do its own newline handling, as its documentation requires
    with open(filepath, newline="") as f:
        reader = csv.DictReader(f)
        for line in reader:
            if not os.path.exists(line["input"]):
                print(f"File not found : {line['input']}")
                continue
            out[line["input"]] = FileChecksum(
                checksum=line.get("checksum"),
                source=line.get("source"),
                source_checksum=line.get("source_checksum")
            )
    return out


def check_checksum_from_file(filepath) -> Tuple[FileList, FileList]:
    """ Given a CSV of input and input's source file, track files which needs relemmatization.

    An input needs relemmatization when its own checksum changed or when the
    checksum of the source it was produced from changed.

    :param filepath: CSV containing at least a key `input` but additionaly checksum, source and source_checksum
    :return: (Sorted list of files needing relemmatization, Sorted list of sources which changed)

    >>> check_checksum_from_file("./tests/test_checksum/checksums.csv")
    (['tests/test_checksum/file2.txt', 'tests/test_checksum/file3.txt'], \
['tests/test_checksum/source3.txt'])
    """
    checksum_dict = read_checksum_csv(filepath)

    recorded_inputs: Dict[str, str] = {}
    recorded_sources: Dict[str, str] = {}
    # Source filepath to input filepath map
    source_to_input: Dict[str, List[str]] = defaultdict(list)

    for input_path, filechecksum in checksum_dict.items():
        # Register the plain-text file itself
        recorded_inputs[input_path] = filechecksum.checksum
        # Register its source and map it to the plain-text file
        # one source can create multiple files, so we have a list there.
        if filechecksum.source:
            recorded_sources[filechecksum.source] = filechecksum.source_checksum
            source_to_input[filechecksum.source].append(input_path)

    input_status: Dict[str, Tuple[str, bool]] = check_md5sum(recorded_inputs)
    source_status: Dict[str, Tuple[str, bool]] = check_md5sum(recorded_sources)

    # A set gives O(1) de-duplication; the result is sorted on return anyway
    need_relemmatization: Set[str] = set()
    changed_sources: List[str] = []

    # Check source files first: a changed source invalidates every input built from it
    for source_file, (_, unchanged) in source_status.items():
        if not unchanged:
            need_relemmatization.update(source_to_input[source_file])
            changed_sources.append(source_file)

    # Then check the plain-text files themselves
    for input_file, (_, unchanged) in input_status.items():
        if not unchanged:
            need_relemmatization.add(input_file)

    return sorted(need_relemmatization), sorted(changed_sources)


def write_csv_checksums(filepath: str, lemmatized_files: List[str], _write=True, update_source=False,
                        ) -> Tuple[List[List[str]], List[ModifiedFiles]]:
    """ Rebuild the checksum CSV after some files have been relemmatized

    :param filepath: CSV file containing the map
    :param lemmatized_files: File which have been lemmatized
    :param update_source: Update the source hash
    :param _write: Whether to write or not
    :return: CSV content as well as the list of modified files and their type
        (each changed source is listed once, even when several inputs share it)

    >>> write_csv_checksums("./tests/test_checksum/checksums.csv", ['tests/test_checksum/file2.txt'], _write=False) == (
    ...    [
    ...        ['input', 'checksum', 'source', 'source_checksum'],
    ...        ['tests/test_checksum/file1.txt', 'de0fa52e7c33b285257b47ff21e7b2f8', '', ''],
    ...        ['tests/test_checksum/file2.txt', '7cbcc25fca397efe06506a1dc0d55a9e',
    ...            'tests/test_checksum/source2.txt', 'c7289364c9c6fcd3c5e6974edfd3abb7'],
    ...        ['tests/test_checksum/file3.txt', 'ed7c77f2ed00549a0219525d120a72be',
    ...            'tests/test_checksum/source3.txt', 'fake_checksum']
    ...    ],
    ...    [
    ...        ModifiedFiles('tests/test_checksum/file2.txt', checksum='7cbcc25fca397efe06506a1dc0d55a9e',
    ...                      is_source=False),
    ...        ModifiedFiles('tests/test_checksum/source3.txt', checksum='d8203a2bd71947993eb72826b08eb11d',
    ...                      is_source=True)
    ...    ]
    ... )
    True
    """
    checksum_dict = read_checksum_csv(filepath)
    relemmatized = set(lemmatized_files)  # O(1) membership inside the loop

    rows = [
        ["input", "checksum", "source", "source_checksum"]
    ]
    modified_files = []
    sources_checksums: Dict[str, str] = {}  # cache: hash each source file only once
    reported_sources: Set[str] = set()  # sources already listed in modified_files

    for file, checksum in checksum_dict.items():
        if file in relemmatized:
            _hash = md5sum(file)
            modified_files.append(ModifiedFiles(
                filename=file,
                is_source=False,
                checksum=_hash
            ))
        else:
            # If we did not relemmatize, plainly carry the recorded checksum over
            _hash = checksum.checksum

        source_hash = None
        if checksum.source:
            if checksum.source not in sources_checksums:
                sources_checksums[checksum.source] = md5sum(checksum.source)
            source_hash = sources_checksums[checksum.source]
            # One source can feed several inputs: report a changed source only once
            if checksum.source_checksum != source_hash and checksum.source not in reported_sources:
                reported_sources.add(checksum.source)
                modified_files.append(ModifiedFiles(
                    filename=checksum.source,
                    is_source=True,
                    checksum=source_hash
                ))

        # Keep the recorded source checksum unless the caller asked to refresh it
        new_source_checksum = source_hash if update_source else checksum.source_checksum
        rows.append([
            file, _hash,
            checksum.source or "",
            new_source_checksum or ""
        ])

    if _write:
        # newline="" prevents "\r\r\n" line endings on platforms that translate "\n"
        with open(filepath, "w", newline="") as f:
            writer = csv.writer(f)
            writer.writerows(rows)

    # NOTE(review): the visible excerpt is cut off right after the write above; this
    # return is reconstructed from the annotated return type and the doctest —
    # confirm against the complete file.
    return rows, modified_files
test_storage.py
Source:test_storage.py
# -*- coding: utf-8 -*-
from unittest import TestCase

from scrapy.settings import Settings

from scrapy_tracker.storage.memory import MemoryStorage
from scrapy_tracker.storage.redis import RedisStorage
from scrapy_tracker.storage.sqlalchemy import SqlAlchemyStorage
from tests import TEST_KEY, TEST_CHECKSUM, mock


class TestMemoryStorage(TestCase):
    """Exercise ``getset`` on the in-memory storage backend."""

    def setUp(self):
        self.storage = MemoryStorage(None)

    def test_getset(self):
        """``getset`` is expected to return the previous value, or None for an unseen key."""
        # First write for the key: nothing stored before
        previous = self.storage.getset(TEST_KEY, TEST_CHECKSUM)
        self.assertIsNone(previous)

        # Each further write hands back what the preceding call stored
        previous = self.storage.getset(TEST_KEY, 'new_checksum')
        self.assertEqual(TEST_CHECKSUM, previous)
        previous = self.storage.getset(TEST_KEY, TEST_CHECKSUM)
        self.assertEqual('new_checksum', previous)

        # A different key starts out empty
        previous = self.storage.getset('new_key', TEST_CHECKSUM)
        self.assertIsNone(previous)


class TestSqlAlchemyStorage(TestCase):
    """Exercise ``getset`` on the SQLAlchemy backend, using an in-memory SQLite engine."""

    def setUp(self):
        tracker_settings = Settings({
            'TRACKER_SQLALCHEMY_ENGINE': 'sqlite:///:memory:',
            'TRACKER_SQLALCHEMY_FLUSH_DB': True
        })
        self.storage = SqlAlchemyStorage(tracker_settings)

    def test_getset(self):
        """``getset`` is expected to return the previous value, or None for an unseen key."""
        previous = self.storage.getset(TEST_KEY, TEST_CHECKSUM)
        self.assertIsNone(previous)

        previous = self.storage.getset(TEST_KEY, 'new_checksum')
        self.assertEqual(TEST_CHECKSUM, previous)
        previous = self.storage.getset(TEST_KEY, TEST_CHECKSUM)
        self.assertEqual('new_checksum', previous)

        previous = self.storage.getset('new_key', TEST_CHECKSUM)
        self.assertIsNone(previous)


class TestRedisStorage(TestCase):
    """Exercise ``getset`` on the Redis backend with ``StrictRedis`` replaced by a mock."""

    def setUp(self):
        with mock.patch("scrapy_tracker.storage.redis.StrictRedis") as mock_redis:
            # A plain dict stands in for the Redis server
            fake_db = {}

            def getset(key, val):
                old_val = fake_db.get(key)
                fake_db[key] = val
                return old_val

            fake_client = mock.MagicMock()
            fake_client.getset.side_effect = getset
            mock_redis.return_value = fake_client

            # Built while the patch is active so the storage receives the fake client
            self.storage = RedisStorage(Settings({
                'TRACKER_RADIS_FLUSH_DB': True
            }))

    def test_getset(self):
        """``getset`` is expected to return the previous value, or None for an unseen key."""
        previous = self.storage.getset(TEST_KEY, TEST_CHECKSUM)
        self.assertIsNone(previous)

        previous = self.storage.getset(TEST_KEY, 'new_checksum')
        self.assertEqual(TEST_CHECKSUM, previous)
        previous = self.storage.getset(TEST_KEY, TEST_CHECKSUM)
        self.assertEqual('new_checksum', previous)

        previous = self.storage.getset('new_key', TEST_CHECKSUM)
        # NOTE(review): the visible excerpt is cut off here; the sibling tests follow this
        # call with assertIsNone — confirm against the complete file.
test_util.py
Source:test_util.py
#!/usr/bin/python
# -*- coding: iso-8859-15 -*-
"""
Test module for s3yum.util
"""
import logging
import unittest
import sys
import io
import hashlib
import datetime

from mock import (
    MagicMock,
    patch,
    )
from s3yum.util import (
    s3join,
    get_s3item_md5,
    md5_matches,
    s3time_as_datetime,
    )


class TestS3YumUtil(unittest.TestCase):
    """
    Test s3yum utility methods
    """

    def test_s3join(self):
        """
        Verify that s3 pathing is assembled correctly
        """
        self.assertEqual(s3join('a','b','c'),'a/b/c')
        # Double slashes should become single slashes:
        self.assertEqual(s3join('a/','/b/','/c'),'a/b/c')
        # Verify that trailing / is preserved:
        self.assertEqual(s3join('a','b','c/'),'a/b/c/')

    def test_s3md5(self):
        """
        Verify that the s3 item md5 is always available
        """
        test_checksum = 'FakeMD5'

        # md5 attribute set directly
        with_md5 = MagicMock()
        with_md5.md5 = test_checksum

        # no md5; the etag carries the bare checksum
        with_plain_etag = MagicMock()
        with_plain_etag.md5 = None
        with_plain_etag.etag = test_checksum

        # no md5; the etag carries the checksum wrapped in double quotes
        with_quoted_etag = MagicMock()
        with_quoted_etag.md5 = None
        with_quoted_etag.etag = '"{}"'.format(test_checksum)

        for item in (with_md5, with_plain_etag, with_quoted_etag):
            self.assertEqual(get_s3item_md5(item),test_checksum)

    def test_md5_matching(self):
        """
        Verify that md5 matching works properly
        """
        filepath = 'my/random/file'
        file_contents = 'Random file contents'
        checksum = hashlib.md5(file_contents).hexdigest()
        # NOTE(review): `file` is not defined in the visible code, so this relies on the
        # Python 2 builtin, as does hashing a plain str above — confirm target interpreter.
        fake_open = MagicMock(spec=file,
                              return_value=io.BytesIO(file_contents))
        with patch('s3yum.util.open', fake_open, create=True):
            self.assertTrue(md5_matches(filepath,checksum))
            fake_open.assert_called_once_with(filepath, 'r')

    def test_s3_timestamp(self):
        """
        Verify that both kinds of s3 timestamps are handled properly
        """
        # HTTP-date style timestamp
        http_style = 'Wed, 12 Oct 2009 17:50:00 GMT'
        self.assertEqual(s3time_as_datetime(http_style),
                         datetime.datetime(2009,10,12,17,50))
        # ISO-8601 style timestamp with milliseconds and a trailing Z
        iso_style = '2015-07-08T14:50:48.000Z'
        self.assertEqual(s3time_as_datetime(iso_style),
                         datetime.datetime(2015,7,8,14,50,48))


if __name__ == '__main__':
    logging.basicConfig(stream=sys.stderr)
    logging.getLogger().setLevel(logging.DEBUG)
    # NOTE(review): the visible excerpt is cut off here; any remaining statements
    # (e.g. a test-runner call) are not shown — confirm against the complete file.
Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!