Best Python code snippet using localstack_python
s3_cache_store.py
Source: s3_cache_store.py
from __future__ import division, print_function, absolute_import

import fnmatch

from lazy_import import lazy_module

boto3 = lazy_module("boto3")

import podpac
from podpac.core.settings import settings
from podpac.core.cache.utils import CacheException, CacheWildCard
from podpac.core.cache.file_cache_store import FileCacheStore


class S3CacheStore(FileCacheStore):  # pragma: no cover

    cache_mode = "s3"
    cache_modes = set(["s3", "all"])
    _limit_setting = "S3_CACHE_MAX_BYTES"
    _delim = "/"

    def __init__(self, s3_bucket=None, aws_region_name=None, aws_access_key_id=None, aws_secret_access_key=None):
        """Initialize a cache store backed by an S3 bucket.

        Parameters
        ----------
        max_size : None, optional
            Maximum allowed size of the cache store in bytes. Defaults to the podpac 'S3_CACHE_MAX_BYTES' setting, or no limit if this setting does not exist.
        use_settings_limit : bool, optional
            Use podpac settings to determine cache limits if True; this will also cause subsequent runtime changes to the podpac settings module to affect the limit on this cache. Default is True.
        s3_bucket : str, optional
            Bucket name; overrides settings.
        aws_region_name : str, optional
            e.g. 'us-west-1', 'us-west-2', 'us-east-1'
        aws_access_key_id : str, optional
            Overrides podpac settings if both `aws_access_key_id` and `aws_secret_access_key` are specified.
        aws_secret_access_key : str, optional
            Overrides podpac settings if both `aws_access_key_id` and `aws_secret_access_key` are specified.
        """

        if not settings["S3_CACHE_ENABLED"]:
            raise CacheException("S3 cache is disabled in the podpac settings.")

        self._root_dir_path = settings["S3_CACHE_DIR"]

        if s3_bucket is None:
            s3_bucket = settings["S3_BUCKET_NAME"]
        if aws_access_key_id is None or aws_secret_access_key is None:
            aws_access_key_id = settings["AWS_ACCESS_KEY_ID"]
            aws_secret_access_key = settings["AWS_SECRET_ACCESS_KEY"]
        if aws_region_name is None:
            aws_region_name = settings["AWS_REGION_NAME"]

        aws_session = boto3.session.Session(region_name=aws_region_name)
        self._s3_client = aws_session.client(
            "s3",
            # config=boto3.session.Config(signature_version='s3v4'),
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key,
        )
        self._s3_bucket = s3_bucket

        try:
            self._s3_client.head_bucket(Bucket=self._s3_bucket)
        except Exception as e:
            raise e

    # -----------------------------------------------------------------------------------------------------------------
    # main cache API
    # -----------------------------------------------------------------------------------------------------------------

    @property
    def size(self):
        paginator = self._s3_client.get_paginator("list_objects")
        operation_parameters = {"Bucket": self._s3_bucket, "Prefix": self._root_dir_path}
        page_iterator = paginator.paginate(**operation_parameters)
        total_size = 0
        for page in page_iterator:
            if "Contents" in page:
                for obj in page["Contents"]:
                    total_size += obj["Size"]
        return total_size

    def cleanup(self):
        """
        Remove expired entries.
        """
        pass  # TODO metadata

    # -----------------------------------------------------------------------------------------------------------------
    # helper methods
    # -----------------------------------------------------------------------------------------------------------------

    def search(self, node, item=CacheWildCard(), coordinates=CacheWildCard()):
        """Fileglob to match files that could be storing cached data for the specified node, key, and coordinates.

        Parameters
        ----------
        node : podpac.core.node.Node
        item : str, CacheWildCard
            CacheWildCard indicates to match any key
        coordinates : podpac.core.coordinates.coordinates.Coordinates, CacheWildCard, None
            CacheWildCard indicates to match any coordinates

        Returns
        -------
        TYPE : str
            Fileglob of existing paths that match the request
        """
        delim = self._delim
        prefix = self._get_node_dir(node)
        prefix = prefix if prefix.endswith(delim) else prefix + delim
        response = self._s3_client.list_objects_v2(Bucket=self._s3_bucket, Prefix=prefix, Delimiter=delim)

        if response["KeyCount"] > 0:
            obj_names = [o["Key"].replace(prefix, "") for o in response["Contents"]]
        else:
            obj_names = []

        node_dir = self._get_node_dir(node)
        obj_names = fnmatch.filter(obj_names, self._get_filename_pattern(node, item, coordinates))
        paths = [self._path_join(node_dir, filename) for filename in obj_names]
        return paths

    # -----------------------------------------------------------------------------------------------------------------
    # file storage abstraction
    # -----------------------------------------------------------------------------------------------------------------

    def _save(self, path, s, metadata=None):
        # note: s needs to be b'bytes' or a file object, see the link below
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.put_object
        self._s3_client.put_object(Bucket=self._s3_bucket, Body=s, Key=path)
        # TODO metadata

    def _load(self, path):
        response = self._s3_client.get_object(Bucket=self._s3_bucket, Key=path)
        return response["Body"].read()

    def _path_join(self, *paths):
        return self._delim.join(paths)

    def _basename(self, path):
        if self._delim in path:
            dirname, basename = path.rsplit(self._delim, 1)
        else:
            basename = path
        return basename

    def _remove(self, path):
        self._s3_client.delete_object(Bucket=self._s3_bucket, Key=path)

    def _exists(self, path):
        response = self._s3_client.list_objects_v2(Bucket=self._s3_bucket, Prefix=path)
        obj_count = response["KeyCount"]
        return obj_count == 1 and response["Contents"][0]["Key"] == path

    def _make_dir(self, path):
        # Does not need to do anything for S3 as the prefix is just part of the object name.
        # note: I believe AWS uses prefixes to decide how to partition objects in a bucket, which could affect performance.
        pass

    def _rmtree(self, path):
        paginator = self._s3_client.get_paginator("list_objects_v2")
        pages = paginator.paginate(Bucket=self._s3_bucket, Prefix=path)
        to_delete = dict(Objects=[])
        for item in pages.search("Contents"):
            if item:
                to_delete["Objects"].append(dict(Key=item["Key"]))
            if len(to_delete["Objects"]) >= 1000:
                self._s3_client.delete_objects(Bucket=self._s3_bucket, Delete=to_delete)
                to_delete = dict(Objects=[])
        if len(to_delete["Objects"]):
            self._s3_client.delete_objects(Bucket=self._s3_bucket, Delete=to_delete)

    def _is_empty(self, directory):
        if not directory.endswith(self._delim):
            directory += self._delim
        response = self._s3_client.list_objects_v2(Bucket=self._s3_bucket, Prefix=directory, MaxKeys=2)
        # TODO throw an error if key count is zero as this indicates `directory` is not an existing directory.
        return response["KeyCount"] == 1

    def _rmdir(self, directory):
        # s3 can have "empty" directories
        # should check if directory is empty and then delete
        # delete_objects could be used if recursive=True is specified to this function.
        # NOTE: This can remove the object representing the prefix without deleting other objects with the prefix.
        #       This is because s3 is really a key/value store. This function should maybe be changed to throw an
        #       error if the prefix is not "empty" or should delete all objects with the prefix. The former would
        #       be more consistent with the os function used in the DiskCacheStore.
        # ToDo: 1) examine boto3 response, 2) handle object versioning (as it stands this will apply to the "null version")
        # https://boto3.amazonaws.com/v1/documentation/api/latest/reference/services/s3.html#S3.Client.delete_object
        if not directory.endswith(self._delim):
            directory += self._delim
        self._s3_client.delete_object(Bucket=self._s3_bucket, Key=directory)

    def _dirname(self, path):
        dirname, basename = path.rsplit(self._delim, 1)
        return dirname

    def _get_metadata(self, path, key):
        return None  # TODO metadata

    def _set_metadata(self, path, key, value):
        ...
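The store above boils down to two boto3 patterns: paginating over a key prefix to total object sizes (the `size` property) and batch-deleting at most 1000 keys per `delete_objects` call (`_rmtree`). The sketch below isolates those two patterns against a plain client; the bucket name, prefix, and LocalStack-style `endpoint_url` are placeholder assumptions, not part of the podpac snippet.

import boto3

# Placeholder values for illustration only.
BUCKET = "my-cache-bucket"
PREFIX = "podpac_cache/"
s3 = boto3.client("s3", endpoint_url="http://localhost:4566")  # e.g. a LocalStack endpoint


def prefix_size(client, bucket, prefix):
    # Sum object sizes under a prefix (the `size` property above does the same
    # with the older "list_objects" paginator).
    total = 0
    for page in client.get_paginator("list_objects_v2").paginate(Bucket=bucket, Prefix=prefix):
        for obj in page.get("Contents", []):
            total += obj["Size"]
    return total


def remove_prefix(client, bucket, prefix):
    # Delete every object under a prefix in batches, as `_rmtree` above does;
    # delete_objects accepts at most 1000 keys per request.
    batch = []
    for page in client.get_paginator("list_objects_v2").paginate(Bucket=bucket, Prefix=prefix):
        for obj in page.get("Contents", []):
            batch.append({"Key": obj["Key"]})
            if len(batch) == 1000:
                client.delete_objects(Bucket=bucket, Delete={"Objects": batch})
                batch = []
    if batch:
        client.delete_objects(Bucket=bucket, Delete={"Objects": batch})


if __name__ == "__main__":
    print(prefix_size(s3, BUCKET, PREFIX), "bytes cached under", PREFIX)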
cloudstorage.py
Source: cloudstorage.py
1"""2Cloud Storage integration3.. include common links, assuming primary doc root is up one directory4.. include:: ../include/links.rst5"""6import urllib7import fnmatch8import os9try:10 import boto311 import smart_open12 _s3_support_enabled = True13except:14 _s3_support_enabled = False15_supported_cloud_schemes = ['s3']16_s3_client = None17def initialize_cloud_storage(cloud_uri, args):18 scheme = urllib.parse.urlsplit(cloud_uri)[0]19 if scheme == 's3' and _s3_support_enabled:20 session = boto3.Session()21 global _s3_client22 _s3_client = session.client('s3', endpoint_url = args.endpoint_url)23 elif scheme == '':24 raise ValueError("Cannot initialize cloud storage, {cloud_uri} is not a URI")25 else:26 raise RuntimeError("Cannot initialize cloud storage, {scheme} is not a supported cloud URI scheme.")27def is_cloud_uri(potential_uri):28 scheme = urllib.parse.urlsplit(potential_uri)[0]29 if scheme in _supported_cloud_schemes:30 return True31 else:32 return False33def list_objects(cloud_uri, patterns):34 parsed_uri = urllib.parse.urlsplit(cloud_uri) 35 if parsed_uri.scheme == 's3' and _s3_client is not None:36 bucket = parsed_uri.netloc37 key = parsed_uri.path.lstrip('/')38 paginator = _s3_client.get_paginator('list_objects_v2')39 for page in paginator.paginate(Bucket=bucket, Prefix=key):40 for object in page['Contents']:41 matches = False42 for pattern in patterns:43 if fnmatch.fnmatchcase(object['Key'], pattern):44 yield (parsed_uri.scheme + '://' + bucket + '/' + object['Key'], object['Size'])45 elif parsed_uri.scheme == '':46 raise ValueError("Cannot initialize cloud storage, {cloud_uri} is not a URI")47 else:48 raise RuntimeError("Cannot initialize cloud storage, {scheme} is not a supported cloud URI scheme.")49def open(cloud_uri,mode="r"):50 if _s3_support_enabled and _s3_client is not None:51 return smart_open.open(cloud_uri, mode, transport_params = {'client': _s3_client})52 else:53 raise RuntimeError(f"Cannot open {cloud_uri} from cloud storage because cloud storage has not been initialized")54def copy(source_uri, dest_uri):55 if _s3_client is None:56 raise RuntimeError(f"Cannot copy {source_uri} to {dest_uri}: S3 cloud storage has not been initialized.")57 parsed_source = urllib.parse.urlsplit(source_uri) 58 parsed_dest = urllib.parse.urlsplit(dest_uri)59 if parsed_source.scheme != 's3' or parsed_dest.scheme != 's3':60 raise ValueError(f"Cannot copy {source_uri} to {dest_uri}: only S3 to S3 copies are supported.")61 source = {'Bucket': parsed_source.netloc, 'Key': parsed_source.path.lstrip('/')}62 _s3_client.copy(source, parsed_dest.netloc, parsed_dest.path.lstrip('/'))63def upload(source_file, dest_uri):64 if _s3_client is None:65 raise RuntimeError(f"Cannot upload {source_file} to {dest_uri}: S3 cloud storage has not been initialized.")66 parsed_dest = urllib.parse.urlsplit(dest_uri)67 if parsed_dest.scheme != 's3':68 raise ValueError(f"Cannot upload to {dest_uri}: only S3 uploads are supported.")69 _s3_client.upload_file(source_file, parsed_dest.netloc, parsed_dest.path.lstrip('/'))70def download(source_uri, dest_dir):71 if _s3_client is None:72 raise RuntimeError(f"Cannot download {source_uri} to {dest_dir}: S3 cloud storage has not been initialized.")73 parsed_source = urllib.parse.urlsplit(source_uri)74 if parsed_source.scheme != 's3':75 raise ValueError(f"Cannot download from {source_uri}: only S3 uploads are supported.")76 dest_file = os.path.join(dest_dir, os.path.basename(parsed_source.path))77 _s3_client.download_file(parsed_source.netloc, parsed_source.path.lstrip('/'), 
dest_file )...
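A brief usage sketch for the module above, assuming it is importable as `cloudstorage` and that an S3-compatible endpoint is reachable; the LocalStack-style URL, bucket, prefix, and file names are placeholder assumptions.

from types import SimpleNamespace

import cloudstorage  # the module above, assumed saved as cloudstorage.py

# initialize_cloud_storage() only reads args.endpoint_url, so a SimpleNamespace stands in for argparse args.
args = SimpleNamespace(endpoint_url="http://localhost:4566")
cloudstorage.initialize_cloud_storage("s3://my-bucket/raw/", args)

# list_objects() yields (uri, size) tuples for keys matching any of the glob patterns.
for uri, size in cloudstorage.list_objects("s3://my-bucket/raw/", ["*.fits", "*.txt"]):
    print(uri, size)

# open() streams the object through smart_open using the client set up above.
with cloudstorage.open("s3://my-bucket/raw/notes.txt") as fobj:
    print(fobj.read())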
s3.py
Source: s3.py
import json

import boto3

_s3_client = boto3.client('s3')


def read_json_file(bucket, key):
    """
    Reads a JSON file from s3.

    :param bucket: The name of the bucket where the source file is at.
    :param key: The full file name of the object to read.
    """
    response = _s3_client.get_object(
        Bucket=bucket,
        Key=key
    )
    return json.load(response['Body'])


def write_json_file(bucket, key, data):
    """
    Write a JSON file to an s3 bucket.

    :param bucket: The name of the bucket where the source file is at.
    :param key: The full file name of the object to write.
    :param data: The json object to add to the bucket.
    """
    return _s3_client.put_object(
        Bucket=bucket,
        Key=key,
        Body=json.dumps(data).encode()
    )
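A short round-trip sketch for the two helpers above; the bucket and key are placeholder assumptions, the bucket is assumed to already exist, and the module-level client needs AWS credentials (or a test endpoint) configured.

from s3 import read_json_file, write_json_file  # assumes the module above is saved as s3.py

payload = {"job": "nightly-export", "status": "complete", "rows": 1234}

# write_json_file JSON-encodes the dict and stores it with put_object;
# read_json_file fetches it with get_object and decodes it with json.load.
write_json_file("my-config-bucket", "reports/latest.json", payload)
assert read_json_file("my-config-bucket", "reports/latest.json") == payload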