Best Python code snippet using tempest_python
app_kfold_sklearn.py
Source:app_kfold_sklearn.py
# -*- coding: utf-8 -*-
"""Grid-run of vectorizer x classifier x preprocessing combinations.

For every combination the training tweets are split with a stratified
k-fold; each fold fits a fresh vectorizer and classifier and records the
train / validation / test accuracy.  One log record per combination is
handed to ``log_classifier``.
"""
from tweets import tweets_preprocessor
from models import Sklearn
from sklearn.model_selection import cross_val_score
from utils import log, save_classifier, log_classifier
from data import train_data, test_data
from sklearn.model_selection import StratifiedKFold
import uuid

########################################################################################################################

# Preprocessing option sets, keyed by a stable UUID so results can be traced
# back to the exact options that produced them.
PREPROCESSING_ALGORITHMS = {
    '1258a9d2-111e-4d4a-acda-852dd7ba3e88': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': True},
    '60314ef9-271d-4865-a7db-6889b1670f59': {
        'add_link_flag': False, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': True},
    '4c2e484d-5cb8-4e3e-ba7b-679ae7a73fca': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': True, 'add_location_flag': True, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': True},
    '8b7db91c-c8bf-40f2-986a-83a659b63ba6': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': True},
    '7bc816a1-25df-4649-8570-0012d0acd72a': {
        'add_link_flag': False, 'add_user_flag': False, 'add_hash_flag': False, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': True},
    'a85c8435-6f23-4015-9e8c-19547222d6ce': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': False,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': True},
    'b054e509-4f04-44f2-bcf9-14fa8af4eeed': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': False,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': True},
    '2e359f0b-bfb9-4eda-b2a4-cd839c122de6': {
        'add_link_flag': False, 'add_user_flag': False, 'add_hash_flag': False, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': False,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': True},
    '71bd09db-e104-462d-887a-74389438bb49': {
        'add_link_flag': False, 'add_user_flag': False, 'add_hash_flag': False, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': True},
    'd3cc3c6e-10de-4b27-8712-8017da428e41': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': True}
}

# Each entry names a key of Sklearn.VECTORIZERS plus its constructor kwargs.
VECTORIZERS = [
    {
        'TYPE': 'TFIDF',
        'OPTIONS': {
            'binary': True,
            'ngram_range': (1, 1)
        }
    },
    {
        'TYPE': 'COUNT',
        'OPTIONS': {
            'binary': True,
            'ngram_range': (1, 1)
        }
    },
    {
        'TYPE': 'TFIDF',
        'OPTIONS': {
            'binary': True,
            'ngram_range': (1, 2)
        }
    },
    {
        'TYPE': 'COUNT',
        'OPTIONS': {
            'binary': True,
            'ngram_range': (1, 2)
        }
    },
    {
        'TYPE': 'TFIDF',
        'OPTIONS': {
            'binary': True,
            'ngram_range': (1, 3)
        }
    },
    {
        'TYPE': 'COUNT',
        'OPTIONS': {
            'binary': True,
            'ngram_range': (1, 3)
        }
    }
]

# Each entry names a key of Sklearn.CLASSIFIERS plus its constructor kwargs.
CLASSIFIERS = [
    {
        'TYPE': 'RIDGE',
        'OPTIONS': {}
    },
    {
        'TYPE': 'LOGISTIC_REGRESSION',
        'OPTIONS': {}
    },
    # {
    #     'TYPE': 'RANDOM_FOREST',
    #     'OPTIONS': {}
    # },
    # {
    #     'TYPE': 'DECISION_TREE',
    #     'OPTIONS': {}
    # },
    {
        'TYPE': 'SVC',
        'OPTIONS': {}
    },
    # {
    #     'TYPE': 'SGD',
    #     'OPTIONS': {}
    # }
]

SEED = 7
KFOLD = 10

# The splitter is stateless between split() calls when random_state is an
# int, so one instance can be shared by every combination.
kfold = StratifiedKFold(n_splits=KFOLD, shuffle=True, random_state=SEED)

for VECTORIZER in VECTORIZERS:
    for CLASSIFIER in CLASSIFIERS:
        for algorithm_id, preprocessing_algorithm in PREPROCESSING_ALGORITHMS.items():
            LOG_DICT = {
                'UUID': str(uuid.uuid4()),
                'PREPROCESSING_ALGORITHM_UUID': algorithm_id,
                'PREPROCESSING_ALGORITHM': preprocessing_algorithm,
                'VECTORIZER': VECTORIZER,
                'CLASSIFIER': CLASSIFIER,
                'KFOLD_HISTORY': []
            }

            train_data['preprocessed'] = tweets_preprocessor.preprocess(
                train_data.text,
                preprocessing_algorithm,
                keywords=train_data.keyword,
                locations=train_data.location
            )
            test_data['preprocessed'] = tweets_preprocessor.preprocess(
                test_data.text,
                preprocessing_algorithm,
                keywords=test_data.keyword,
                locations=test_data.location
            )

            inputs = train_data['preprocessed']
            targets = train_data['target']
            y_test = test_data.target.values  # identical for every fold

            # enumerate() supplies the fold number that is printed below.
            for k, (train, validation) in enumerate(kfold.split(inputs, targets)):
                vectorizer = Sklearn.VECTORIZERS[VECTORIZER['TYPE']](**VECTORIZER['OPTIONS'])

                # split() yields positional indices, so select with .iloc;
                # toarray() gives a plain ndarray (np.matrix from todense()
                # is rejected by current scikit-learn estimators).
                x_train = vectorizer.fit_transform(inputs.iloc[train]).toarray()
                y_train = targets.iloc[train]

                x_val = vectorizer.transform(inputs.iloc[validation]).toarray()
                y_val = targets.iloc[validation]

                x_test = vectorizer.transform(test_data.preprocessed).toarray()

                classifier = Sklearn.CLASSIFIERS[CLASSIFIER['TYPE']](**CLASSIFIER['OPTIONS'])

                try:
                    classifier.fit(x_train, y_train)

                    train_score = round(classifier.score(x_train, y_train), 6)
                    val_score = round(classifier.score(x_val, y_val), 6)
                    test_score = round(classifier.score(x_test, y_test), 6)

                    history = {
                        'train_score': train_score,
                        'val_score': val_score,
                        'test_score': test_score
                    }
                except Exception as exc:
                    # Best effort: one failing fold must not abort the grid,
                    # but Ctrl-C / SystemExit still propagate and the cause
                    # is reported instead of being silently dropped.
                    print('fold', k, 'failed:', repr(exc))
                    history = 'error'

                LOG_DICT['KFOLD_HISTORY'].append(history)
                print(CLASSIFIER['TYPE'], VECTORIZER, history, k)

            log_classifier(LOG_DICT)
# (the listing is truncated at this point in the source page)
aws_s3_tile_cp.py
Source:aws_s3_tile_cp.py
# coding=utf-8
"""Copy a pyramid of rendered tiles from one S3 location to another.

Starting at a root tile, every tile down to ``max_zoom`` is queued on a
thread pool; each task copies one object per entry of
``RENDER_COMBINATIONS``.
"""
import argparse
import botocore
import boto3
import datetime
import hashlib
import logging
import mercantile
import os
import pytz
import time
import threading
from mercantile import Tile
from multiprocessing.dummy import Pool

logging.basicConfig(level=logging.INFO)
# Quieting boto messages down a little
logging.getLogger('boto3.resources.action').setLevel(logging.WARNING)
logging.getLogger('botocore').setLevel(logging.WARNING)
logging.getLogger('marblecutter').setLevel(logging.WARNING)
logging.getLogger('marblecutter.mosaic').setLevel(logging.WARNING)
logging.getLogger('marblecutter.sources').setLevel(logging.WARNING)

logger = logging.getLogger('batchtiler')
if os.environ.get('VERBOSE'):
    logger.setLevel(logging.DEBUG)

THREAD_LOCAL = threading.local()


def initialize_thread():
    """Pool initializer: give the calling thread its own session and client."""
    # Each thread needs its own boto3 Session object - it's not threadsafe
    THREAD_LOCAL.boto_session = boto3.session.Session()
    THREAD_LOCAL.s3_client = THREAD_LOCAL.boto_session.client('s3')


# Don't copy tiles that exist and are newer than this cutoff datetime
# in yyyy-mm-ddThh:mm:ss format (in UTC)
CUTOFF_DATE = pytz.UTC.localize(datetime.datetime.strptime(
    os.environ.get('CUTOFF_DATE'),
    '%Y-%m-%dT%H:%M:%S'
)) if os.environ.get('CUTOFF_DATE') else None

# Only copy these tile types
ONLY_COPY = os.environ.get('ONLY_COPY').split(',') if os.environ.get('ONLY_COPY') else None

# The number of threads in the pool communicating with AWS
POOL_SIZE = int(os.environ.get('POOL_SIZE', '12'))
POOL = Pool(POOL_SIZE, initializer=initialize_thread)

RENDER_COMBINATIONS = [
    ("normal", ".png"),
    ("terrarium", ".png"),
    ("geotiff", ".tif"),
]


def s3_key(key_prefix, tile_type, tile, key_suffix, include_hash):
    """Build the object key for one rendered tile.

    The base key is ``<tile_type>/<z>/<x>/<y><key_suffix>``.  When
    ``include_hash`` is true the first six hex digits of the MD5 of that
    base key are prepended as a path segment; a truthy ``key_prefix`` is
    prepended last.
    """
    key = '{}/{}/{}/{}{}'.format(
        tile_type,
        tile.z,
        tile.x,
        tile.y,
        key_suffix,
    )

    if include_hash:
        # hashlib needs bytes on Python 3; encoding is a no-op for the
        # ASCII keys built above on Python 2.
        h = hashlib.md5(key.encode('utf-8')).hexdigest()[:6]
        key = '{}/{}'.format(
            h,
            key,
        )

    if key_prefix:
        key = '{}/{}'.format(key_prefix, key)

    return key


def head_object(s3, bucket, key):
    """ Head the given object and return the result if it exists.
    Returns `None` if it doesn't exist.

    Any ``ClientError`` other than a 404 is re-raised unchanged.
    """
    try:
        return s3.head_object(
            Bucket=bucket,
            Key=key,
        )
    except botocore.exceptions.ClientError as e:
        # If a client error is thrown, then check that it was a 404 error.
        # If it was a 404 error, then the object does not exist.
        # Compare as text: S3 error codes are not always numeric (e.g.
        # 'SlowDown'), and int() on those would mask the real error.
        error_code = str(e.response.get('Error', {}).get('Code'))
        if error_code == '404':
            return None
        raise


def copy_tile(tile, remove_hash, from_s3, to_s3):
    """Copy every render of ``tile`` from ``from_s3`` to ``to_s3``.

    ``from_s3`` and ``to_s3`` are ``(bucket, prefix)`` pairs.  When
    ``remove_hash`` is true the source key is built with the hash segment;
    the destination key never has one.  Connection errors, credential
    retrieval errors and S3 throttling ('SlowDown' / '503') are retried with
    exponential backoff capped at 30 seconds; a missing source object is
    logged and skipped; any other client error propagates.
    """
    from_bucket, from_prefix = from_s3
    to_bucket, to_prefix = to_s3

    s3 = THREAD_LOCAL.s3_client

    for (tile_type, ext) in RENDER_COMBINATIONS:
        from_key = s3_key(from_prefix, tile_type, tile, ext, remove_hash)
        to_key = s3_key(to_prefix, tile_type, tile, ext, False)

        if ONLY_COPY and tile_type not in ONLY_COPY:
            logger.debug(
                'Skipping copy to s3://%s/%s because '
                'type %s not in %s',
                to_bucket, to_key,
                tile_type, ONLY_COPY,
            )
            continue

        tries = 0
        wait = 1.0
        while True:
            try:
                tries += 1
                if CUTOFF_DATE:
                    # Check if the tile that we're copying to already exists
                    # and if its newer than the specified cutoff date
                    obj_head_resp = head_object(s3, to_bucket, to_key)
                    if obj_head_resp and obj_head_resp['LastModified'] >= CUTOFF_DATE:
                        logger.debug(
                            'Skipping copy to s3://%s/%s because '
                            'last modified %s >= %s',
                            to_bucket, to_key,
                            obj_head_resp['LastModified'].isoformat(),
                            CUTOFF_DATE.isoformat(),
                        )
                        # This is a break (instead of a return) so that we
                        # continue with the outer for loop
                        break

                s3.copy_object(
                    Bucket=to_bucket,
                    Key=to_key,
                    CopySource={
                        'Bucket': from_bucket,
                        'Key': from_key,
                    }
                )
                logger.debug(
                    "Copied s3://%s/%s to s3://%s/%s at try %s",
                    from_bucket, from_key,
                    to_bucket, to_key,
                    tries,
                )
                break

            except (botocore.vendored.requests.exceptions.ConnectionError,
                    botocore.exceptions.CredentialRetrievalError) as e:
                # Both are transient: back off and try the same copy again.
                logger.info(
                    "%s received, try %s, while copying "
                    "s3://%s/%s to s3://%s/%s, waiting %0.1f sec",
                    e, tries,
                    from_bucket, from_key,
                    to_bucket, to_key,
                    wait,
                )
                time.sleep(wait)
                wait = min(30.0, wait * 2.0)

            except botocore.exceptions.ClientError as e:
                error_code = str(e.response.get('Error', {}).get('Code'))
                if error_code in ('SlowDown', '503'):
                    logger.info(
                        "%s received, try %s, while copying "
                        "s3://%s/%s to s3://%s/%s, waiting %0.1f sec",
                        error_code, tries,
                        from_bucket, from_key,
                        to_bucket, to_key,
                        wait,
                    )
                    time.sleep(wait)
                    wait = min(30.0, wait * 2.0)
                elif error_code == 'NoSuchKey':
                    logger.warning(
                        "NoSuchKey received while copying "
                        "s3://%s/%s to s3://%s/%s (skipping copy)",
                        from_bucket, from_key,
                        to_bucket, to_key,
                    )
                    break
                else:
                    raise


def tile_exc_wrapper(tile, remove_hash, from_s3, to_s3):
    """Run ``copy_tile`` and log any exception with its traceback.

    The result of ``apply_async`` is never collected, so without this log
    call a failure inside a worker would go unnoticed.
    """
    try:
        copy_tile(tile, remove_hash, from_s3, to_s3)
    except Exception:
        logger.exception('Error while processing tile %s', tile)
        raise


def queue_render(tile, remove_hash, from_s3, to_s3):
    """Submit the copy of a single tile to the thread pool."""
    logger.debug('Enqueueing render for tile %s', tile)
    POOL.apply_async(
        tile_exc_wrapper,
        args=[tile, remove_hash, from_s3, to_s3]
    )


def queue_tile(tile, max_zoom, remove_hash, from_s3, to_s3):
    """Queue ``tile`` and, recursively, all its descendants up to ``max_zoom``."""
    queue_render(tile, remove_hash, from_s3, to_s3)

    if tile.z < max_zoom:
        for child in mercantile.children(tile):
            queue_tile(child, max_zoom, remove_hash, from_s3, to_s3)


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('x', type=int)
    parser.add_argument('y', type=int)
    parser.add_argument('zoom', type=int)
    parser.add_argument('max_zoom', type=int)
    parser.add_argument('from_bucket')
    parser.add_argument('to_bucket')
    parser.add_argument('--from_prefix')
    parser.add_argument('--to_prefix')
    parser.add_argument('--remove_hash', dest='remove_hash', action='store_true', default=False)

    args = parser.parse_args()
    root = Tile(args.x, args.y, args.zoom)

    logger.info('Copying tiles from root tile %s to zoom %s at '
                's3://%s/%s to s3://%s/%s',
                root, args.max_zoom, args.from_bucket, args.from_prefix or '',
                args.to_bucket, args.to_prefix or '')
    logger.info('Running %s processes', POOL_SIZE)

    start_time = time.time()
    queue_tile(root, args.max_zoom, args.remove_hash,
               (args.from_bucket, args.from_prefix),
               (args.to_bucket, args.to_prefix))
    est_copies = (4**(args.max_zoom - args.zoom)) * len(RENDER_COMBINATIONS)

    # No more work will be queued; wait for the workers to drain the queue.
    POOL.close()
    POOL.join()
    end_time = time.time()

    # NOTE(review): the listing is truncated inside this call in the source
    # page; the arguments below are reconstructed from the format string and
    # the variables computed above - confirm against the original file.
    logger.info('Done processing pyramid %s to zoom %s (%d ops in %0.1f sec)',
                root, args.max_zoom, est_copies, end_time - start_time)
preprocessing_algorithms.py
Source:preprocessing_algorithms.py
# Preprocessing option sets, keyed by a stable UUID.  Treat this table as
# read-only; use get_preprocessing_algorithm() to obtain independent copies.
PREPROCESSING_ALGORITHMS = {
    '1258a9d2-111e-4d4a-acda-852dd7ba3e88': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': False},
    '60314ef9-271d-4865-a7db-6889b1670f59': {
        'add_link_flag': False, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': False},
    '4c2e484d-5cb8-4e3e-ba7b-679ae7a73fca': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': True, 'add_location_flag': True, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': False},
    '8b7db91c-c8bf-40f2-986a-83a659b63ba6': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': False},
    '7bc816a1-25df-4649-8570-0012d0acd72a': {
        'add_link_flag': False, 'add_user_flag': False, 'add_hash_flag': False, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': True, 'remove_not_alpha': True,
        'join': False},
    'a85c8435-6f23-4015-9e8c-19547222d6ce': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': True, 'stem': False, 'remove_punctuations': False,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': False},
    'b054e509-4f04-44f2-bcf9-14fa8af4eeed': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': False,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': False},
    '2e359f0b-bfb9-4eda-b2a4-cd839c122de6': {
        'add_link_flag': False, 'add_user_flag': False, 'add_hash_flag': False, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': False,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': False},
    '71bd09db-e104-462d-887a-74389438bb49': {
        'add_link_flag': False, 'add_user_flag': False, 'add_hash_flag': False, 'add_number_flag': False,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': False},
    'd3cc3c6e-10de-4b27-8712-8017da428e41': {
        'add_link_flag': True, 'add_user_flag': True, 'add_hash_flag': True, 'add_number_flag': True,
        'add_keyword_flag': False, 'add_location_flag': False, 'remove_links': True, 'remove_users': True,
        'remove_hash': True, 'unslang': True, 'split_words': False, 'stem': False, 'remove_punctuations': True,
        'remove_numbers': True, 'to_lower_case': True, 'remove_stop_words': False, 'remove_not_alpha': False,
        'join': False}
}


def get_preprocessing_algorithm(alg_id=None, join=False):
    """Return preprocessing options without exposing the shared table.

    Args:
        alg_id: Full UUID, or any substring of one, identifying a single
            option set.  ``None`` selects every option set.
        join: When true, the returned option set(s) have ``'join'`` forced
            to ``True``.

    Returns:
        With ``alg_id=None``, a dict mapping UUID to a fresh copy of each
        option set.  Otherwise a fresh copy of the first option set whose
        UUID contains ``alg_id``, or ``None`` when nothing matches.
    """
    if alg_id is None:
        # Copy every inner dict: assigning 'join' on the shared dicts would
        # permanently flip the module-level defaults for all later callers.
        algorithms = {key: value.copy() for key, value in PREPROCESSING_ALGORITHMS.items()}
        if join:
            for options in algorithms.values():
                options['join'] = True
        return algorithms

    for key, alg in PREPROCESSING_ALGORITHMS.items():
        if alg_id in key:  # substring match, so short id prefixes work too
            selected = alg.copy()
            if join:
                selected['join'] = True
            # NOTE(review): the listing is truncated just before this point
            # in the source page; returning the copy is the reconstructed
            # tail - confirm against the original file.
            return selected

    return None
Learn to execute automation testing from scratch with the LambdaTest Learning Hub: from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!