How to use the _url_replacer method in gabbi

Best Python code snippets using gabbi. The two snippets below are unrelated implementations of a `_url_replacer` helper found on GitHub: a module-level URL-building function from an API backup tool, and gabbi's own `$URL` substitution method from case.py.

project.py

Source: project.py (GitHub)


```python
...
    reader = csv.DictReader(f, delimiter=delimiter)
    for r in reader:
        flist[r[key]] = r
    return flist

def _url_replacer(url, params, query_mode=False):
    """Replaces url params"""
    if query_mode:
        query_char = '?'
        splitter = '&'
    else:
        splitter = PARAM_SPLITTER
        query_char = PARAM_SPLITTER
    parsed = urlparse(url)
    finalparams = []
    for k, v in params.items():
        finalparams.append('%s=%s' % (str(k), str(v)))
    return parsed.geturl() + query_char + splitter.join(finalparams)

def load_json_file(filename, default={}):
    """Loads JSON file and return it as dict"""
    if os.path.exists(filename):
        f = open(filename, 'r', encoding='utf8')
        data = json.load(f)
        f.close()
    else:
        data = default
    return data

class ProjectBuilder:
    """Project builder"""

    def __init__(self, project_path=None):
        self.http = requests.Session()
        self.project_path = os.getcwd() if project_path is None else project_path
        self.config_filename = os.path.join(self.project_path, 'apibackuper.cfg')
        self.__read_config(self.config_filename)
        self.enable_logging()

    def enable_logging(self):
        """Enable logging to file and StdErr"""
        logFormatter = logging.Formatter("%(asctime)s [%(threadName)-12.12s] [%(levelname)-5.5s] %(message)s")
        rootLogger = logging.getLogger()
        fileHandler = logging.FileHandler("{0}".format(self.logfile))
        fileHandler.setFormatter(logFormatter)
        rootLogger.addHandler(fileHandler)
        consoleHandler = logging.StreamHandler()
        consoleHandler.setFormatter(logFormatter)
        rootLogger.addHandler(consoleHandler)

    def __read_config(self, filename):
        self.config = None
        if os.path.exists(self.config_filename):
            conf = configparser.ConfigParser()
            conf.read(filename, encoding='utf8')
            self.config = conf
            storagedir = conf.get('storage', 'storage_path') if conf.has_option('storage', 'storage_path') else 'storage'
            self.storagedir = os.path.join(self.project_path, storagedir)
            self.field_splitter = conf.get('settings', 'splitter') if conf.has_option('settings', 'splitter') else FIELD_SPLITTER
            self.id = conf.get('settings', 'id') if conf.has_option('settings', 'id') else None
            self.name = conf.get('settings', 'name')
            self.logfile = conf.get('settings', 'logfile') if conf.has_option('settings', 'logfile') else "apibackuper.log"
            self.data_key = conf.get('data', 'data_key')
            self.storage_type = conf.get('storage', 'storage_type')
            self.http_mode = conf.get('project', 'http_mode')
            self.description = conf.get('project', 'description') if conf.has_option('project', 'description') else None
            self.start_url = conf.get('project', 'url')
            self.page_limit = conf.getint('params', 'page_size_limit')
            self.resp_type = conf.get('project', 'resp_type') if conf.has_option('project', 'resp_type') else 'json'
            self.iterate_by = conf.get('project', 'iterate_by') if conf.has_option('project', 'iterate_by') else 'page'
            self.default_delay = conf.getint('project', 'default_delay') if conf.has_option('project', 'default_delay') else DEFAULT_DELAY
            self.retry_delay = conf.getint('project', 'retry_delay') if conf.has_option('project', 'retry_delay') else RETRY_DELAY
            self.force_retry = conf.getboolean('project', 'force_retry') if conf.has_option('project', 'force_retry') else False
            self.retry_count = conf.getint('project', 'retry_count') if conf.has_option('project', 'retry_count') else DEFAULT_RETRY_COUNT
            self.start_page = conf.getint('params', 'start_page') if conf.has_option('params', 'start_page') else 1
            self.query_mode = conf.get('params', 'query_mode') if conf.has_option('params', 'query_mode') else "query"
            self.flat_params = conf.getboolean('params', 'force_flat_params') if conf.has_option('params', 'force_flat_params') else False
            self.total_number_key = conf.get('data', 'total_number_key') if conf.has_option('data', 'total_number_key') else ''
            self.pages_number_key = conf.get('data', 'pages_number_key') if conf.has_option('data', 'pages_number_key') else ''
            self.page_number_param = conf.get('params', 'page_number_param') if conf.has_option('params', 'page_number_param') else None
            self.count_skip_param = conf.get('params', 'count_skip_param') if conf.has_option('params', 'count_skip_param') else None
            self.count_from_param = conf.get('params', 'count_from_param') if conf.has_option('params', 'count_from_param') else None
            self.count_to_param = conf.get('params', 'count_to_param') if conf.has_option('params', 'count_to_param') else None
            self.page_size_param = conf.get('params', 'page_size_param') if conf.has_option('params', 'page_size_param') else None
            self.storage_file = os.path.join(self.storagedir, 'storage.zip')
            self.details_storage_file = os.path.join(self.storagedir, 'details.zip')
            if conf.has_section('follow'):
                self.follow_data_key = conf.get('follow', 'follow_data_key') if conf.has_option('follow', 'follow_data_key') else None
                self.follow_item_key = conf.get('follow', 'follow_item_key') if conf.has_option('follow', 'follow_item_key') else None
                self.follow_mode = conf.get('follow', 'follow_mode') if conf.has_option('follow', 'follow_mode') else None
                self.follow_http_mode = conf.get('follow', 'follow_http_mode') if conf.has_option('follow', 'follow_http_mode') else 'GET'
                self.follow_param = conf.get('follow', 'follow_param') if conf.has_option('follow', 'follow_param') else None
                self.follow_pattern = conf.get('follow', 'follow_pattern') if conf.has_option('follow', 'follow_pattern') else None
                self.follow_url_key = conf.get('follow', 'follow_url_key') if conf.has_option('follow', 'follow_url_key') else None
            if conf.has_section('files'):
                self.fetch_mode = conf.get('files', 'fetch_mode')
                self.default_ext = conf.get('files', 'default_ext') if conf.has_option('files', 'default_ext') else None
                self.files_keys = conf.get('files', 'keys').split(',')
                self.root_url = conf.get('files', 'root_url')
                self.storage_mode = conf.get('files', 'storage_mode') if conf.has_option('files', 'storage_mode') else 'filepath'
                self.file_storage_type = conf.get('files', 'file_storage_type') if conf.has_option('files', 'file_storage_type') else 'zip'
                self.use_aria2 = conf.get('files', 'use_aria2') if conf.has_option('files', 'use_aria2') else 'False'

    def _single_request(self, url, headers, params, flatten=None):
        """Single http/https request"""
        if self.http_mode == 'GET':
            if self.flat_params and len(params.keys()) > 0:
                s = []
                for k, v in flatten.items():
                    s.append('%s=%s' % (k, v.replace("'", '"').replace('True', 'true')))
                logging.info('url: %s' % (url + '?' + '&'.join(s)))
                if headers:
                    response = self.http.get(url + '?' + '&'.join(s), headers=headers, verify=False)
                else:
                    response = self.http.get(url + '?' + '&'.join(s), verify=False)
            else:
                logging.info('url: %s, params: %s' % (url, str(params)))
                if headers:
                    response = self.http.get(url, params=params, headers=headers, verify=False)
                else:
                    response = self.http.get(url, params=params, verify=False)
        else:
            logging.debug('Request %s, params %s, headers %s' % (url, str(params), str(headers)))
            if headers:
                response = self.http.post(url, json=params, headers=headers, verify=False)
            else:
                response = self.http.post(url, json=params, verify=False)
        return response

    @staticmethod
    def create(name):
        """Create new project"""
        if not os.path.exists(name):
            os.mkdir(name)
        config_filename = 'apibackuper.cfg'
        config_path = os.path.join(name, config_filename)
        if os.path.exists(config_path):
            print('Project already exists')
        else:
            config = configparser.ConfigParser()
            config['settings'] = {'initialized': False, 'name': name}
            f = open(config_path, 'w', encoding='utf8')
            config.write(f)
            f.close()
            print('Project %s created' % (name))

    def init(self, url, pagekey, pagesize, datakey, itemkey, changekey, iterateby, http_mode, work_modes):
        """[TBD] Unfinished method. Don't use it please"""
        conf = self.__read_config(self.config_filename)
        if conf is None:
            print('Config file not found. Please run in project directory')
            return
        pass

    def export(self, format, filename):
        """Exports data as JSON lines, BSON and other formats"""
        if self.config is None:
            print('Config file not found. Please run in project directory')
            return
        if format == 'jsonl':
            outfile = open(filename, 'w', encoding='utf8')
        elif format == 'gzip':
            outfile = gzip.open(filename, mode='wt', encoding='utf8')
            pass
        else:
            print("Only 'jsonl' format supported for now.")
            return
        details_file = os.path.join(self.storagedir, 'details.zip')
        if self.config.has_section('follow') and os.path.exists(details_file):
            mzip = ZipFile(details_file, mode='r', compression=ZIP_DEFLATED)
            for fname in mzip.namelist():
                tf = mzip.open(fname, 'r')
                logging.info('Loading %s' % (fname))
                data = json.load(tf)
                tf.close()
                try:
                    if self.follow_data_key:
                        follow_data = get_dict_value(data, self.follow_data_key, splitter=self.field_splitter)
                        if isinstance(follow_data, dict):
                            outfile.write(json.dumps(follow_data, ensure_ascii=False) + '\n')
                        else:
                            for item in follow_data:
                                outfile.write(json.dumps(item, ensure_ascii=False) + '\n')
                    else:
                        outfile.write(json.dumps(data, ensure_ascii=False) + '\n')
                except KeyError:
                    logging.info('Data key: %s not found' % (self.data_key))
        else:
            storage_file = os.path.join(self.storagedir, 'storage.zip')
            if not os.path.exists(storage_file):
                print('Storage file not found %s' % (storage_file))
                return
            mzip = ZipFile(storage_file, mode='r', compression=ZIP_DEFLATED)
            for fname in mzip.namelist():
                tf = mzip.open(fname, 'r')
                try:
                    data = json.load(tf)
                except:
                    continue
                finally:
                    tf.close()
                try:
                    if self.data_key:
                        for item in get_dict_value(data, self.data_key, splitter=self.field_splitter):
                            outfile.write(json.dumps(item, ensure_ascii=False) + '\n')
                    else:
                        for item in data:
                            outfile.write(json.dumps(item, ensure_ascii=False) + '\n')
                except KeyError:
                    logging.info('Data key: %s not found' % (self.data_key))
        outfile.close()
        logging.info('Data exported to %s' % (filename))

    def run(self, mode):
        """Run data collection"""
        if self.config is None:
            print('Config file not found. Please run in project directory')
            return
        if not os.path.exists(self.storagedir):
            os.mkdir(self.storagedir)
        if self.storage_type != 'zip':
            print('Only zip storage supported right now')
            return
        storage_file = os.path.join(self.storagedir, 'storage.zip')
        if mode == 'full':
            mzip = ZipFile(storage_file, mode='w', compression=ZIP_DEFLATED)
        else:
            mzip = ZipFile(storage_file, mode='a', compression=ZIP_DEFLATED)
        start = timer()
        headers = load_json_file(os.path.join(self.project_path, 'headers.json'), default={})
        params = load_json_file(os.path.join(self.project_path, 'params.json'), default={})
        if self.flat_params:
            flatten = {}
            for k, v in params.items():
                flatten[k] = str(v)
        else:
            flatten = None
        url_params = load_json_file(os.path.join(self.project_path, 'url_params.json'), default=None)
        if self.query_mode == 'params':
            url = _url_replacer(self.start_url, url_params)
        elif self.query_mode == 'mixed':
            url = _url_replacer(self.start_url, url_params, query_mode=True)
        else:
            url = self.start_url
        response = self._single_request(url, headers, params, flatten)
        if self.resp_type == 'json':
            start_page_data = response.json()
        else:
            start_page_data = xmltodict.parse(response.content)
#        print(json.dumps(start_page_data, ensure_ascii=False))
        end = timer()
        if len(self.total_number_key) > 0:
            total = get_dict_value(start_page_data, self.total_number_key, splitter=self.field_splitter)
            nr = 1 if total % self.page_limit > 0 else 0
            num_pages = (total / self.page_limit) + nr
        elif len(self.pages_number_key) > 0:
            num_pages = int(get_dict_value(start_page_data, self.pages_number_key, splitter=self.field_splitter))
            total = num_pages * self.page_limit
        else:
            num_pages = None
            total = None
        logging.info('Total pages %d, records %d' % (num_pages, total))
        num_pages = int(num_pages)
        change_params = {}
        # By default it's "full" mode with start_page and end_page as full list of pages
        start_page = self.start_page
        end_page = num_pages + self.start_page
        # if "continue" mode is set, shift start_page to last page saved. Force rewriting last page and continue
        if mode == 'continue':
            logging.debug('Continue mode enabled, looking for last saved page')
            pagenames = mzip.namelist()
            for page in range(self.start_page, num_pages):
                if 'page_%d.json' % (page) not in pagenames:
                    if page > self.start_page:
                        start_page = page
                    else:
                        start_page = page
                    break
            logging.debug('Start page number %d' % (start_page))
        for page in range(start_page, end_page):
            if self.page_size_param and len(self.page_size_param) > 0:
                change_params[self.page_size_param] = self.page_limit
            if self.iterate_by == 'page':
                change_params[self.page_number_param] = page
            elif self.iterate_by == 'skip':
                change_params[self.count_skip_param] = (page - 1) * self.page_limit
            elif self.iterate_by == 'range':
                change_params[self.count_from_param] = (page - 1) * self.page_limit
                change_params[self.count_to_param] = page * self.page_limit
            url = self.start_url if self.query_mode != 'params' else _url_replacer(self.start_url, url_params)
            if self.query_mode in ['params', 'mixed']:
                url_params.update(change_params)
            else:
#                print(params, change_params)
                params = update_dict_values(params, change_params)
                if self.flat_params and len(params.keys()) > 0:
                    for k, v in params.items():
                        flatten[k] = str(v)
            if self.query_mode == 'params':
                url = _url_replacer(self.start_url, url_params)
            elif self.query_mode == 'mixed':
                url = _url_replacer(self.start_url, url_params, query_mode=True)
            else:
                url = self.start_url
            response = self._single_request(url, headers, params, flatten)
            time.sleep(self.default_delay)
            if response.status_code in DEFAULT_ERROR_STATUS_CODES:
                rc = 0
                for rc in range(1, self.retry_count, 1):
                    logging.info('Retry attempt %d of %d, delay %d' % (rc, self.retry_count, self.retry_delay))
                    time.sleep(self.retry_delay)
                    response = self._single_request(url, headers, params, flatten)
                    if response.status_code not in DEFAULT_ERROR_STATUS_CODES:
                        logging.info('Looks like finally we have proper response on %d attempt' % (rc))
                        break
            if response.status_code not in DEFAULT_ERROR_STATUS_CODES:
                if num_pages is not None:
                    logging.info('Saving page %d of %d' % (page, num_pages))
                else:
                    logging.info('Saving page %d' % (page))
                if self.resp_type == 'json':
                    outdata = response.content
                elif self.resp_type == 'xml':
                    outdata = json.dump(xmltodict.parse(response.content))
                mzip.writestr('page_%d.json' % (page), outdata)
            else:
                logging.info('Errors persist on page %d. Stopped' % (page))
                break
        mzip.close()
        # pass

    def follow(self, mode='item'):
        """Collects data about each item using additional requests"""
        if self.config is None:
            print('Config file not found. Please run in project directory')
            return
        if not os.path.exists(self.storagedir):
            os.mkdir(self.storagedir)
        if self.storage_type != 'zip':
            print('Only zip storage supported right now')
            return
        if not os.path.exists(self.storage_file):
            print('Storage file not found')
            return
        params = None
        params_file = os.path.join(self.project_path, 'follow_params.json')
        if os.path.exists(params_file):
            f = open(params_file, 'r', encoding='utf8')
            params = json.load(f)
            f.close()
        else:
            params = {}
        if self.flat_params:
            flatten = {}
            for k, v in params.items():
                flatten[k] = str(v)
        headers_file = os.path.join(self.project_path, 'headers.json')
        if os.path.exists(headers_file):
            f = open(headers_file, 'r', encoding='utf8')
            headers = json.load(f)
            f.close()
        else:
            headers = {}
        mzip = ZipFile(self.storage_file, mode='r', compression=ZIP_DEFLATED)
        if self.follow_mode == 'item':
            allkeys = []
            logging.info('Extract unique key values from downloaded data')
            for fname in mzip.namelist():
                tf = mzip.open(fname, 'r')
                data = json.load(tf)
                tf.close()
                try:
                    for item in get_dict_value(data, self.data_key, splitter=self.field_splitter):
                        allkeys.append(item[self.follow_item_key])
#                except KeyError:
                except KeyboardInterrupt:
                    logging.info('Data key: %s not found' % (self.data_key))
            logging.info('%d allkeys to process' % (len(allkeys)))
            if mode == 'full':
                mzip = ZipFile(self.details_storage_file, mode='w', compression=ZIP_DEFLATED)
                finallist = allkeys
            elif mode == 'continue':
                mzip = ZipFile(self.details_storage_file, mode='a', compression=ZIP_DEFLATED)
                keys = []
                filenames = mzip.namelist()
                for name in filenames:
                    keys.append(int(name.rsplit('.', 1)[0]))
                logging.info('%d filenames in zip file' % (len(keys)))
                finallist = list(set(allkeys) - set(keys))
                logging.info('%d keys in final list' % (len(finallist)))
            n = 0
            total = len(finallist)
            for key in finallist:
                n += 1
                change_params = {}
                change_params[self.follow_param] = key
                params = update_dict_values(params, change_params)
                if self.follow_http_mode == 'GET':
                    if headers:
                        response = self.http.get(self.follow_pattern, params=params, headers=headers)
                    else:
                        response = self.http.get(self.follow_pattern, params=params)
                else:
                    if headers:
                        response = self.http.post(self.follow_pattern, params=params, headers=headers)
                    else:
                        response = self.http.post(self.follow_pattern, params=params)
                logging.info('Saving object with id %s. %d of %d' % (key, n, total))
                mzip.writestr('%s.json' % (key), response.content)
                time.sleep(DEFAULT_DELAY)
            mzip.close()
        elif self.follow_mode == 'url':
            allkeys = {}
            logging.info('Extract urls to follow from downloaded data')
            for fname in mzip.namelist():
                tf = mzip.open(fname, 'r')
                data = json.load(tf)
                tf.close()
#                logging.info(str(data))
                try:
                    for item in get_dict_value(data, self.data_key, splitter=self.field_splitter):
                        id = item[self.follow_item_key]
                        allkeys[id] = get_dict_value(item, self.follow_url_key, splitter=self.field_splitter)
                except KeyError:
                    logging.info('Data key: %s not found' % (self.data_key))
            if mode == 'full':
                mzip = ZipFile(self.details_storage_file, mode='w', compression=ZIP_DEFLATED)
                finallist = allkeys
                n = 0
            elif mode == 'continue':
                mzip = ZipFile(self.details_storage_file, mode='a', compression=ZIP_DEFLATED)
                keys = []
                filenames = mzip.namelist()
                for name in filenames:
                    keys.append(int(name.rsplit('.', 1)[0]))
                finallist = list(set(allkeys.keys()) - set(keys))
                n = len(keys)
            total = len(allkeys.keys())
            for key in finallist:
                n += 1
                url = allkeys[key]
                if headers:
                    response = self.http.get(url, params=params, headers=headers)
                else:
                    response = self.http.get(url, params=params)
                # else:
                #     if http_mode == 'GET':
                #         response = self.http.post(start_url, json=params)
                logging.info('Saving object with id %s. %d of %d' % (key, n, total))
                mzip.writestr('%s.json' % (key), response.content)
                time.sleep(DEFAULT_DELAY)
            mzip.close()
        elif self.follow_mode == 'drilldown':
            pass
        elif self.follow_mode == 'prefix':
            allkeys = []
            logging.info('Extract unique key values from downloaded data')
            for fname in mzip.namelist():
                tf = mzip.open(fname, 'r')
                data = json.load(tf)
                tf.close()
                try:
                    for item in get_dict_value(data, self.data_key, splitter=self.field_splitter):
                        allkeys.append(item[self.follow_item_key])
                except KeyboardInterrupt:
                    logging.info('Data key: %s not found' % (self.data_key))
            if mode == 'full':
                mzip = ZipFile(self.details_storage_file, mode='w', compression=ZIP_DEFLATED)
                finallist = allkeys
            elif mode == 'continue':
                mzip = ZipFile(self.details_storage_file, mode='a', compression=ZIP_DEFLATED)
                keys = []
                filenames = mzip.namelist()
                for name in filenames:
                    keys.append(name.rsplit('.', 1)[0])
                finallist = list(set(allkeys) - set(keys))
            n = 0
            total = len(finallist)
            for key in finallist:
                n += 1
                url = self.follow_pattern + str(key)
#                print(url)
                response = self.http.get(url)
                logging.info('Saving object with id %s. %d of %d' % (key, n, total))
                mzip.writestr('%s.json' % (key), response.content)
                time.sleep(DEFAULT_DELAY)
            mzip.close()
        else:
            print('Follow section not configured. Please update config file')

    def getfiles(self, be_careful=False):
        """Downloads all files associated with this API data"""
        if self.config is None:
            print('Config file not found. Please run in project directory')
            return
        if not os.path.exists(self.storagedir):
            os.mkdir(self.storagedir)
        if self.storage_type != 'zip':
            print('Only zip storage supported right now')
            return
        storage_file = os.path.join(self.storagedir, 'storage.zip')
        if not os.path.exists(storage_file):
            print('Storage file not found')
            return
        uniq_ids = set()
        allfiles_name = os.path.join(self.storagedir, 'allfiles.csv')
        if not os.path.exists(allfiles_name):
            if not self.config.has_section('follow'):
                logging.info('Extract file urls from downloaded data')
                mzip = ZipFile(storage_file, mode='r', compression=ZIP_DEFLATED)
                n = 0
                for fname in mzip.namelist():
                    n += 1
                    if n % 10 == 0:
                        logging.info('Processed %d files, uniq ids %d' % (n, len(uniq_ids)))
                    tf = mzip.open(fname, 'r')
                    data = json.load(tf)
                    tf.close()
                    try:
                        if self.data_key:
                            iterate_data = get_dict_value(data, self.data_key, splitter=self.field_splitter)
                        else:
                            iterate_data = data
                        for item in iterate_data:
                            if item:
                                for key in self.files_keys:
                                    file_data = get_dict_value(item, key, as_array=True, splitter=self.field_splitter)
                                    if file_data:
                                        for uniq_id in file_data:
                                            if uniq_id is not None:
                                                if isinstance(uniq_id, list):
                                                    uniq_ids.update(set(uniq_id))
                                                else:
                                                    uniq_ids.add(uniq_id)
                    except KeyError:
                        logging.info('Data key: %s not found' % (str(self.data_key)))
            else:
                details_storage_file = os.path.join(self.storagedir, 'details.zip')
                mzip = ZipFile(details_storage_file, mode='r', compression=ZIP_DEFLATED)
                n = 0
                for fname in mzip.namelist():
                    n += 1
                    if n % 1000 == 0:
                        logging.info('Processed %d records' % (n))
                    tf = mzip.open(fname, 'r')
                    data = json.load(tf)
                    tf.close()
                    items = []
                    if self.follow_data_key:
                        for item in get_dict_value(data, self.follow_data_key, splitter=self.field_splitter):
                            items.append(item)
                    else:
                        items = [data, ]
                    for item in items:
                        for key in self.files_keys:
                            urls = get_dict_value(item, key, as_array=True, splitter=self.field_splitter)
                            if urls is not None:
                                for uniq_id in urls:
                                    if uniq_id is not None and len(uniq_id.strip()) > 0:
                                        uniq_ids.append(uniq_id)
                mzip.close()
            logging.info('Storing all filenames')
            f = open(allfiles_name, 'w', encoding='utf8')
            for u in uniq_ids:
                f.write(str(u) + '\n')
            f.close()
        else:
            logging.info('Load all filenames')
            uniq_ids = load_file_list(allfiles_name)
        # Start download
        processed_files = []
        skipped_files_dict = {}
        files_storage_file = os.path.join(self.storagedir, 'files.zip')
        files_list_storage = os.path.join(self.storagedir, 'files.list')
        files_skipped = os.path.join(self.storagedir, 'files_skipped.list')
        if os.path.exists(files_list_storage):
            processed_files = load_file_list(files_list_storage, encoding='utf8')
            list_file = open(files_list_storage, 'a', encoding='utf8')
        else:
            list_file = open(files_list_storage, 'w', encoding='utf8')
        if os.path.exists(files_skipped):
            skipped_files_dict = load_csv_data(files_skipped, key='filename', encoding='utf8')
            skipped_file = open(files_skipped, 'a', encoding='utf8')
            skipped = csv.DictWriter(skipped_file, delimiter=';', fieldnames=['filename', 'filesize', 'reason'])
        else:
            skipped_files_dict = {}
            skipped_file = open(files_skipped, 'w', encoding='utf8')
            skipped = csv.DictWriter(skipped_file, delimiter=';', fieldnames=['filename', 'filesize', 'reason'])
            skipped.writeheader()
        use_aria2 = True if self.use_aria2 == 'True' else False
        if use_aria2:
            aria2 = aria2p.API(
                aria2p.Client(
                    host="http://localhost",
                    port=6800,
                    secret=""
                )
            )
        else:
            aria2 = None
        if self.file_storage_type == 'zip':
            fstorage = ZipFileStorage(files_storage_file, mode='a', compression=ZIP_DEFLATED)
        elif self.file_storage_type == 'filesystem':
            fstorage = FilesystemStorage(os.path.join('storage', 'files'))
        n = 0
        for uniq_id in uniq_ids:
            if self.fetch_mode == 'prefix':
                url = self.root_url + str(uniq_id)
            elif self.fetch_mode == 'pattern':
                url = self.root_url.format(uniq_id)
            n += 1
            if n % 50 == 0:
                logging.info('Downloaded %d files' % (n))
#            if url in processed_files:
#                continue
            if be_careful:
                r = self.http.head(url, timeout=DEFAULT_TIMEOUT, verify=False)
                if 'content-disposition' in r.headers.keys() and self.storage_mode == 'filepath':
                    filename = r.headers['content-disposition'].rsplit('filename=', 1)[-1].strip('"')
                elif self.default_ext is not None:
                    filename = uniq_id + '.' + self.default_ext
                else:
                    filename = uniq_id
#                if not 'content-length' in r.headers.keys():
#                    logging.info('File %s skipped since content-length not found in headers' % (url))
#                    record = {'filename': filename, 'filesize': "0", 'reason': 'Content-length not set in headers'}
#                    skipped_files_dict[uniq_id] = record
#                    skipped.writerow(record)
#                    continue
                if 'content-length' in r.headers.keys() and int(r.headers['content-length']) > FILE_SIZE_DOWNLOAD_LIMIT and self.file_storage_type == 'zip':
                    logging.info('File skipped with size %d and name %s' % (int(r.headers['content-length']), url))
                    record = {'filename': filename, 'filesize': str(r.headers['content-length']), 'reason': 'File too large. More than %d bytes' % (FILE_SIZE_DOWNLOAD_LIMIT)}
                    skipped_files_dict[uniq_id] = record
                    skipped.writerow(record)
                    continue
            else:
                if self.default_ext is not None:
                    filename = str(uniq_id) + '.' + self.default_ext
                else:
                    filename = str(uniq_id)
            if self.storage_mode == 'filepath':
                filename = urlparse(url).path
            logging.info('Processing %s as %s' % (url, filename))
            if fstorage.exists(filename):
                logging.info('File %s already stored' % (filename))
                continue
            if not use_aria2:
                response = self.http.get(url, timeout=DEFAULT_TIMEOUT, verify=False)
                fstorage.store(filename, response.content)
                list_file.write(url + '\n')
            else:
                aria2.add_uris(uris=[url, ], options={'out': filename, 'dir': os.path.abspath(os.path.join('storage', 'files'))})
        fstorage.close()
        list_file.close()
        skipped_file.close()

    def estimate(self, mode):
        """Measures time, size and count of records"""
        if self.config is None:
            print('Config file not found. Please run in project directory')
            return
        data = []
        params = {}
        data_size = 0
        headers = None
        headers_file = os.path.join(self.project_path, 'headers.json')
        if os.path.exists(headers_file):
            f = open(headers_file, 'r', encoding='utf8')
            headers = json.load(f)
            f.close()
        else:
            headers = {}
        params_file = os.path.join(self.project_path, 'params.json')
        if os.path.exists(params_file):
            f = open(params_file, 'r', encoding='utf8')
            params = json.load(f)
            f.close()
        if self.flat_params:
            flatten = {}
            for k, v in params.items():
                flatten[k] = str(v)
            params = flatten
        url_params = None
        params_file = os.path.join(self.project_path, 'url_params.json')
        if os.path.exists(params_file):
            f = open(params_file, 'r', encoding='utf8')
            url_params = json.load(f)
            f.close()
        if len(self.total_number_key) > 0:
            start = timer()
            if self.query_mode == 'params':
                url = _url_replacer(self.start_url, url_params)
            elif self.query_mode == 'mixed':
                url = _url_replacer(self.start_url, url_params, query_mode=True)
            else:
                url = self.start_url
            if self.http_mode == 'GET':
                if self.flat_params and len(params.keys()) > 0:
                    s = []
                    for k, v in params.items():
                        s.append('%s=%s' % (k, v.replace("'", '"').replace('True', 'true')))
                    if headers:
                        start_page_data = self.http.get(url + '?' + '&'.join(s), headers=headers).json()
                    else:
                        start_page_data = self.http.get(url + '?' + '&'.join(s)).json()
                else:
                    logging.debug('Start request params: %s headers: %s' % (str(params), str(headers)))
                    if headers and len(headers.keys()) > 0:
...
```
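The `_url_replacer` in this snippet is a module-level helper (judging by the `apibackuper.cfg` file it reads, the source appears to be the apibackuper tool rather than gabbi itself). It appends `key=value` pairs to a base URL, either as an ordinary query string when `query_mode=True`, or joined with `PARAM_SPLITTER`, a constant defined elsewhere in that project. Below is a minimal standalone sketch of calling it; the `PARAM_SPLITTER` value of `'/'` and the `api.example.com` host are assumed placeholders, not values from the original project.

```python
from urllib.parse import urlparse

# Assumed placeholder: apibackuper defines PARAM_SPLITTER in its constants.
PARAM_SPLITTER = '/'

def _url_replacer(url, params, query_mode=False):
    """Append params to a url, either query-string style or splitter-joined."""
    if query_mode:
        query_char = '?'
        splitter = '&'
    else:
        splitter = PARAM_SPLITTER
        query_char = PARAM_SPLITTER
    parsed = urlparse(url)
    finalparams = ['%s=%s' % (str(k), str(v)) for k, v in params.items()]
    return parsed.geturl() + query_char + splitter.join(finalparams)

# query_mode=True builds an ordinary query string:
print(_url_replacer('https://api.example.com/items', {'page': 1, 'limit': 100}, query_mode=True))
# -> https://api.example.com/items?page=1&limit=100
```

Note that the function concatenates blindly: `parsed.geturl()` returns the full URL, so if the base URL already carries a query string, `query_mode=True` appends a second `?`.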


case.py

Source: case.py (GitHub)


```python
...
        return re.sub(self._simple_replacer_regex('URL'),
                      self._regex_replacer(self._url_replacer,
                                           escape_regex),
                      message)

    def _url_replacer(self, match):
        """Replace a regex match with the value of a previous url."""
        case = match.group('case')
        if case:
            referred_case = self.history[case]
        else:
            referred_case = self.prior
        return referred_case.url

    def _location_replace(self, message, escape_regex=False):
        """Replace $LOCATION in a message.

        With the location header from a previous request.
        """
        return re.sub(self._simple_replacer_regex('LOCATION'),
                      self._regex_replacer(self._location_replacer,
                                           escape_regex),
...
```
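In gabbi itself, `_url_replacer` is a method of the test case class: `re.sub` (via `_regex_replacer`) hands it a regex match, and it returns the URL of either the immediately prior test or a named test from the history, which is how `$URL` in a test gets replaced with a previously requested URL. Here is a rough standalone sketch of that mechanic, with simplified stand-ins for gabbi's case objects and a hand-written regex in place of `_simple_replacer_regex`; it assumes gabbi's `$HISTORY['test name'].$URL` notation, and none of these names are gabbi's real API.

```python
import re

class SimpleCase:
    """Simplified stand-in for a gabbi test case that remembers its url."""
    def __init__(self, url):
        self.url = url

# Illustrative previously-run cases: a named history plus the prior case.
history = {'create user': SimpleCase('/users/42')}
prior = SimpleCase('/users')

# Hand-written approximation of the pattern _simple_replacer_regex('URL')
# would build: matches bare $URL or $HISTORY['case name'].$URL.
URL_RE = re.compile(r"(?:\$HISTORY\['(?P<case>[^']+)'\]\.)?\$URL")

def _url_replacer(match):
    """Replace a regex match with the url of a previous test case."""
    case = match.group('case')
    referred_case = history[case] if case else prior
    return referred_case.url

message = "GET $HISTORY['create user'].$URL then retry $URL"
print(re.sub(URL_RE, _url_replacer, message))
# -> GET /users/42 then retry /users
```

The same callback-to-`re.sub` pattern backs gabbi's other substitutions, such as `$LOCATION` in `_location_replace` above.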


