Best Python code snippet using gherkin-python
preprocess_BERT.py
Source: preprocess_BERT.py
from __future__ import division
import random
import sys
import io
import os
import logging
import re
import pandas as pd
import ujson as json
import os.path as op
from tqdm import tqdm
from collections import Counter, OrderedDict
import argparse

program = os.path.basename(sys.argv[0])
L = logging.getLogger(program)
logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s')
logging.root.setLevel(level=logging.INFO)
L.info("Running %s" % ' '.join(sys.argv))

entity_linking_pattern = re.compile('#.*?;-*[0-9]+,(-*[0-9]+)#')
fact_pattern = re.compile('#(.*?);-*[0-9]+,-*[0-9]+#')
unk_pattern = re.compile('#([^#]+);-1,-1#')
TSV_DELIM = "\t"
TBL_DELIM = " ; "

def join_unicode(delim, entries):
    # entries = [_.decode('utf8') for _ in entries]
    return delim.join(entries)

def parse_fact(fact):
    fact = re.sub(unk_pattern, '[UNK]', fact)
    chunks = re.split(fact_pattern, fact)
    output = ' '.join([x.strip() for x in chunks if len(x.strip()) > 0])
    return output

def process_file(data_dir, shuffle=False):
    all_csv_dir = op.join(data_dir, "data/all_csv")
    all_data = op.join(data_dir, "tokenized_data/full_cleaned.json")
    examples = []
    with io.open(all_data, 'r', encoding='utf8') as fin:
        dataset = json.load(fin)
        for idx, (fname, sample) in tqdm(enumerate(dataset.items())):
            print(fname)
            try:
                if len(sample) == 0:
                    continue
                print(fname, "success")
                table = pd.read_csv(op.join(all_csv_dir, fname), delimiter='[#]')
                # print(table)
                # facts: list of strings
                facts = sample[0]
                # labels: list of ints
                labels = sample[1]
                print(len(facts), len(labels))
                assert all([x in [0, 1, 2] for x in labels])
                assert len(facts) == len(labels)
                # types: list of table column strings
                types = [str(x) for x in table.columns.values.tolist()]
                # columns: {type: list of cell phrases in this column}
                columns = OrderedDict()
                for t in types:
                    # np array of cells in the one-column table (dataframe) --> list
                    one_column = [str(x) for x in table[t].to_numpy().tolist()]
                    columns[t] = one_column
                # pack into one example
                example = {
                    "csv": fname,
                    "columns": columns,
                    "facts": facts,
                    "labels": labels
                }
                examples.append(example)
            except:
                print("{} is misformated".format(fname))
    if shuffle:
        random.shuffle(examples)
    print("{} samples in total".format(len(examples)))
    return examples

def convert_to_tsv(out_file, examples, dataset_type, meta, scan):
    L.info("Processing {} examples...".format(dataset_type))
    total = 0
    unk = 0
    len_total = 0
    empty_table = 0
    with io.open(out_file, 'w', encoding='utf-8') as fout:
        for example in tqdm(examples):
            assert len(example['facts']) == len(example['labels'])
            for fact, label in zip(example['facts'], example['labels']):
                # use entity linking info to retain relevant columns
                useful_column_nums = [int(x) for x in re.findall(entity_linking_pattern, fact) if not x == '-1']
                useful_column_nums = dict.fromkeys(useful_column_nums)
                remaining_table = OrderedDict()
                for idx, (column_type, column_cells) in enumerate(example['columns'].items()):
                    if idx in useful_column_nums:
                        column_type = '_'.join(column_type.split())
                        remaining_table[column_type] = column_cells
                fact_clean = parse_fact(fact)
                if len(remaining_table) > 0:
                    table_cells, table_feats = [], []
                    len_total += 1
                    if scan == 'vertical':
                        for column_type, column_cells in remaining_table.items():
                            column_type = ' '.join(column_type.split('_'))
                            table_cells.extend([column_type, 'are :'])
                            this_column = []
                            for idx, c in enumerate(column_cells):
                                this_column.append("row {} is {}".format(idx + 1, c))
                            this_column = join_unicode(TBL_DELIM, this_column)
                            table_cells.append(this_column)
                            table_cells.append('.')
                            table_feats.append(column_type)
                    else:
                        # stupid but to preserve order
                        table_column_names, table_column_cells = [], []
                        for column_type, column_cells in remaining_table.items():
                            column_type = ' '.join(column_type.split('_'))
                            table_feats.append(column_type)
                            table_column_names.append(column_type)
                            table_column_cells.append(column_cells)
                        for idx, row in enumerate(zip(*table_column_cells)):
                            table_cells.append('row {} is :'.format(idx + 1))
                            this_row = []
                            for col, tk in zip(table_column_names, row):
                                this_row.append('{} is {}'.format(col, tk))
                            this_row = join_unicode(TBL_DELIM, this_row)
                            table_cells.append(this_row)
                            table_cells.append('.')
                    table_str = ' '.join(table_cells)
                    out_items = [example['csv'],
                                 str(len(table_feats)),
                                 ' '.join([str(x) for x in table_feats]),
                                 table_str,
                                 fact_clean,
                                 str(label)]
                    out_items = TSV_DELIM.join(out_items)
                    total += 1
                    fout.write(out_items + "\n")
                else:
                    if dataset_type != 'train':
                        table_feats = ['[UNK]']
                        table_cells = ['[UNK]']
                        table_str = ' '.join(table_cells)
                        out_items = [example['csv'],
                                     str(len(table_feats)),
                                     ' '.join([str(x) for x in table_feats]),
                                     table_str,
                                     fact_clean,
                                     str(label)]
                        out_items = TSV_DELIM.join(out_items)
                        fout.write(out_items + "\n")
                        total += 1
                        empty_table += 1
    print("Built {} instances of features in total, {}/{}={}% unseen column types, {} empty tables"
          .format(total, unk, len_total, "{0:.2f}".format(unk * 100 / len_total), empty_table))
    meta["{}_total".format(dataset_type)] = total
    return meta

def split_dataset(data_dir, all_examples):
    total_size = len(all_examples)
    L.info("split {} tables into train dev test ...".format(total_size))
    data_dir = op.join(data_dir, "data/")
    csv_id_lkt = {}
    for x in ['train', 'val', 'test', 'small_test', 'simple_test', 'complex_test']:
        id_file = op.join(data_dir, "{}_id.json".format(x))
        with io.open(id_file, 'r', encoding='utf-8') as fin:
            csv_id_lkt[x] = dict.fromkeys(json.load(fin), True)
    trainset, validset, testset, small_test, simple_test, complex_test = [], [], [], [], [], []
    for sample in all_examples:
        if sample['csv'] in csv_id_lkt['small_test']:
            small_test.append(sample)
        if sample['csv'] in csv_id_lkt['simple_test']:
            simple_test.append(sample)
        if sample['csv'] in csv_id_lkt['complex_test']:
            complex_test.append(sample)
        if sample['csv'] in csv_id_lkt['test']:
            testset.append(sample)
        if sample['csv'] in csv_id_lkt['train']:
            trainset.append(sample)
        elif sample['csv'] in csv_id_lkt['val']:
            validset.append(sample)
        else:
            print('{} is NOT used'.format(sample['csv']))
    return trainset, validset, testset, small_test, simple_test, complex_test

def save(filename, obj, message=None, beautify=False):
    assert message is not None
    print("Saving {} ...".format(message))
    with io.open(filename, "a") as fh:
        if beautify:
            json.dump(obj, fh, sort_keys=True, indent=4)
        else:
            json.dump(obj, fh)

def mkdir_p(path1, path2=None):
    if path2 is not None:
        path1 = os.path.join(path1, path2)
    if not os.path.exists(path1):
        os.mkdir(path1)
    return path1

def count_types(dataset):
    type_cnt = []
    for example in dataset:
        for name in example['columns'].keys():
            type_cnt.append('_'.join(name.split()))
    return type_cnt

if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--data_dir",
                        type=str,
                        default='../',
                        help="The path of TabFact folder")
    parser.add_argument("--output_dir",
                        type=str,
                        default='../processed_datasets',
                        help="The path to save output tsv files")
    parser.add_argument("--scan",
                        default="horizontal",
                        choices=["vertical", "horizontal"],
                        type=str,
                        help="The direction of sequentializing table cells.")
    args = parser.parse_args()
    root_dir = mkdir_p(args.output_dir)
    data_save_dir = mkdir_p(root_dir, "tsv_data_{}".format(args.scan))
    train_tsv = os.path.join(data_save_dir, "train.tsv")
    dev_tsv = os.path.join(data_save_dir, "dev.tsv")
    test_tsv = os.path.join(data_save_dir, "test.tsv")
    small_test_tsv = os.path.join(data_save_dir, "small_test.tsv")
    simple_test_tsv = os.path.join(data_save_dir, "simple_test.tsv")
    complex_test_tsv = os.path.join(data_save_dir, "complex_test.tsv")
    meta_file = os.path.join(data_save_dir, "meta.json")
    type2idx_file = os.path.join(data_save_dir, "type2idx.json")
    idx2type_file = os.path.join(data_save_dir, "idx2type.json")
    L.info("process file ...")
    all_examples = process_file(args.data_dir)
    L.info("splitting datasets ...")
    trainset, devset, testset, small_test, simple_test, complex_test = split_dataset(args.data_dir, all_examples)
    L.info("build tsv datasets ...")
    meta = {}
    meta = convert_to_tsv(train_tsv, trainset, "train", meta, args.scan)
    meta = convert_to_tsv(dev_tsv, devset, "dev", meta, args.scan)
    meta = convert_to_tsv(test_tsv, testset, "test", meta, args.scan)
    meta = convert_to_tsv(small_test_tsv, small_test, "small_test", meta, args.scan)
    meta = convert_to_tsv(simple_test_tsv, simple_test, "simple_test", meta, args.scan)
    meta = convert_to_tsv(complex_test_tsv, complex_test, "complex_test", meta, args.scan)
    ...
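The fact strings this script consumes carry inline entity-linking markup that the three regular expressions at the top pick apart: parse_fact strips the markup, while the captured column index drives the table pruning in convert_to_tsv. Below is a minimal, self-contained sketch of that behaviour on a made-up fact string; the #phrase;row,column# layout is inferred from those regexes, not from any TabFact documentation.

import re

entity_linking_pattern = re.compile('#.*?;-*[0-9]+,(-*[0-9]+)#')
fact_pattern = re.compile('#(.*?);-*[0-9]+,-*[0-9]+#')
unk_pattern = re.compile('#([^#]+);-1,-1#')

# hypothetical fact: "algeria" is linked to row 2 / column 0, "2" is unlinked (-1,-1)
fact = "#algeria;2,0# won #2;-1,-1# gold medals"

# column indices referenced by the fact (excluding -1), as used to prune the table
columns = [int(x) for x in re.findall(entity_linking_pattern, fact) if x != '-1']
print(columns)  # -> [0]

# parse_fact equivalent: unlinked mentions become [UNK], markup is stripped
clean = re.sub(unk_pattern, '[UNK]', fact)
clean = ' '.join(x.strip() for x in re.split(fact_pattern, clean) if x.strip())
print(clean)  # -> "algeria won [UNK] gold medals"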
generate_graphs.py
Source: generate_graphs.py
import matplotlib.pyplot as plt
import numpy as np
import os
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.offline

colors = {
    'pyBW exact': 'orange',
    'pyBW app.': 'red',
    'pyBG exact': 'green',
    'pyBG app. bin=int_size/5': 'cyan',
    'pyBG app. bin=int_size/10': 'magenta',
    'pyBG app. bin=int_size/20': 'blue',
    'pyBG app. bin=100': 'cyan',
    'pyBG app. bin=50': 'magenta',
    'pyBG app. bin=25': 'blue'
}
METHOD_LIST = [
    'pyBW exact',
    'pyBW app.',
    'pyBG exact',
    'pyBG app. bin=int_size/5',
    'pyBG app. bin=int_size/10',
    'pyBG app. bin=int_size/20'
]
TABLE_HEADERS = [
    'Interval Size (bPS)',
    'Error Rate (%)',
    'Mean Squared Error',
    'Absolute Error',
    '# Actual is 0'
]
RUNTIME_TABLE_HEADERS = [
    'Dataset',
    'pyBW exact',
    'pyBW app.',
    'pyBG exact',
    'pyBG app. bin=100',
    'pyBG app. bin=50',
    'pyBG app. bin=25'
]
TITLE_FONT_SIZE = 16
AXIS_FONT_SIZE = 12
LEGEND_FONT_SIZE = 10
GRAPH_ROOT_LOCATION = 'graphs'
NUM_ERROR_TYPES = 4
sample_runtimes = {}

# Interval size: 500
def create_runtime_num_test(infile, data_name):
    line = infile.readline()
    num_tests = [int(x) for x in line.split()]
    run_times = {}
    while True:
        name = infile.readline().strip()
        if name == "":
            break
        results = infile.readline().split()
        run_times[name] = [float(x) for x in results]
    sample_runtimes[data_name] = {}
    for test in run_times:
        sample_runtimes[data_name][test] = run_times[test][-1]
    i = 0
    for name in run_times:
        plt.plot([np.log10(x) for x in num_tests], [np.log10(x) for x in run_times[name]],
                 color=colors[name], label=name)
        i += 1
    plt.title(f"Run Time for {data_name}", fontsize=TITLE_FONT_SIZE)
    plt.xlabel("log10(# of tests)", fontsize=AXIS_FONT_SIZE)
    plt.ylabel("log10(runtime (seconds))", fontsize=AXIS_FONT_SIZE)
    plt.legend(loc='best', fontsize=LEGEND_FONT_SIZE)
    plt.savefig(f'graphs/{data_name}/run_time.png', dpi=300)
    plt.close()

# Number of tests: 10,000
def create_interval_error(in_file, data_name):
    line = in_file.readline()
    intervals = [int(x) for x in line.split()]
    errors = {}
    table_cells = {}
    while True:
        name = in_file.readline().strip()
        if name == "":
            break
        table_cells[name] = [[x] for x in intervals]
        error_list = []
        for i in range(NUM_ERROR_TYPES):
            error_list.append(in_file.readline().split())
        errors[name] = {}
        for line in error_list:
            error_name = line[0]
            errors[name][error_name] = [float(line[x]) for x in range(1, len(line), 1)]
            for x in range(len(intervals)):
                error = errors[name][error_name][x]
                if error_name == 'not_included':
                    error = int(error)
                if error_name == 'percent_error':
                    error *= 100
                error = round(error, 5)
                table_cells[name][x].append(error)
    # Don't include intervals of over 10k in the graph
    while intervals[-1] >= 10000:
        intervals.pop()
        for name in errors:
            errors[name]['percent_error'].pop()
    i = 0
    for name in errors:
        plt.plot(intervals, [x * 100 for x in errors[name]['percent_error']], color=colors[name], label=name)
        i += 1
    plt.title(f"Percentage Error Rate vs. Interval Size for {data_name}", fontsize=TITLE_FONT_SIZE)
    plt.xlabel("Interval Size (basepairs)", fontsize=AXIS_FONT_SIZE)
    plt.ylabel("Percentage Error Rate (%)", fontsize=AXIS_FONT_SIZE)
    plt.legend(loc='best', fontsize=LEGEND_FONT_SIZE)
    plt.savefig(f'graphs/{data_name}/interval_error.png', dpi=300)
    plt.close()
    for name in table_cells:
        plt.figure()
        plt.title(f"{data_name} --- {name}", fontsize=AXIS_FONT_SIZE)
        table = plt.table(
            cellText=table_cells[name],
            colWidths=[0.027, 0.023, 0.03, 0.022, 0.02],
            colLabels=TABLE_HEADERS,
            loc='center'
        )
        table.auto_set_font_size(False)
        table.set_fontsize(LEGEND_FONT_SIZE)
        table.scale(11, 2)
        plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
        plt.tick_params(axis='y', which='both', right=False, left=False, labelleft=False)
        for pos in ['right', 'top', 'bottom', 'left']:
            plt.gca().spines[pos].set_visible(False)
        save_name = name.replace(" ", "_").replace('/', '')
        plt.savefig(f'graphs/{data_name}/{save_name}_table.png',
                    bbox_inches='tight', pad_inches=0.05, dpi=300)
        plt.close()

# Number of tests: 10,000
def create_interval_runtime(in_file, data_name):
    line = in_file.readline()
    intervals = [int(x) for x in line.split()]
    run_times = {}
    while True:
        name = in_file.readline().strip()
        if name == '':
            break
        results = in_file.readline().split()
        run_times[name] = [float(x) for x in results]
    table_cells = [[x] for x in intervals]
    for i, interval in enumerate(intervals):
        for name in METHOD_LIST:
            table_cells[i].append(round(run_times[name][i], 5))
    plt.figure()
    plt.title(f"Runtime (seconds) vs. Interval Size for {data_name}\n",
              fontsize=TITLE_FONT_SIZE, y=1.2)
    table = plt.table(
        cellText=table_cells,
        colWidths=[0.1, 0.1, 0.1, 0.1, 0.2, 0.2, 0.2],
        colLabels=['Interval Size'] + METHOD_LIST,
        loc='center'
    )
    table.auto_set_font_size(False)
    table.set_fontsize(LEGEND_FONT_SIZE)
    table.scale(2, 3)
    plt.tick_params(axis='x', which='both', bottom=False, top=False, labelbottom=False)
    plt.tick_params(axis='y', which='both', right=False, left=False, labelleft=False)
    for pos in ['right', 'top', 'bottom', 'left']:
        plt.gca().spines[pos].set_visible(False)
    plt.savefig(f'graphs/{data_name}/interval_runtime_table.png',
                bbox_inches='tight', pad_inches=0.05, dpi=300)
    plt.close()
    # Don't include intervals of over 10k in the graph
    while intervals[-1] >= 10000:
        intervals.pop()
        for name in run_times:
            run_times[name].pop()
    i = 0
    for name in run_times:
        plt.plot(intervals, [np.log10(x) for x in run_times[name]],
                 color=colors[name], label=name)
        i += 1
    plt.title(f"Run Time vs. Interval Size for {data_name}", fontsize=TITLE_FONT_SIZE)
    plt.xlabel("Interval Size (basepairs)", fontsize=AXIS_FONT_SIZE)
    plt.ylabel("log10(runtime (seconds))", fontsize=AXIS_FONT_SIZE)
    plt.legend(loc='best', fontsize=LEGEND_FONT_SIZE)
    plt.savefig(f'graphs/{data_name}/interval_run_time.png', dpi=300)
    plt.close()

def create_values_indexed(in_file, data_name):
    bin_sizes = in_file.readline().split()
    bin_sizes = [int(x) for x in bin_sizes]
    values_indexed = in_file.readline().split()
    values_indexed = [int(x) for x in values_indexed]
    plt.plot(bin_sizes, values_indexed)
    plt.title(f"Values Indexed vs. Bin Size for Exact Mean Calculation", fontsize=TITLE_FONT_SIZE)
    plt.xlabel("Bin Size", fontsize=AXIS_FONT_SIZE)
    plt.ylabel("Values Indexed", fontsize=AXIS_FONT_SIZE)
    plt.savefig(f'graphs/{data_name}/values_indexed.png')
    plt.close()

def create_million_runtime_table():
    name_order = [
        'ENCFF050CCI',
        'ENCFF321FZQ',
        'ENCFF376VCU',
        'ENCFF384CMP',
        'ENCFF631HEX',
        'ENCFF643WMY',
        'ENCFF770CQD',
        'ENCFF847JMY',
        'ENCFF726XVA',
        'ENCFF877IHY',
        'ENCFF000LAB',
        'ENCFF000KYT'
    ]
    table_cells = [[name] + [round(sample_runtimes[name][stat], 3)
                             for stat in RUNTIME_TABLE_HEADERS[1:]]
                   for name in name_order]
    average = [round(np.mean([sample_runtimes[sample][stat] for sample in sample_runtimes]), 3)
               for stat in RUNTIME_TABLE_HEADERS[1:]]
    table_cells.append(['Average'] + average)
    plt.figure()
    # plt.title(f"Runtime for 1 Million Test Intervals", fontsize=AXIS_FONT_SIZE)
    table = plt.table(
        cellText=table_cells,
        colWidths=[1/7 for _ in range(7)],
        colLabels=RUNTIME_TABLE_HEADERS,
        loc='center'
    )
    table.auto_set_font_size(False)
    table.set_fontsize(LEGEND_FONT_SIZE)
    table.scale(2, 2)
    plt.tick_params(axis='x', which='both', bottom=False, top=False,
                    labelbottom=False)
    plt.tick_params(axis='y', which='both', right=False, left=False,
                    labelleft=False)
    for pos in ['right', 'top', 'bottom', 'left']:
        plt.gca().spines[pos].set_visible(False)
    plt.savefig(f'graphs/runtime_table.png',
                bbox_inches='tight', pad_inches=0.05)
    plt.close()

def main():
    # create_values_indexed(open('graphs/ENCFF376VCU/values_indexed.txt'), 'ENCFF376VCU')
    for subdir, dirs, files in os.walk(GRAPH_ROOT_LOCATION):
        data_name = subdir[7:]
        print(data_name)
        for file_name in files:
            file_path = subdir + '/' + file_name
            with open(file_path) as in_file:
                if file_name == 'run_time_results.txt':
                    pass
                    # create_runtime_num_test(in_file, data_name)
                elif file_name == 'interval_error_results.txt':
                    pass
                    # create_interval_error(in_file, data_name)
                elif file_name == 'interval_runtime_results.txt':
                    pass
                    create_interval_runtime(in_file, data_name)
                elif file_name[-4:] == '.png' or file_name[-4:] == '.swp':
                    continue
                else:
                    print(f"Unknown file: {file_name}")
    # create_million_runtime_table()

if __name__ == '__main__':
    ...
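generate_graphs.py discovers its inputs by walking graphs/<dataset>/ and dispatching on file name, so everything hinges on the layout of those plain-text result files. The sketch below illustrates the layout create_interval_runtime appears to expect, inferred from its readline sequence: the first line lists interval sizes, then each method contributes a name line followed by a runtimes line, and a blank line ends the list. The file contents and method names here are hypothetical, and only two methods are shown for brevity.

import io

# stand-in for graphs/<dataset>/interval_runtime_results.txt
sample = io.StringIO(
    "100 500 1000\n"
    "pyBW exact\n"
    "0.52 0.61 0.75\n"
    "pyBG exact\n"
    "0.41 0.48 0.55\n"
    "\n"
)

# same parsing idiom as create_interval_runtime
intervals = [int(x) for x in sample.readline().split()]
run_times = {}
while True:
    name = sample.readline().strip()
    if name == '':
        break
    run_times[name] = [float(x) for x in sample.readline().split()]

print(intervals)                 # [100, 500, 1000]
print(run_times['pyBW exact'])   # [0.52, 0.61, 0.75]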
SimpleHTML.py
Source: SimpleHTML.py
import json
import base64
from PIL import Image

HTML_START = """<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN">
<html lang="en">
<head>
    <title>%s</title>
</head>
<body>\n"""
HTML_END = """</body>
</html>"""
COLOR_DICT = {
    "LightRed": "#FFCCCC",
    "Red": "#FE0000",
    "LightGreen": "#CCFFCC",
    "Green": "#007800",
}

def create_text(text, heading=None, color=None, bold=None):
    if heading:
        html_str = "<h%d>%s</h%d>\n" % (heading, text, heading)
    else:
        html_str = text
    return html_str

def create_image(image_path, width=None, height=None):
    html_str = '<img src="%s" title="%s"' % (image_path, image_path)
    if width:
        html_str += " width=%d" % width
    if height:
        html_str += " height=%d" % height
    html_str += "/>\n"
    return html_str

def create_html_start(title=""):
    return HTML_START % title

def create_html_end():
    return HTML_END

def create_table(table_header=None, table_cells=None, border_size=1):
    string_list = []
    string_list.append("<table border='%d'>" % border_size)
    if table_header:
        string_list.append("<tr>")
        header_str = ""
        for header in table_header:
            if header.startswith("bgcolor"):
                header_str += "<th %s</th>" % header
            else:
                header_str += "<th>%s</th>" % header
        header_str += "\n"
        string_list.append(header_str)
    if table_cells:
        for table_row in table_cells:
            if str(table_row[0]).startswith("bgcolor"):
                row_str = "<tr %s>" % str(table_row[0])
                table_row.pop(0)
            else:
                row_str = "<tr>"
            for cell in table_row:
                cell_str = str(cell)
                if cell_str.startswith("<td bgcolor"):
                    row_str += cell_str
                elif cell_str.startswith("bgcolor"):
                    row_str += "<td %s" % cell_str
                else:
                    row_str += "<td>%s</td>" % cell_str
            row_str += "</tr>\n"
            string_list.append(row_str)
    if table_header:
        string_list.append("</tr>")
    string_list.append("</table>")
    return string_list

def create_ref(ref_name, ref_text=None, hidden=True):
    if ref_text:
        return "<a href=#%s>%s</a>" % (ref_name, ref_text)
    else:
        return "<a href=#%s></a>" % ref_name

def create_toc(ref_list, title):
    toc_str = '<nav role="navigation" class="table-of-contents">'
    if title:
        toc_str += "<h2>%s</h2>" % title
    toc_str += "<ul>"
    for ref in ref_list:
        toc_str += '<li><a href="#%s">%s</a></li>' % (ref[0], ref[1])
    toc_str += "</ul></nav>"
    return toc_str

def create_json_images(image_list):
    json_item = {"type": "images", "items": []}
    for image in image_list:
        item = {}
        item["type"] = "image"
        item["suffix"] = image["filename"].split(".")[-1]
        item["title"] = image["title"]
        im = Image.open(image["filename"])
        item["xsize"] = im.size[0] / 2
        item["ysize"] = im.size[1] / 2
        item["value"] = base64.b64encode(open(image["filename"], "rb").read())
        """
        if item.get("thumbnail_image_filename") is None:
            if thumbnailHeight is not None and thumbnailWidth is not None:
                item["thumbnailSuffix"] = pathToImage.split(".")[-1]
                item["thumbnailXsize"] = thumbnailHeight
                item["thumbnailYsize"] = thumbnailWidth
                item["thumbnailValue"] = base64.b64encode(open(pathToImage).read())
            else:
                item["thumbnailSuffix"] = pathToThumbnailImage.split(".")[-1]
                thumbnailIm = PIL.Image.open(pathToThumbnailImage)
                item["thumbnailXsize"] = thumbnailIm.size[0]
                item["thumbnailYsize"] = thumbnailIm.size[1]
                item["thumbnailValue"] = base64.b64encode(open(pathToThumbnailImage).read())
        """
        json_item["items"].append(item)
    return json_item

def generate_parallel_processing_report(mesh_scan_results, params_dict):
    json_dict = {"items": []}
    html_file = open(params_dict["html_file_path"], "w")
    html_file.write('<div align="CENTER">\n')
    if params_dict["lines_num"] > 1:
        json_dict["items"].append({"type": "title", "value": "Mesh scan results"})
        html_file.write(HTML_START % "Mesh scan results")
    else:
        html_file.write(HTML_START % "Line scan results")
        json_dict["items"].append({"type": "title", "value": "Line scan results"})
    html_file.write(create_image("parallel_processing_plot.png"))
    html_file.write("</br>")
    html_file.write(create_text("Scan parameters", heading=1))
    osc_range_per_line = params_dict["osc_range"] * (params_dict["images_per_line"] - 1)
    table_cells = [
        ("Number of lines", str(params_dict["lines_num"])),
        ("Frames per line", str(params_dict["images_per_line"])),
    ]
    if params_dict["lines_num"] > 1:
        table_cells.extend(
            (
                (
                    "Grid size",
                    "%d x %d microns"
                    % (
                        (params_dict["steps_x"] * params_dict["xOffset"] * 1000),
                        (params_dict["steps_y"] * params_dict["yOffset"] * 1000),
                    ),
                ),
                (
                    "Scan area",
                    "%d x %d microns"
                    % ((params_dict["dx_mm"] * 1000), (params_dict["dy_mm"] * 1000)),
                ),
                (
                    "Horizontal distance between frames",
                    "%d microns" % (params_dict["xOffset"] * 1000),
                ),
                (
                    "Vertical distance between frames",
                    "%d microns" % (params_dict["yOffset"] * 1000),
                ),
                ("Oscillation middle", "%.1f" % params_dict["osc_midle"]),
                ("Oscillation range per frame", "%.2f" % params_dict["osc_range"]),
                (
                    "Oscillation range per line",
                    "%.2f (from %.2f to %.2f)"
                    % (
                        osc_range_per_line,
                        (params_dict["osc_midle"] - osc_range_per_line / 2),
                        (params_dict["osc_midle"] + osc_range_per_line / 2),
                    ),
                ),
            )
        )
    table_rec = create_table(table_cells=table_cells, border_size=0)
    for row in table_rec:
        html_file.write(row)
    html_file.write("</br>")
    positions = mesh_scan_results.get("best_positions", [])
    if len(positions) > 0:
        html_file.write(create_text("Best position", heading=1))
        html_file.write("</br>")
        html_file.write('<font size="2">')
        table_cells = [
            [
                "%d" % positions[0]["index"],
                "<b>%.2f</b>" % positions[0]["score"],
                "<b>%d</b>" % positions[0]["spots_num"],
                "%.1f" % positions[0]["spots_resolution"],
                positions[0]["filename"],
                "%d" % (positions[0]["col"] + 0.5),
                "%d" % (positions[0]["row"] + 0.5),
            ]
        ]
        table_rec = create_table(
            [
                "Index",
                "<b>Score</b>",
                "<b>Number of spots</b>",
                "Resolution",
                "File name",
                "Column",
                "Row",
            ],
            table_cells,
        )
        for row in table_rec:
            html_file.write(row)
        html_file.write("</br>")
        if len(positions) > 1:
            html_file.write(create_text("All positions", heading=1))
            html_file.write("</br>")
            table_cells = []
            for position in positions[1:]:
                table_cells.append(
                    (
                        position["index"],
                        "<b>%.2f</b>" % position["score"],
                        "<b>%d</b>" % position["spots_num"],
                        "%.1f" % position["spots_resolution"],
                        position["filename"],
                        "%d" % (position["col"] + 0.5),
                        "%d" % (position["row"] + 0.5),
                    )
                )
            table_rec = create_table(
                [
                    "Index",
                    "<b>Score</b>",
                    "<b>Number of spots</b>",
                    "Resolution",
                    "File name",
                    "Column",
                    "Row",
                ],
                table_cells,
            )
            for row in table_rec:
                html_file.write(row)
            html_file.write("</br>")
        html_file.write("</font>")
    html_file.write("</div>\n")
    html_file.write(HTML_END)
    html_file.close()
    image = {"title": "plot", "filename": params_dict["cartography_path"]}
    json_dict["items"].append(create_json_images([image]))
    ...
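The helpers above return either strings or lists of strings, so a page is assembled simply by concatenating their outputs in order. A minimal usage sketch, assuming SimpleHTML.py (and its PIL dependency) is importable from the working directory; the report.html file name and the table contents are arbitrary:

import SimpleHTML

parts = [SimpleHTML.create_html_start("Cluster report")]
parts.append(SimpleHTML.create_text("Availability", heading=2))
# create_table returns a list of HTML fragments, one per row
parts.extend(SimpleHTML.create_table(
    table_header=["Cluster", "Available"],
    table_cells=[["rock-level-1", 25], ["scili-friedman", 14]],
))
parts.append(SimpleHTML.create_html_end())

with open("report.html", "w") as fh:
    fh.write("".join(parts))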
grabber_handler.py
Source: grabber_handler.py
# -*- coding: utf-8 -*-
""" Grabs html, parses, and saves json to disk. """
from __future__ import unicode_literals
import datetime, json, pprint, sys
import requests
from bs4 import BeautifulSoup
from clusters_api.config import settings
# from clusters_api.utils import logger_setup


class Grabber(object):
    """ TODO: once server acls are set up, re-enable logging. """

    def __init__( self ):
        """ Sets up basics. """
        self.parser = None
        self.parser = Parser()

    # def __init__( self, log ):
    #     """ Sets up basics. """
    #     self.log = log
    #     self.parser = None
    #     self.parser = Parser( self.log )

    def update_data( self ):
        """ Accesses source html, parses it, and saves json to disk. """
        r = requests.get( settings.SOURCE_URL )
        html = r.content.decode( 'utf-8' )
        clusters_dict = self.parser.parse_cluster_html( html )
        save_dict = {
            'datetime_updated': unicode( datetime.datetime.now() ),
            'counts': clusters_dict }
        jstring = json.dumps( save_dict, sort_keys=True, indent=2 )
        with open( settings.JSON_FILE_PATH, 'w' ) as f:
            f.write( jstring )
        return


class Parser(object):

    def __init__( self ):
        """ Sets up basics. """
        self.cluster_name_mapper = {  # source-html-name: api-name
            'Rock 1st Floor': 'rock-level-1',
            'Rock 2nd Floor': 'rock-level-2-main',
            'Rock Grad': 'rock-level-2-grad',
            'Friedman': 'scili-friedman',
            'SciLi Mezz': 'scili-mezzanine' }

    # def __init__( self, log ):
    #     """ Sets up basics. """
    #     self.log = log
    #     self.cluster_name_mapper = {  # source-html-name: api-name
    #         'Rock 1st Floor': 'rock-level-1',
    #         'Rock 2nd Floor': 'rock-level-2-main',
    #         'Rock Grad': 'rock-level-2-grad',
    #         'Friedman': 'scili-friedman',
    #         'SciLi Mezz': 'scili-mezzanine' }

    def parse_cluster_html( self, html ):
        """ Takes source html.
            Parses out cluster data.
            Returns dict.
            Note: this used the mobile site, which doesn't directly contain all the info needed. """
        table_rows = self._grab_cluster_tablerows( html )
        data_dict = {}
        for row in table_rows:
            title = self._extract_title( row )
            if title in self.cluster_name_mapper.keys():
                count_dict = self._extract_counts( row )
                data_dict[ self.cluster_name_mapper[title] ] = count_dict  # takes, eg, title 'Rock 1st Floor' and stores key as 'rock-level-1'
        api_data_dict = self._tweak_counts( data_dict )
        return api_data_dict

    def _grab_cluster_tablerows( self, html ):
        """ Helper. Grabs cluster table-row objects from html.
            Returns list of BeautifulSoup dom objects. """
        soup = BeautifulSoup( html )
        table_rows = soup.findAll( 'tr' )
        relevant_tablerows = []
        for row in table_rows:
            table_cells = row.findAll( 'td' )
            if len( table_cells ) == 9:
                relevant_tablerows.append( row )
        return relevant_tablerows

    def _extract_title( self, row ):
        """ Helper. Grabs title from table-row object.
            Returns unicode-string or None. """
        title_cell = row.findAll( 'td' )[0]
        a_link = title_cell.findAll( 'a' )
        title = None
        if len( a_link ) > 0:  # goal: '''[<a href="javascript:loadPieChart(11)">Rock 1st Floor</a>]'''
            title = unicode( a_link[0].string )
        return title

    def _extract_counts( self, row ):
        """ Helper. Grabs count info from table-row object.
            Returns dict; counts are integers. """
        table_cells = row.findAll( 'td' )
        rawdata_count_names = [ 'In Use', 'Available Stations', 'Unavailable Stations', 'Offline Stations', 'Total Stations' ]  # don't re-order; this is order in rawdata
        count_dict = {}; i = 0
        for cell in table_cells:
            try:
                count = int( cell.string )
                count_dict[ rawdata_count_names[i] ] = count
                i += 1
            except:
                pass
        return count_dict

    def _tweak_counts( self, data_dict ):
        """ Helper. Updates count_dict labels to api-compatible ones; adds useful 'calculated_available' data.
            Returns dict. """
        updated_data_dict = {}
        for key, value in data_dict.items():
            cluster_name = key; count_dict = value
            updated_count_dict = {
                'available': count_dict['Available Stations'],
                'calculated_available': count_dict['Available Stations'] + count_dict['Offline Stations'],
                'in_use': count_dict['In Use'],
                'offline': count_dict['Offline Stations'],
                'total': count_dict['Total Stations'] }
            updated_data_dict[cluster_name] = updated_count_dict
        return updated_data_dict


if __name__ == '__main__':
    """ Assumes env is activated.
        Called by cron script.
        TODO: once server acls are set up, re-enable logging. """
    try:
        grabber = Grabber()
        grabber.update_data()
    except Exception as e:
        message = '- in grabber_handler.__main__; exception updating data, %s' % unicode(repr(e))
        print message

# if __name__ == '__main__':
#     """ Assumes env is activated.
#         Called by cron script. """
#     try:
#         log = logger_setup.setup_logger()
#     except Exception as e:
#         print '- in grabber_handler.__main__; exception setting up logger, %s' % unicode(repr(e))
#         sys.exit()
#     try:
#         grabber = Grabber( log )
#         grabber.update_data()
#     except Exception as e:
#         message = '- in grabber_handler.__main__; exception updating data, %s' % unicode(repr(e))
#         print message
...