Best Python code snippet using keyboard
generator.py
Source:generator.py
1"""File: generator.py2 Author: Nathan Robinson3 Contact: nathan.m.robinson@gmail.com4 Date: 2013-11-135 Desctiption: Generate cave diver PDDL problems.6 Lisence: Copyright (c) Year 2013, Nathan Robinson <nathan.m.robinson@gmail.com>7 Christian Muise <christian.muise@gmail.com>8 Charles Gretton <charles.gretto@gmail.com>9 Permission to use, copy, modify, and/or distribute this software for any10 purpose with or without fee is hereby granted, provided that the above11 copyright notice and this permission notice appear in all copies.12 THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES13 WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF14 MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY15 SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES16 WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN17 ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR18 IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.19 To do:20 1. partial order reduction option21 2. Make it easier to specify what tunnels to make22 3. document and send in23"""24import generator_cmd_line25from cmd_line import InputException26import sys, random, itertools27eps = 0.0128def make_caves(branch_depths):29 """ Generate a random tree with the branches of the given lengths.30 Return the nodes, the node depths, and the leaf nodes.31 ([int]) -> [(int, int)], [int], [int]32 """33 edges = [(x, x+1) for x in xrange(branch_depths[0])]34 node_depths = range(branch_depths[0]+1)35 nodes = branch_depths[0]+136 leaves = [nodes-1]37 for branch in branch_depths[1:]:38 junction = random.choice([x for x in xrange(nodes) if node_depths[x] < branch])39 length = branch - node_depths[junction]40 edges.append((junction, nodes))41 node_depths.append(node_depths[junction]+1)42 for nid, new_node in enumerate(xrange(nodes, nodes+length-1)):43 edges.append((new_node, new_node+1))44 node_depths.append(node_depths[junction]+2+nid)45 nodes += length46 leaves.append(nodes-1)47 return edges, node_depths, leaves48def make_objectives(objective_depths, node_depths, leaves):49 """ Make num_objectives objectives at leaves with the specified depths.50 ([int], [int]) -> [int]51 """52 objectives = []53 for obj_d in objective_depths:54 candidates = [n for n in leaves\55 if node_depths[n] == obj_d and n not in objectives]56 if not candidates:57 raise Exception("Not enough leaf nodes with depth " +\58 str(obj_d) + " for objective.")59 objectives.append(random.choice(candidates))60 return objectives61def make_tanks_and_divers(node_depths, objectives, num_tank_adjustment,62 num_diver_adjustment, ordered_tanks):63 """ Make the required number of tank and diver objects.64 |Tanks| = 2^(depth+1). (1 dummy (if ordered))65 |Divers| = 2^(depth-1)66 Adjust the number of tanks and divers by num_tank_adjustment and67 num_diver_adjustment. 
If either of these numbers is negative, the68 problem will be unsatisfiable.69 ([int], [int], int, int, bool) -> [str], [str]70 """71 num_tanks = num_tank_adjustment - 172 num_divers = num_diver_adjustment73 for obj in objectives:74 num_tanks += pow(2, node_depths[obj]+1)75 num_divers += pow(2, node_depths[obj]-1)76 tanks = ['t' + str(x) for x in xrange(num_tanks)]77 if ordered_tanks:78 tanks.append('dummy')79 divers = ['d' + str(x) for x in xrange(num_divers)]80 return tanks, divers81def make_positive_relationships(objectives, node_depths):82 """ Create a (transitively closed) graph of relationships showing which83 divers depend on other divers to deliver them tanks to satisfy the84 objectives.85 Process - Start at each objective and walk back to the entrance keeping86 a list of divers as we go. At each step we need 1 additional diver to87 service each diver in our list.88 ([int], [int]) -> set([(int, int)])89 """90 cur_d = 091 pos_relationships = set()92 for obj in objectives:93 obj_divers = [cur_d]94 cur_d += 195 depth = node_depths[obj]-196 while depth > 0:97 new_divers = range(cur_d, cur_d + len(obj_divers))98 for diver in obj_divers:99 for n_diver in new_divers:100 pos_relationships.add((n_diver, diver))101 obj_divers.extend(new_divers)102 cur_d += len(new_divers)103 depth -= 1104 return pos_relationships105def make_negative_relationships(pos_relationships, num_divers, neg_link_prob):106 """ Make a set of negative relationships where divers preclude each other.107 For the problem to be satisfiable there must just be an ordering over108 the divers that works. Lets assume that the positive relationships109 represent this order. We are then able to rule out everything else.110 In fact, we we have a neg_link_prob chance of ruling out a non-positive111 link.112 (set([(int, int)]), int, ...) -> { int : [int] }113 """114 neg_relationships = dict([(x, list()) for x in xrange(num_divers)])115 for (diver1, diver2) in itertools.combinations(xrange(num_divers), 2):116 if (diver1, diver2) not in pos_relationships and random.random() < neg_link_prob:117 neg_relationships[diver1].append(diver2)118 return neg_relationships119def add_neg_cycle(neg_relationships, num_divers, neg_cycle_frac):120 """ Adds a negative cycle to the diver relationships, making the problem121 have no solutions. num_divers * neg_cycle_frac (min 2) divers are122 involved.123 ({ int : [int] }, int, float) -> None124 """125 divers = random.sample(xrange(num_divers), max(2, int(num_divers*neg_cycle_frac)))126 for did, diver1 in enumerate(divers):127 diver2 = divers[(did+1)%len(divers)]128 if diver2 not in neg_relationships[diver1]:129 neg_relationships[diver1].append(diver2)130def make_hiring_costs(neg_relationships, min_cost, max_cost, perturb):131 """ Make the hiring costs for the divers. 
The costs are inversely132 proportional to the number of negative relation ships a diver has.133 They are perturbed by perturb.134 ({ int : [int] }, int, int, float) -> { int : int }135 """136 divers = neg_relationships.keys()137 num_rels = [len(x) for x in neg_relationships.values()]138 min_rels, max_rels = min(num_rels), max(num_rels)139 rel_range = max_rels - min_rels140 cost_range = max_cost - min_cost141 if cost_range == 0:142 return dict([(d, min_cost) for d in divers])143 if rel_range == 0:144 mid_cost = int(min_cost + cost_range / 2.0)145 return dict([(d, mid_cost) for d in divers])146 rel_dict = dict([(n, list()) for n in num_rels])147 for nid, num in enumerate(num_rels):148 rel_dict[num].append(divers[nid])149 sorted_rels = sorted(zip(rel_dict.keys(), rel_dict.values()))150 sorted_rels.reverse()151 hiring_costs = {}152 cost_inc = cost_range / float(len(sorted_rels))153 for rid, (nr, nr_d) in enumerate(sorted_rels):154 for d in nr_d:155 base_cost = min_cost + cost_inc*rid156 base_cost += random.random()*2*perturb*base_cost - perturb*base_cost157 base_cost = max(min_cost, min(max_cost, int(base_cost)))158 hiring_costs[d] = base_cost159 return hiring_costs160def write_domain_file(file_name, divers, neg_relationships, strips, ordered_tanks):161 """ Write the PDDL domain file to file_name.162 (str, [int], { int : [int] }, bool, bool) -> None163 """164 try:165 output_file = file(file_name, 'w')166 if strips:167 output_file.write(";; Cave Diving STRIPS\n")168 else:169 output_file.write(";; Cave Diving ADL\n")170 output_file.write(";; Authors: Nathan Robinson,\n")171 output_file.write(";; Christian Muise, and\n")172 output_file.write(";; Charles Gretton\n\n")173 if strips:174 output_file.write("(define (domain cave-diving-strips)\n")175 else:176 output_file.write("(define (domain cave-diving-adl)\n")177 output_file.write(" (:requirements :typing)\n")178 output_file.write(" (:types location diver tank quantity)\n")179 output_file.write(" (:predicates\n")180 output_file.write(" (at-tank ?t - tank ?l - location)\n")181 output_file.write(" (in-storage ?t - tank)\n")182 output_file.write(" (full ?t - tank)\n")183 if ordered_tanks:184 output_file.write(" (next-tank ?t1 - tank ?t2 - tank)\n")185 output_file.write(" (at-diver ?d - diver ?l - location)\n")186 output_file.write(" (available ?d - diver)\n")187 output_file.write(" (at-surface ?d - diver)\n")188 output_file.write(" (decompressing ?d - diver)\n")189 if not strips:190 output_file.write(" (precludes ?d1 - diver ?d2 - diver)\n")191 output_file.write(" (cave-entrance ?l - location)\n")192 output_file.write(" (connected ?l1 - location ?l2 - location)\n")193 output_file.write(" (next-quantity ?q1 - quantity ?q2 - quantity)\n")194 output_file.write(" (holding ?d - diver ?t - tank)\n")195 output_file.write(" (capacity ?d - diver ?q - quantity)\n")196 output_file.write(" (have-photo ?l - location)\n")197 output_file.write(" (in-water )\n")198 199 output_file.write(" )\n\n")200 #output_file.write(" (:functions\n")201 #output_file.write(" (hiring-cost ?d - diver) - number\n")202 #output_file.write(" (other-cost) - number\n")203 #output_file.write(" (total-cost) - number\n")204 #output_file.write(" )\n\n")205 # Divers are made constants given the strips usage206 num_diver_lines = len(divers)/20 + 1207 ordered_divers = ['d' + str(x) for x in xrange(len(divers))]208 output_file.write(" (:constants\n")209 for d_line in xrange(num_diver_lines):210 output_file.write(" " + " ".join(ordered_divers[(d_line*20):(d_line*20+20)]) + " - diver\n")211 
output_file.write(" )\n\n")212 if not strips:213 output_file.write(" (:action hire-diver\n")214 output_file.write(" :parameters (?d1 - diver)\n")215 output_file.write(" :precondition (and (available ?d1)\n")216 output_file.write(" (not (in-water)) \n")217 output_file.write(" )\n")218 output_file.write(" :effect (and (at-surface ?d1)\n")219 output_file.write(" (not (available ?d1))\n")220 output_file.write(" (forall (?d2 - diver)\n")221 output_file.write(" (when (precludes ?d1 ?d2) (not (available ?d2))))\n")222 output_file.write(" (in-water)\n")223 output_file.write(" (increase (total-cost) (hiring-cost ?d1))\n")224 output_file.write(" )\n")225 output_file.write(" )\n\n")226 else:227 for did, diver1 in enumerate(divers):228 output_file.write(" (:action hire-diver-" + diver1 + "\n")229 output_file.write(" :parameters ( )\n")230 output_file.write(" :precondition (and (available " + diver1 + "))\n")231 output_file.write(" :effect (and (at-surface " + diver1 + ")\n")232 output_file.write(" (not (available " + diver1 + "))\n")233 #for diver2 in neg_relationships[did]:234 # output_file.write(" (not (available " + divers[diver2] + "))\n")235 #output_file.write(" (increase (total-cost) (hiring-cost " + diver1 + "))\n")236 output_file.write(" )\n")237 output_file.write(" )\n\n")238 output_file.write(" (:action prepare-tank\n")239 if ordered_tanks:240 output_file.write(" :parameters (?d - diver ?t1 ?t2 - tank ?q1 ?q2 - quantity)\n")241 else:242 output_file.write(" :parameters (?d - diver ?t1 - tank ?q1 ?q2 - quantity)\n")243 output_file.write(" :precondition (and (at-surface ?d)\n")244 output_file.write(" (in-storage ?t1)\n")245 output_file.write(" (next-quantity ?q1 ?q2)\n")246 output_file.write(" (capacity ?d ?q2)\n")247 if ordered_tanks:248 output_file.write(" (next-tank ?t1 ?t2)\n")249 output_file.write(" )\n")250 output_file.write(" :effect (and (not (in-storage ?t1))\n")251 output_file.write(" (not (capacity ?d ?q2))\n")252 if ordered_tanks:253 output_file.write(" (in-storage ?t2)\n")254 output_file.write(" (full ?t1)\n")255 output_file.write(" (capacity ?d ?q1)\n")256 output_file.write(" (holding ?d ?t1)\n")257 #output_file.write(" (increase (total-cost) (other-cost ))\n")258 output_file.write(" )\n")259 output_file.write(" )\n\n")260 output_file.write(" (:action enter-water\n")261 output_file.write(" :parameters (?d - diver ?l - location)\n")262 output_file.write(" :precondition (and (at-surface ?d)\n")263 output_file.write(" (cave-entrance ?l)\n")264 output_file.write(" )\n")265 output_file.write(" :effect (and (not (at-surface ?d))\n")266 output_file.write(" (at-diver ?d ?l)\n")267 #output_file.write(" (increase (total-cost) (other-cost ))\n")268 output_file.write(" )\n")269 output_file.write(" )\n\n")270 output_file.write(" (:action pickup-tank\n")271 output_file.write(" :parameters (?d - diver ?t - tank ?l - location ?q1 ?q2 - quantity)\n")272 output_file.write(" :precondition (and (at-diver ?d ?l)\n")273 output_file.write(" (at-tank ?t ?l)\n")274 output_file.write(" (next-quantity ?q1 ?q2)\n")275 output_file.write(" (capacity ?d ?q2)\n")276 output_file.write(" )\n")277 output_file.write(" :effect (and (not (at-tank ?t ?l))\n")278 output_file.write(" (not (capacity ?d ?q2))\n")279 output_file.write(" (holding ?d ?t)\n")280 output_file.write(" (capacity ?d ?q1)\n")281 #output_file.write(" (increase (total-cost) (other-cost ))\n")282 output_file.write(" )\n")283 output_file.write(" )\n\n")284 output_file.write(" (:action drop-tank\n")285 output_file.write(" :parameters (?d - diver ?t - 
tank ?l - location ?q1 ?q2 - quantity)\n")286 output_file.write(" :precondition (and (at-diver ?d ?l)\n")287 output_file.write(" (holding ?d ?t)\n")288 output_file.write(" (next-quantity ?q1 ?q2)\n")289 output_file.write(" (capacity ?d ?q1)\n")290 output_file.write(" )\n")291 output_file.write(" :effect (and (not (holding ?d ?t))\n")292 output_file.write(" (not (capacity ?d ?q1))\n")293 output_file.write(" (at-tank ?t ?l)\n")294 output_file.write(" (capacity ?d ?q2)\n")295 #output_file.write(" (increase (total-cost) (other-cost ))\n")296 output_file.write(" )\n")297 output_file.write(" )\n\n")298 output_file.write(" (:action swim\n")299 output_file.write(" :parameters (?d - diver ?t - tank ?l1 ?l2 - location)\n")300 output_file.write(" :precondition (and (at-diver ?d ?l1)\n")301 output_file.write(" (holding ?d ?t)\n")302 output_file.write(" (full ?t)\n")303 output_file.write(" (connected ?l1 ?l2)\n")304 output_file.write(" )\n")305 output_file.write(" :effect (and (not (at-diver ?d ?l1))\n")306 output_file.write(" (not (full ?t))\n")307 output_file.write(" (at-diver ?d ?l2)\n")308 #output_file.write(" (increase (total-cost) (other-cost ))\n")309 output_file.write(" )\n")310 output_file.write(" )\n\n")311 output_file.write(" (:action photograph\n")312 output_file.write(" :parameters (?d - diver ?l - location ?t - tank)\n")313 output_file.write(" :precondition (and (at-diver ?d ?l)\n")314 output_file.write(" (holding ?d ?t)\n")315 output_file.write(" (full ?t)\n")316 output_file.write(" )\n")317 output_file.write(" :effect (and (not (full ?t))\n")318 output_file.write(" (have-photo ?l)\n")319 #output_file.write(" (increase (total-cost) (other-cost ))\n")320 output_file.write(" )\n")321 output_file.write(" )\n\n")322 output_file.write(" (:action decompress\n")323 output_file.write(" :parameters (?d - diver ?l - location)\n")324 output_file.write(" :precondition (and (at-diver ?d ?l)\n")325 output_file.write(" (cave-entrance ?l)\n")326 output_file.write(" )\n")327 output_file.write(" :effect (and (not (at-diver ?d ?l))\n")328 output_file.write(" (decompressing ?d)\n")329 output_file.write(" (not (in-water))\n")330 #output_file.write(" (increase (total-cost) (other-cost ))\n")331 output_file.write(" )\n")332 output_file.write(" )\n\n")333 output_file.write(")\n")334 output_file.close()335 except IOError:336 print "Error: could not write to the domain file:", file_name337def write_problem_file(file_name, problem_name, num_locations, tanks, divers,338 objectives, edges, neg_relationships, hiring_costs, other_action_cost,339 strips, ordered_tanks):340 """ Write the PDDL problem file to file_name.341 (str, str, int, [str], [str], [int], [(int, int)], { int : [int] },342 { int : int }, int, bool, bool) -> None343 """344 try:345 output_file = file(file_name, 'w')346 if strips:347 output_file.write(";; Cave Diving STRIPS\n")348 else:349 output_file.write(";; Cave Diving ADL\n")350 output_file.write(";; Authors: Nathan Robinson,\n")351 output_file.write(";; Christian Muise, and\n")352 output_file.write(";; Charles Gretton\n\n")353 if strips:354 output_file.write("(define (problem cave-diving-strips-" + problem_name + ")\n")355 output_file.write(" (:domain cave-diving-strips)\n")356 else:357 output_file.write("(define (problem cave-diving-adl-" + problem_name + ")\n")358 output_file.write(" (:domain cave-diving-adl)\n")359 output_file.write(" (:objects\n")360 output_file.write(" " + " ".join(\361 ['l' + str(x) for x in xrange(num_locations)]) + " - location\n")362 num_diver_lines = len(divers)/20 + 1363 
ordered_divers = ['d' + str(x) for x in xrange(len(divers))]364 #for d_line in xrange(num_diver_lines):365 # output_file.write(" " + " ".join(ordered_divers[(d_line*20):(d_line*20+20)]) + " - diver\n")366 num_tank_lines = len(tanks)/20 + 1367 for t_line in xrange(num_tank_lines):368 output_file.write(" " + " ".join(tanks[(t_line*20):(t_line*20+20)]) + " - tank\n")369 output_file.write(" zero one two three four - quantity\n")370 output_file.write(" )\n\n")371 output_file.write(" (:init\n")372 for diver in ordered_divers:373 output_file.write(" (available " + diver + ")\n")374 for diver in ordered_divers:375 output_file.write(" (capacity " + diver + " four)\n")376 if ordered_tanks:377 output_file.write(" (in-storage " + tanks[0] + ")\n")378 for tid, tank in enumerate(tanks[:-1]):379 output_file.write(" (next-tank " + tank + " " + tanks[tid+1] + ")\n")380 else:381 for tank in tanks:382 output_file.write(" (in-storage " + tank + ")\n")383 output_file.write(" (cave-entrance l0)\n")384 for edge in edges:385 output_file.write(" (connected l" + str(edge[0]) + " l" + str(edge[1]) + ")\n")386 output_file.write(" (connected l" + str(edge[1]) + " l" + str(edge[0]) + ")\n")387 output_file.write(" (next-quantity zero one)\n")388 output_file.write(" (next-quantity one two)\n")389 output_file.write(" (next-quantity two three)\n")390 output_file.write(" (next-quantity three four)\n")391 if not strips:392 for did1, diver1 in enumerate(divers):393 for diver2 in neg_relationships[did1]:394 output_file.write(" (precludes " + diver1 + " " + divers[diver2] + ")\n")395 #for did, diver in enumerate(divers):396 # output_file.write(" (= (hiring-cost " + diver + ") " + str(hiring_costs[did]) + ")\n")397 #output_file.write(" (= (other-cost ) " + str(other_action_cost) + ")\n")398 #output_file.write(" (= (total-cost) 0)\n")399 output_file.write(" )\n\n")400 output_file.write(" (:goal\n")401 output_file.write(" (and\n")402 for obj in objectives:403 output_file.write(" (have-photo l" + str(obj) + ")\n")404 for diver in divers:405 output_file.write(" (decompressing " + diver + ")\n")406 output_file.write(" )\n )\n\n")407 #output_file.write(" (:metric minimize (total-cost))\n\n")408 output_file.write(")\n")409 output_file.close()410 except IOError:411 print "Error: could not write to the problem file:", file_name412def main():413 args = generator_cmd_line.process_args()414 random.seed(args.seed)415 edges, node_depths, leaves = make_caves(args.cave_branches)416 objectives = make_objectives(args.objectives, node_depths, leaves)417 tanks, divers = make_tanks_and_divers(node_depths, objectives,418 args.num_tank_adjustment, args.num_diver_adjustment, args.order_tanks)419 pos_relationships = make_positive_relationships(objectives, node_depths)420 neg_relationships = make_negative_relationships(pos_relationships,421 len(divers), args.neg_link_prob)422 if args.neg_cycle_length:423 add_neg_cycle(neg_relationships, len(divers), args.neg_cycle_length)424 hiring_costs = make_hiring_costs(neg_relationships, args.minimum_hiring_cost,425 args.maximum_hiring_cost, args.perturb_hiring_costs)426 random.shuffle(divers)427 if not args.quiet:428 print429 print "Edges: ", ", ".join(map(str, edges))430 print "Depths:", ", ".join(map(str, node_depths))431 print "Objectives:", ", ".join(map(str, objectives))432 print "Tanks:", ", ".join(map(str, tanks))433 print "Divers:", ", ".join(map(str, divers))434 print "Positive relationships:", ", ".join(map(str, pos_relationships))435 print "Negative relationships:", neg_relationships436 print "Hiring 
costs:", hiring_costs437 if args.domain_file_name:438 write_domain_file(args.domain_file_name, divers, neg_relationships,439 args.strips, args.order_tanks)440 write_problem_file(args.problem_file_name, args.problem_name, len(node_depths),441 tanks, divers, objectives, edges, neg_relationships, hiring_costs,442 args.other_action_cost, args.strips, args.order_tanks)443if __name__ == "__main__":...
milestone5.py
Source:milestone5.py
#!/usr/bin/python
import sys, string
from random import choice
import random
from string import ascii_lowercase
from scipy.stats import beta, uniform
import numpy as np
import struct
import pandas as pd
import math
import data_gen_utils

# note this is the base path to the data files we generate
TEST_BASE_DIR = "/cs165/generated_data"
# note this is the base path that _POINTS_ to the data files we generate
DOCKER_TEST_BASE_DIR = "/cs165/staff_test"

############################################################################
# Notes: You can generate your own scripts for generating data fairly easily by modifying this script.
#
############################################################################

def generateDataMilestone5(dataSize):
    outputFile = TEST_BASE_DIR + '/data5.csv'
    header_line = data_gen_utils.generateHeaderLine('db1', 'tbl5', 4)
    outputTable = pd.DataFrame(np.random.randint(0, dataSize/5, size=(dataSize, 4)), columns=['col1', 'col2', 'col3', 'col4'])
    # This is going to have many, many duplicates for large tables!!!!
    outputTable['col1'] = np.random.randint(0, 1000, size=(dataSize))
    outputTable['col2'] = np.random.randint(0, 1000, size=(dataSize))
    outputTable['col3'] = np.random.randint(0, 10000, size=(dataSize))
    outputTable['col4'] = np.random.randint(0, 10000, size=(dataSize))
    outputTable.to_csv(outputFile, sep=',', index=False, header=header_line, line_terminator='\n')
    return outputTable

def createTest38(dataTable):
    # prelude
    output_file, exp_output_file = data_gen_utils.openFileHandles(38, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Correctness test: Do inserts in tbl5.\n')
    output_file.write('--\n')
    output_file.write('-- Let table tbl5 have a secondary index (col2) and a clustered index (col3), so, all should be maintained when we insert new data.\n')
    output_file.write('-- This means that the table should be always sorted on col3 and the secondary indexes on col2 should be updated\n')
    output_file.write('--\n')
    output_file.write('-- Create Table\n')
    output_file.write('create(tbl,"tbl5",db1,4)\n')
    output_file.write('create(col,"col1",db1.tbl5)\n')
    output_file.write('create(col,"col2",db1.tbl5)\n')
    output_file.write('create(col,"col3",db1.tbl5)\n')
    output_file.write('create(col,"col4",db1.tbl5)\n')
    output_file.write('-- Create a clustered index on col1\n')
    output_file.write('create(idx,db1.tbl5.col1,sorted,clustered)\n')
    output_file.write('-- Create an unclustered btree index on col2\n')
    output_file.write('create(idx,db1.tbl5.col2,btree,unclustered)\n')
    output_file.write('--\n')
    output_file.write('--\n')
    output_file.write('-- Load data immediately in the form of a clustered index\n')
    output_file.write('load(\"'+DOCKER_TEST_BASE_DIR+'/data5.csv\")\n')
    output_file.write('--\n')
    output_file.write('-- INSERT INTO tbl5 VALUES (-1,-11,-111,-1111);\n')
    output_file.write('-- INSERT INTO tbl5 VALUES (-2,-22,-222,-2222);\n')
    output_file.write('-- INSERT INTO tbl5 VALUES (-3,-33,-333,-2222);\n')
    output_file.write('-- INSERT INTO tbl5 VALUES (-4,-44,-444,-2222);\n')
    output_file.write('-- INSERT INTO tbl5 VALUES (-5,-55,-555,-2222);\n')
    output_file.write('--\n')
    output_file.write('relational_insert(db1.tbl5,-1,-11,-111,-1111)\n')
    output_file.write('relational_insert(db1.tbl5,-2,-22,-222,-2222)\n')
    output_file.write('relational_insert(db1.tbl5,-3,-33,-333,-2222)\n')
    output_file.write('relational_insert(db1.tbl5,-4,-44,-444,-2222)\n')
    output_file.write('relational_insert(db1.tbl5,-5,-55,-555,-2222)\n')
    #output_file.write('shutdown\n')
    # update dataTable
    dataTable = dataTable.append({"col1": -1, "col2": -11, "col3": -111, "col4": -1111}, ignore_index=True)
    dataTable = dataTable.append({"col1": -2, "col2": -22, "col3": -222, "col4": -2222}, ignore_index=True)
    dataTable = dataTable.append({"col1": -3, "col2": -33, "col3": -333, "col4": -2222}, ignore_index=True)
    dataTable = dataTable.append({"col1": -4, "col2": -44, "col3": -444, "col4": -2222}, ignore_index=True)
    dataTable = dataTable.append({"col1": -5, "col2": -55, "col3": -555, "col4": -2222}, ignore_index=True)

    # no expected results
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
    return dataTable

def createTest39(dataTable, approxSelectivity):
    output_file, exp_output_file = data_gen_utils.openFileHandles(39, TEST_DIR=TEST_BASE_DIR)
    dataSize = len(dataTable)
    offset = int(approxSelectivity * dataSize)
    highestHighVal = int((dataSize/2) - offset)
    selectValLess = np.random.randint(-55, -11)
    selectValGreater = selectValLess + offset
    selectValLess2 = np.random.randint(-10, 0)
    selectValGreater2 = selectValLess2 + offset
    output_file.write('-- Correctness test: Test for updates on columns with index\n')
    output_file.write('--\n')
    output_file.write('-- SELECT col1 FROM tbl5 WHERE col2 >= {} AND col2 < {};\n'.format(selectValLess, selectValGreater))
    output_file.write('--\n')
    output_file.write('s1=select(db1.tbl5.col2,{},{})\n'.format(selectValLess, selectValGreater))
    output_file.write('f1=fetch(db1.tbl5.col1,s1)\n')
    output_file.write('print(f1)\n')
    output_file.write('--\n')
    output_file.write('-- SELECT col3 FROM tbl5 WHERE col1 >= {} AND col1 < {};\n'.format(selectValLess2, selectValGreater2))
    output_file.write('--\n')
    output_file.write('s2=select(db1.tbl5.col1,{},{})\n'.format(selectValLess2, selectValGreater2))
    output_file.write('f2=fetch(db1.tbl5.col3,s2)\n')
    output_file.write('print(f2)\n')
    # generate expected results
    dfSelectMaskGT = dataTable['col2'] >= selectValLess
    dfSelectMaskLT = dataTable['col2'] < selectValGreater
    output = dataTable[dfSelectMaskGT & dfSelectMaskLT]['col1']
    if len(output) > 0:
        exp_output_file.write(output.to_string(header=False, index=False))
        exp_output_file.write('\n\n')
    dfSelectMaskGT2 = dataTable['col1'] >= selectValLess2
    dfSelectMaskLT2 = dataTable['col1'] < selectValGreater2
    output = dataTable[dfSelectMaskGT2 & dfSelectMaskLT2]['col3']
    if len(output) > 0:
        exp_output_file.write(output.to_string(header=False, index=False))
        exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)

def createTests40(dataTable):
    output_file, exp_output_file = data_gen_utils.openFileHandles(40, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Correctness test: Update values\n')
    output_file.write('--\n')
    output_file.write('-- UPDATE tbl5 SET col1 = -10 WHERE col1 = -1;\n')
    output_file.write('-- UPDATE tbl5 SET col1 = -20 WHERE col2 = -22;\n')
    output_file.write('-- UPDATE tbl5 SET col1 = -30 WHERE col1 = -3;\n')
    output_file.write('-- UPDATE tbl5 SET col1 = -40 WHERE col3 = -444;\n')
    output_file.write('-- UPDATE tbl5 SET col1 = -50 WHERE col1 = -5;\n')
    output_file.write('--\n')
    output_file.write('u1=select(db1.tbl5.col1,-1,0)\n')
    output_file.write('relational_update(db1.tbl5.col1,u1,-10)\n')
    output_file.write('u2=select(db1.tbl5.col2,-22,-21)\n')
    output_file.write('relational_update(db1.tbl5.col1,u2,-20)\n')
    output_file.write('u3=select(db1.tbl5.col1,-3,-2)\n')
    output_file.write('relational_update(db1.tbl5.col1,u3,-30)\n')
    output_file.write('u4=select(db1.tbl5.col3,-444,-443)\n')
    output_file.write('relational_update(db1.tbl5.col1,u4,-40)\n')
    output_file.write('u5=select(db1.tbl5.col1,-5,-4)\n')
    output_file.write('relational_update(db1.tbl5.col1,u5,-50)\n')
    output_file.write('shutdown\n')
    # update dataTable
    dfSelectMaskEq = dataTable['col1'] == -1
    dataTable.loc[dfSelectMaskEq, 'col1'] = -10
    dfSelectMaskEq = dataTable['col2'] == -22
    dataTable.loc[dfSelectMaskEq, 'col1'] = -20

    dfSelectMaskEq = dataTable['col1'] == -3
    dataTable.loc[dfSelectMaskEq, 'col1'] = -30

    dfSelectMaskEq = dataTable['col3'] == -444
    dataTable.loc[dfSelectMaskEq, 'col1'] = -40

    dfSelectMaskEq = dataTable['col1'] == -5
    dataTable.loc[dfSelectMaskEq, 'col1'] = -50
    # no expected results
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
    return dataTable

def createTest41(dataTable):
    output_file, exp_output_file = data_gen_utils.openFileHandles(41, TEST_DIR=TEST_BASE_DIR)
    selectValLess = np.random.randint(-200, -100)
    selectValGreater = np.random.randint(10, 100)
    output_file.write('-- Correctness test: Run query after inserts and updates\n')
    output_file.write('--\n')
    output_file.write('-- SELECT col1 FROM tbl5 WHERE col2 >= {} AND col2 < {};\n'.format(selectValLess, selectValGreater))
    output_file.write('--\n')
    output_file.write('s1=select(db1.tbl5.col2,{},{})\n'.format(selectValLess, selectValGreater))
    output_file.write('f1=fetch(db1.tbl5.col1,s1)\n')
    output_file.write('print(f1)\n')
    # generate expected results
    dfSelectMask = (dataTable['col2'] >= selectValLess) & (dataTable['col2'] < selectValGreater)
    output = dataTable[dfSelectMask]['col1']
    exp_output_file.write(output.to_string(header=False, index=False))
    data_gen_utils.closeFileHandles(output_file, exp_output_file)

def createTest42(dataTable):
    output_file, exp_output_file = data_gen_utils.openFileHandles(42, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Correctness test: Delete values and run queries after inserts, updates, and deletes\n')
    output_file.write('--\n')
    output_file.write('-- DELETE FROM tbl5 WHERE col1 = -10;\n')
    output_file.write('-- DELETE FROM tbl5 WHERE col2 = -22;\n')
    output_file.write('-- DELETE FROM tbl5 WHERE col1 = -30;\n')
    output_file.write('-- DELETE FROM tbl5 WHERE col3 = -444;\n')
    output_file.write('-- DELETE FROM tbl5 WHERE col1 = -50;\n')
    output_file.write('-- SELECT col1 FROM tbl5 WHERE col2 >= -100 AND col2 < 20;\n')
    output_file.write('--\n')
    output_file.write('d1=select(db1.tbl5.col1,-10,-9)\n')
    output_file.write('relational_delete(db1.tbl5,d1)\n')
    output_file.write('d2=select(db1.tbl5.col2,-22,-21)\n')
    output_file.write('relational_delete(db1.tbl5,d2)\n')
    output_file.write('d3=select(db1.tbl5.col1,-30,-29)\n')
    output_file.write('relational_delete(db1.tbl5,d3)\n')
    output_file.write('d4=select(db1.tbl5.col3,-444,-443)\n')
    output_file.write('relational_delete(db1.tbl5,d4)\n')
    output_file.write('d5=select(db1.tbl5.col1,-50,-49)\n')
    output_file.write('relational_delete(db1.tbl5,d5)\n')
    output_file.write('s1=select(db1.tbl5.col2,-100,20)\n')
    output_file.write('f1=fetch(db1.tbl5.col1,s1)\n')
    output_file.write('print(f1)\n')
    # update dataTable
    dataTable = dataTable[dataTable.col1 != -10]
    dataTable = dataTable[dataTable.col2 != -22]
    dataTable = dataTable[dataTable.col1 != -30]
    dataTable = dataTable[dataTable.col3 != -444]
    dataTable = dataTable[dataTable.col1 != -50]

    dfSelectMask1 = dataTable['col2'] >= -100
    dfSelectMask2 = dataTable['col2'] < 20
    output = dataTable[dfSelectMask1 & dfSelectMask2]['col1']
    if len(output) > 0:
        exp_output_file.write(output.to_string(header=False, index=False))
        exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)
    return dataTable

def createRandomUpdates(dataTable, numberOfUpdates, output_file):
    dataSize = len(dataTable)
    for i in range(numberOfUpdates):
        updatePos = np.random.randint(1, dataSize-1)
        col2Val = dataTable.values[updatePos][1]
        col1Val = dataTable.values[updatePos][0]
        output_file.write('-- UPDATE tbl5 SET col1 = {} WHERE col2 = {};\n'.format(col1Val+1, col2Val))
        output_file.write('u1=select(db1.tbl5.col2,{},{})\n'.format(col2Val, col2Val+1))
        output_file.write('relational_update(db1.tbl5.col1,u1,{})\n'.format(col1Val+1))
        output_file.write('--\n')
        dfSelectMaskEq = dataTable['col2'] == col2Val
        dataTable.loc[dfSelectMaskEq, 'col1'] = col1Val+1
    return dataTable

def createRandomDeletes(dataTable, numberOfUpdates, output_file):
    for i in range(numberOfUpdates):
        dataSize = len(dataTable)
        updatePos = np.random.randint(1, dataSize-1)
        col1Val = dataTable.values[updatePos][0]
        output_file.write('-- DELETE FROM tbl5 WHERE col1 = {};\n'.format(col1Val))
        output_file.write('d1=select(db1.tbl5.col1,{},{})\n'.format(col1Val, col1Val+1))
        output_file.write('relational_delete(db1.tbl5,d1)\n')
        output_file.write('--\n')
        dataTable = dataTable[dataTable.col1 != col1Val]
    return dataTable

def createRandomInserts(dataTable, numberOfInserts, output_file):
    for i in range(numberOfInserts):
        col1Val = np.random.randint(0, 1000)
        col2Val = np.random.randint(0, 1000)
        col3Val = np.random.randint(0, 10000)
        col4Val = np.random.randint(0, 10000)
        output_file.write('-- INSERT INTO tbl5 VALUES ({},{},{},{});\n'.format(col1Val, col2Val, col3Val, col4Val))
        output_file.write('relational_insert(db1.tbl5,{},{},{},{})\n'.format(col1Val, col2Val, col3Val, col4Val))
        dataTable = dataTable.append({"col1": col1Val, "col2": col2Val, "col3": col3Val, "col4": col4Val}, ignore_index=True)
        output_file.write('--\n')
    return dataTable

def createRandomSelects(dataTable, numberOfQueries, output_file, exp_output_file):
    lowestVal = dataTable['col2'].min()
    highestVal = dataTable['col2'].max()
    dataSize = len(dataTable)
    for i in range(numberOfQueries):
        selectValLess = np.random.randint(lowestVal-1, highestVal-1)
        selectValGreater = np.random.randint(selectValLess, highestVal)
        output_file.write('-- SELECT col1 FROM tbl5 WHERE col2 >= {} AND col2 < {};\n'.format(selectValLess, selectValGreater))
        output_file.write('s1=select(db1.tbl5.col2,{},{})\n'.format(selectValLess, selectValGreater))
        output_file.write('f1=fetch(db1.tbl5.col1,s1)\n')
        output_file.write('print(f1)\n')
        dfSelectMaskGT = dataTable['col2'] >= selectValLess
        dfSelectMaskLT = dataTable['col2'] < selectValGreater
        output = dataTable[dfSelectMaskGT & dfSelectMaskLT]['col1']
        if len(output) > 0:
            exp_output_file.write(output.to_string(header=False, index=False))
        exp_output_file.write('\n')

def createTest43(dataTable):
    output_file, exp_output_file = data_gen_utils.openFileHandles(43, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Scalability test: A large number of inserts, deletes and updates, followed by a number of queries\n')
    output_file.write('--\n')
    dataTable = createRandomInserts(dataTable, 100, output_file)
    dataTable = createRandomUpdates(dataTable, 100, output_file)
    dataTable = createRandomDeletes(dataTable, 100, output_file)
    createRandomSelects(dataTable, 5, output_file, exp_output_file)
    data_gen_utils.closeFileHandles(output_file, exp_output_file)

def generateMilestoneFiveFiles(dataSize, randomSeed=47):
    np.random.seed(randomSeed)
    dataTable = generateDataMilestone5(dataSize)
    dataTable = createTest38(dataTable)
    createTest39(dataTable, 0.1)
    dataTable = createTests40(dataTable)
    createTest41(dataTable)
    dataTable = createTest42(dataTable)
    createTest43(dataTable)

def main(argv):
    global TEST_BASE_DIR
    global DOCKER_TEST_BASE_DIR
    dataSize = int(argv[0])
    if len(argv) > 1:
        randomSeed = int(argv[1])
    else:
        randomSeed = 47

    if len(argv) > 2:
        TEST_BASE_DIR = argv[2]
    if len(argv) > 3:
        DOCKER_TEST_BASE_DIR = argv[3]
    generateMilestoneFiveFiles(dataSize, randomSeed=randomSeed)

if __name__ == "__main__":
    ...
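A minimal driver sketch, assuming data_gen_utils is importable and with placeholder paths and sizes (the original docstringless main takes dataSize, an optional seed, and optional overrides for the two base directories):

# Hypothetical invocation: generate data5.csv plus tests 38-43 with a
# fixed seed so the expected-output files are reproducible.
import milestone5
milestone5.main(["1000", "42", "/tmp/generated_data", "/tmp/staff_test"])

The fixed seed matters: every select bound and random insert above is drawn from np.random, so the test scripts and their expected outputs only line up if the same seed is used for both.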
milestone2.py
Source:milestone2.py
#!/usr/bin/python
import sys, string
from random import choice
import random
from string import ascii_lowercase
from scipy.stats import beta, uniform
import numpy as np
import struct
import pandas as pd
import data_gen_utils

# note this is the base path where we store the data files we generate
TEST_BASE_DIR = "/cs165/generated_data"
# note this is the base path that _POINTS_ to the data files we generate
DOCKER_TEST_BASE_DIR = "/cs165/staff_test"

#
# Example usage:
#   python milestone2.py 10000 42 ~/repo/cs165-docker-test-runner/test_data /cs165/staff_test
#
############################################################################
# Notes: You can generate your own scripts for generating data fairly easily by modifying this script.
#
# To test functionality and speed, run your tests first on small data. Then when you are reasonably confident that your code works, move to bigger data sizes for speed.
#
############################################################################

def generateDataMilestone2(dataSize):
    outputFile = TEST_BASE_DIR + '/data3_batch.csv'
    header_line = data_gen_utils.generateHeaderLine('db1', 'tbl3_batch', 4)
    outputTable = pd.DataFrame(np.random.randint(0, dataSize/5, size=(dataSize, 4)), columns=['col1', 'col2', 'col3', 'col4'])
    # This is going to have many, many duplicates for large tables!!!!
    outputTable['col1'] = np.random.randint(0, 1000, size=(dataSize))
    outputTable['col4'] = np.random.randint(0, 10000, size=(dataSize))
    outputTable['col4'] = outputTable['col4'] + outputTable['col1']
    outputTable.to_csv(outputFile, sep=',', index=False, header=header_line, line_terminator='\n')
    return outputTable

def createTestTen():
    # prelude
    output_file, exp_output_file = data_gen_utils.openFileHandles(10, TEST_DIR=TEST_BASE_DIR)
    output_file.write('-- Load Test Data 2\n')
    output_file.write('-- Create a table to run batch queries on\n')
    output_file.write('--\n')
    # query
    output_file.write('-- Loads data from: data3_batch.csv\n')
    output_file.write('--\n')
    output_file.write('-- Create Table\n')
    output_file.write('create(tbl,"tbl3_batch",db1,4)\n')
    output_file.write('create(col,"col1",db1.tbl3_batch)\n')
    output_file.write('create(col,"col2",db1.tbl3_batch)\n')
    output_file.write('create(col,"col3",db1.tbl3_batch)\n')
    output_file.write('create(col,"col4",db1.tbl3_batch)\n')
    output_file.write('--\n')
    output_file.write('-- Load data immediately\n')
    output_file.write('load(\"'+DOCKER_TEST_BASE_DIR+'/data3_batch.csv\")\n')
    output_file.write('--\n')
    output_file.write('-- Testing that the data is durable on disk.\n')
    output_file.write('shutdown\n')
    # no expected results
    data_gen_utils.closeFileHandles(output_file, exp_output_file)

def createTestEleven(dataTable):
    # prelude and query
    output_file, exp_output_file = data_gen_utils.openFileHandles(11, TEST_DIR=TEST_BASE_DIR)
    output_file.write('--\n')
    output_file.write('-- Testing for batching queries\n')
    output_file.write('-- 2 queries with NO overlap\n')
    output_file.write('--\n')
    output_file.write('-- Query in SQL:\n')
    output_file.write('-- SELECT col4 FROM tbl3_batch WHERE col1 >= 10 AND col1 < 20;\n')
    output_file.write('-- SELECT col4 FROM tbl3_batch WHERE col1 >= 800 AND col1 < 830;\n')
    output_file.write('--\n')
    output_file.write('--\n')
    output_file.write('batch_queries()\n')
    output_file.write('s1=select(db1.tbl3_batch.col1,10,20)\n')
    output_file.write('s2=select(db1.tbl3_batch.col1,800,830)\n')
    output_file.write('batch_execute()\n')
    output_file.write('f1=fetch(db1.tbl3_batch.col4,s1)\n')
    output_file.write('f2=fetch(db1.tbl3_batch.col4,s2)\n')
    output_file.write('print(f1)\n')
    output_file.write('print(f2)\n')
    # generate expected results.
    dfSelectMask1 = (dataTable['col1'] >= 10) & (dataTable['col1'] < 20)
    dfSelectMask2 = (dataTable['col1'] >= 800) & (dataTable['col1'] < 830)
    output1 = dataTable[dfSelectMask1]['col4']
    output2 = dataTable[dfSelectMask2]['col4']
    exp_output_file.write(data_gen_utils.outputPrint(output1))
    exp_output_file.write('\n\n')
    exp_output_file.write(data_gen_utils.outputPrint(output2))
    exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)

def createTestTwelve(dataTable):
    # prelude and query
    output_file, exp_output_file = data_gen_utils.openFileHandles(12, TEST_DIR=TEST_BASE_DIR)
    output_file.write('--\n')
    output_file.write('-- Testing for batching queries\n')
    output_file.write('-- 2 queries with partial overlap\n')
    output_file.write('--\n')
    output_file.write('-- Query in SQL:\n')
    output_file.write('-- SELECT col4 FROM tbl3_batch WHERE col1 >= 600 AND col1 < 820;\n')
    output_file.write('-- SELECT col4 FROM tbl3_batch WHERE col1 >= 800 AND col1 < 830;\n')
    output_file.write('--\n')
    output_file.write('--\n')
    output_file.write('batch_queries()\n')
    output_file.write('s1=select(db1.tbl3_batch.col1,600,820)\n')
    output_file.write('s2=select(db1.tbl3_batch.col1,800,830)\n')
    output_file.write('batch_execute()\n')
    output_file.write('f1=fetch(db1.tbl3_batch.col4,s1)\n')
    output_file.write('f2=fetch(db1.tbl3_batch.col4,s2)\n')
    output_file.write('print(f1)\n')
    output_file.write('print(f2)\n')
    # generate expected results.
    dfSelectMask1 = (dataTable['col1'] >= 600) & (dataTable['col1'] < 820)
    dfSelectMask2 = (dataTable['col1'] >= 800) & (dataTable['col1'] < 830)
    output1 = dataTable[dfSelectMask1]['col4']
    output2 = dataTable[dfSelectMask2]['col4']
    exp_output_file.write(data_gen_utils.outputPrint(output1))
    exp_output_file.write('\n\n')
    exp_output_file.write(data_gen_utils.outputPrint(output2))
    exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)

def createTestThirteen(dataTable):
    # prelude and query
    output_file, exp_output_file = data_gen_utils.openFileHandles(13, TEST_DIR=TEST_BASE_DIR)
    output_file.write('--\n')
    output_file.write('-- Testing for batching queries\n')
    output_file.write('-- 2 queries with full overlap (subsumption)\n')
    output_file.write('--\n')
    output_file.write('-- Query in SQL:\n')
    output_file.write('-- SELECT col4 FROM tbl3_batch WHERE col1 >= 810 AND col1 < 820;\n')
    output_file.write('-- SELECT col4 FROM tbl3_batch WHERE col1 >= 800 AND col1 < 830;\n')
    output_file.write('--\n')
    output_file.write('--\n')
    output_file.write('batch_queries()\n')
    output_file.write('s1=select(db1.tbl3_batch.col1,810,820)\n')
    output_file.write('s2=select(db1.tbl3_batch.col1,800,830)\n')
    output_file.write('batch_execute()\n')
    output_file.write('f1=fetch(db1.tbl3_batch.col4,s1)\n')
    output_file.write('f2=fetch(db1.tbl3_batch.col4,s2)\n')
    output_file.write('print(f1)\n')
    output_file.write('print(f2)\n')
    # generate expected results.
    dfSelectMask1 = (dataTable['col1'] >= 810) & (dataTable['col1'] < 820)
    dfSelectMask2 = (dataTable['col1'] >= 800) & (dataTable['col1'] < 830)
    output1 = dataTable[dfSelectMask1]['col4']
    output2 = dataTable[dfSelectMask2]['col4']
    exp_output_file.write(data_gen_utils.outputPrint(output1))
    exp_output_file.write('\n\n')
    exp_output_file.write(data_gen_utils.outputPrint(output2))
    exp_output_file.write('\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)

def createTestFourteen(dataTable):
    # prelude and query
    output_file, exp_output_file = data_gen_utils.openFileHandles(14, TEST_DIR=TEST_BASE_DIR)
    output_file.write('--\n')
    output_file.write('-- Testing for batching queries\n')
    output_file.write('-- Queries with no overlap\n')
    output_file.write('--\n')
    output_file.write('-- Query in SQL:\n')
    output_file.write('-- 10 Queries of the type:\n')
    output_file.write('-- SELECT col1 FROM tbl3_batch WHERE col4 >= _ AND col4 < _;\n')
    output_file.write('--\n')
    output_file.write('--\n')
    output_file.write('batch_queries()\n')
    for i in range(10):
        output_file.write('s{}=select(db1.tbl3_batch.col4,{},{})\n'.format(i, (1000 * i), (1000 * i) + 30))
    output_file.write('batch_execute()\n')
    for i in range(10):
        output_file.write('f{}=fetch(db1.tbl3_batch.col1,s{})\n'.format(i, i))
    for i in range(10):
        output_file.write('print(f{})\n'.format(i))
    # generate expected results
    for i in range(10):
        dfSelectMask = (dataTable['col4'] >= (1000 * i)) & (dataTable['col4'] < ((1000 * i) + 30))
        output = dataTable[dfSelectMask]['col1']
        exp_output_file.write(data_gen_utils.outputPrint(output))
        exp_output_file.write('\n\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)

def createTestFifteen(dataTable):
    # prelude and query
    output_file, exp_output_file = data_gen_utils.openFileHandles(15, TEST_DIR=TEST_BASE_DIR)
    output_file.write('--\n')
    output_file.write('-- Testing for batching queries\n')
    output_file.write('-- Queries with full overlap (subsumption)\n')
    output_file.write('--\n')
    randomVal = np.random.randint(1000, 9900)
    output_file.write('-- Query in SQL:\n')
    output_file.write('-- 10 Queries of the type:\n')
    output_file.write('-- SELECT col1 FROM tbl3_batch WHERE col4 >= _ AND col4 < _;\n')
    output_file.write('--\n')
    output_file.write('--\n')
    output_file.write('batch_queries()\n')
    for i in range(10):
        output_file.write('s{}=select(db1.tbl3_batch.col4,{},{})\n'.format(i, randomVal + (2 * i), randomVal + 60 - (2 * i)))
    output_file.write('batch_execute()\n')
    for i in range(10):
        output_file.write('f{}=fetch(db1.tbl3_batch.col1,s{})\n'.format(i, i))
    for i in range(10):
        output_file.write('print(f{})\n'.format(i))
    # generate expected results
    for i in range(10):
        dfSelectMask = (dataTable['col4'] >= (randomVal + (2 * i))) & (dataTable['col4'] < (randomVal + 60 - (2 * i)))
        output = dataTable[dfSelectMask]['col1']
        exp_output_file.write(data_gen_utils.outputPrint(output))
        exp_output_file.write('\n\n')
    data_gen_utils.closeFileHandles(output_file, exp_output_file)

def createTests16And17(dataTable, dataSize):
    # 1 / 1000 tuples should qualify on average. This is so that most time is spent on scans & not fetches or prints
    offset = np.max([1, int(dataSize/5000)])
    query_starts = np.random.randint(0, (dataSize/8), size=(100))
    output_file16, exp_output_file16 = data_gen_utils.openFileHandles(16, TEST_DIR=TEST_BASE_DIR)
    output_file17, exp_output_file17 = data_gen_utils.openFileHandles(17, TEST_DIR=TEST_BASE_DIR)
    output_file16.write('--\n')
    output_file16.write('-- Control timing for without batching\n')
    output_file16.write('-- Queries for 16 and 17 are identical.\n')
    output_file16.write('-- Query in SQL:\n')
    output_file16.write('-- 100 Queries of the type:\n')
    output_file16.write('-- SELECT col3 FROM tbl3_batch WHERE col2 >= _ AND col2 < _;\n')
    output_file16.write('--\n')
    output_file17.write('--\n')
    output_file17.write('-- Same queries with batching\n')
    output_file17.write('-- Queries for 16 and 17 are identical.\n')
    output_file17.write('--\n')
    output_file17.write('batch_queries()\n')
    for i in range(100):
        output_file16.write('s{}=select(db1.tbl3_batch.col2,{},{})\n'.format(i, query_starts[i], query_starts[i] + offset))
        output_file17.write('s{}=select(db1.tbl3_batch.col2,{},{})\n'.format(i, query_starts[i], query_starts[i] + offset))
    output_file17.write('batch_execute()\n')
    for i in range(100):
        output_file16.write('f{}=fetch(db1.tbl3_batch.col3,s{})\n'.format(i, i))
        output_file17.write('f{}=fetch(db1.tbl3_batch.col3,s{})\n'.format(i, i))
    for i in range(100):
        output_file16.write('print(f{})\n'.format(i))
        output_file17.write('print(f{})\n'.format(i))
    # generate expected results
    for i in range(100):
        dfSelectMask = (dataTable['col2'] >= query_starts[i]) & ((dataTable['col2'] < (query_starts[i] + offset)))
        output = dataTable[dfSelectMask]['col3']
        exp_output_file16.write(data_gen_utils.outputPrint(output))
        exp_output_file16.write('\n\n')
        exp_output_file17.write(data_gen_utils.outputPrint(output))
        exp_output_file17.write('\n\n')
    data_gen_utils.closeFileHandles(output_file16, exp_output_file16)
    data_gen_utils.closeFileHandles(output_file17, exp_output_file17)

def generateMilestoneTwoFiles(dataSize, randomSeed):
    np.random.seed(randomSeed)
    dataTable = generateDataMilestone2(dataSize)
    createTestTen()
    createTestEleven(dataTable)
    createTestTwelve(dataTable)
    createTestThirteen(dataTable)
    createTestFourteen(dataTable)
    createTestFifteen(dataTable)
    createTests16And17(dataTable, dataSize)

def main(argv):
    global TEST_BASE_DIR
    global DOCKER_TEST_BASE_DIR
    dataSize = int(argv[0])
    if len(argv) > 1:
        randomSeed = int(argv[1])
    else:
        randomSeed = 47
    # override the base directory for where to output test related files
    if len(argv) > 2:
        TEST_BASE_DIR = argv[2]
    if len(argv) > 3:
        DOCKER_TEST_BASE_DIR = argv[3]
    generateMilestoneTwoFiles(dataSize, randomSeed)

if __name__ == "__main__":
    ...
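Every expected-output file above is produced by the same pandas pattern: model select(col, lo, hi) as a half-open mask (lo inclusive, hi exclusive) and print the matching column. A small self-contained sketch with toy data, assumed here purely for illustration:

import pandas as pd
# s1=select(db1.tbl3_batch.col1,10,20)  ~  col1 >= 10 AND col1 < 20
df = pd.DataFrame({'col1': [5, 12, 19, 800, 829, 830],
                   'col4': [1, 2, 3, 4, 5, 6]})
mask = (df['col1'] >= 10) & (df['col1'] < 20)
print(df[mask]['col4'].to_string(header=False, index=False))  # rows with col1=12,19 -> 2, 3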
h52vtp.py
Source:h52vtp.py
1"""2 Convert h5 files to vtp files in VTK XML format that can be opened by ParaView.3 The data type of the vtp file is "vtkPolyData", each PolyData piece specifies a set4 of points and cells independently from the other pieces. The points are described5 explicitly by the Points element. The cells are described explicitly by the Verts,6 Lines, Strips, and Polys elements.7 <VTKFile type="PolyData" ...>8 <PolyData>9 <Piece NumberOfPoints="#" NumberOfVerts="#" NumberOfLines="#"10 NumberOfStrips="#" NumberOfPolys="#">11 <PointData>...</PointData>12 <CellData>...</CellData>13 <Points>...</Points>14 <Verts>...</Verts>15 <Lines>...</Lines>16 <Strips>...</Strips>17 <Polys>...</Polys>18 </Piece>19 </PolyData>20 </VTKFile>21"""22import math23import argparse24import h5py25import numpy as np26from scipy import interpolate27def h5_to_vtp(surf_file, surf_name='train_loss', log=False, zmax=-1, interp=-1):28 # set this to True to generate points29 show_points = False30 # set this to True to generate polygons31 show_polys = True32 f = h5py.File(surf_file, 'r')33 [xcoordinates, ycoordinates] = np.meshgrid(f['xcoordinates'][:], f['ycoordinates'][:][:])34 vals = f[surf_name]35 x_array = xcoordinates[:].ravel()36 y_array = ycoordinates[:].ravel()37 z_array = vals[:].ravel()38 # Interpolate the resolution up to the desired amount39 if interp > 0:40 m = interpolate.interp2d(xcoordinates[0, :], ycoordinates[:, 0], vals, kind='cubic')41 x_array = np.linspace(min(x_array), max(x_array), interp)42 y_array = np.linspace(min(y_array), max(y_array), interp)43 z_array = m(x_array, y_array).ravel()44 x_array, y_array = np.meshgrid(x_array, y_array)45 x_array = x_array.ravel()46 y_array = y_array.ravel()47 vtp_file = surf_file + "_" + surf_name48 if zmax > 0:49 z_array[z_array > zmax] = zmax50 vtp_file += "_zmax=" + str(zmax)51 if log:52 z_array = np.log(z_array + 0.1)53 vtp_file += "_log"54 vtp_file += ".vtp"55 print("Here's your output file:{}".format(vtp_file))56 number_points = len(z_array)57 print("number_points = {} points".format(number_points))58 matrix_size = int(math.sqrt(number_points))59 print("matrix_size = {} x {}".format(matrix_size, matrix_size))60 poly_size = matrix_size - 161 print("poly_size = {} x {}".format(poly_size, poly_size))62 number_polys = poly_size * poly_size63 print("number_polys = {}".format(number_polys))64 min_value_array = [min(x_array), min(y_array), min(z_array)]65 max_value_array = [max(x_array), max(y_array), max(z_array)]66 min_value = min(min_value_array)67 max_value = max(max_value_array)68 averaged_z_value_array = []69 poly_count = 070 for column_count in range(poly_size):71 stride_value = column_count * matrix_size72 for row_count in range(poly_size):73 temp_index = stride_value + row_count74 averaged_z_value = (z_array[temp_index] + z_array[temp_index + 1] +75 z_array[temp_index + matrix_size] +76 z_array[temp_index + matrix_size + 1]) / 4.077 averaged_z_value_array.append(averaged_z_value)78 poly_count += 179 avg_min_value = min(averaged_z_value_array)80 avg_max_value = max(averaged_z_value_array)81 output_file = open(vtp_file, 'w')82 output_file.write('<VTKFile type="PolyData" version="1.0" byte_order="LittleEndian" header_type="UInt64">\n')83 output_file.write(' <PolyData>\n')84 if (show_points and show_polys):85 output_file.write(86 ' <Piece NumberOfPoints="{}" NumberOfVerts="{}" NumberOfLines="0" NumberOfStrips="0" NumberOfPolys="{}">\n'.format(87 number_points, number_points, number_polys))88 elif (show_polys):89 output_file.write(90 ' <Piece NumberOfPoints="{}" 
NumberOfVerts="0" NumberOfLines="0" NumberOfStrips="0" NumberOfPolys="{}">\n'.format(91 number_points, number_polys))92 else:93 output_file.write(94 ' <Piece NumberOfPoints="{}" NumberOfVerts="{}" NumberOfLines="0" NumberOfStrips="0" NumberOfPolys="">\n'.format(95 number_points, number_points))96 # <PointData>97 output_file.write(' <PointData>\n')98 output_file.write(99 ' <DataArray type="Float32" Name="zvalue" NumberOfComponents="1" format="ascii" RangeMin="{}" RangeMax="{}">\n'.format(100 min_value_array[2], max_value_array[2]))101 for vertexcount in range(number_points):102 if (vertexcount % 6) is 0:103 output_file.write(' ')104 output_file.write('{}'.format(z_array[vertexcount]))105 if (vertexcount % 6) is 5:106 output_file.write('\n')107 else:108 output_file.write(' ')109 if (vertexcount % 6) is not 5:110 output_file.write('\n')111 output_file.write(' </DataArray>\n')112 output_file.write(' </PointData>\n')113 # <CellData>114 output_file.write(' <CellData>\n')115 if (show_polys and not show_points):116 output_file.write(117 ' <DataArray type="Float32" Name="averaged zvalue" NumberOfComponents="1" format="ascii" RangeMin="{}" RangeMax="{}">\n'.format(118 avg_min_value, avg_max_value))119 for vertexcount in range(number_polys):120 if (vertexcount % 6) is 0:121 output_file.write(' ')122 output_file.write('{}'.format(averaged_z_value_array[vertexcount]))123 if (vertexcount % 6) is 5:124 output_file.write('\n')125 else:126 output_file.write(' ')127 if (vertexcount % 6) is not 5:128 output_file.write('\n')129 output_file.write(' </DataArray>\n')130 output_file.write(' </CellData>\n')131 # <Points>132 output_file.write(' <Points>\n')133 output_file.write(134 ' <DataArray type="Float32" Name="Points" NumberOfComponents="3" format="ascii" RangeMin="{}" RangeMax="{}">\n'.format(135 min_value, max_value))136 for vertexcount in range(number_points):137 if (vertexcount % 2) is 0:138 output_file.write(' ')139 output_file.write('{} {} {}'.format(x_array[vertexcount], y_array[vertexcount], z_array[vertexcount]))140 if (vertexcount % 2) is 1:141 output_file.write('\n')142 else:143 output_file.write(' ')144 if (vertexcount % 2) is not 1:145 output_file.write('\n')146 output_file.write(' </DataArray>\n')147 output_file.write(' </Points>\n')148 # <Verts>149 output_file.write(' <Verts>\n')150 output_file.write(151 ' <DataArray type="Int64" Name="connectivity" format="ascii" RangeMin="0" RangeMax="{}">\n'.format(152 number_points - 1))153 if (show_points):154 for vertexcount in range(number_points):155 if (vertexcount % 6) is 0:156 output_file.write(' ')157 output_file.write('{}'.format(vertexcount))158 if (vertexcount % 6) is 5:159 output_file.write('\n')160 else:161 output_file.write(' ')162 if (vertexcount % 6) is not 5:163 output_file.write('\n')164 output_file.write(' </DataArray>\n')165 output_file.write(166 ' <DataArray type="Int64" Name="offsets" format="ascii" RangeMin="1" RangeMax="{}">\n'.format(167 number_points))168 if (show_points):169 for vertexcount in range(number_points):170 if (vertexcount % 6) is 0:171 output_file.write(' ')172 output_file.write('{}'.format(vertexcount + 1))173 if (vertexcount % 6) is 5:174 output_file.write('\n')175 else:176 output_file.write(' ')177 if (vertexcount % 6) is not 5:178 output_file.write('\n')179 output_file.write(' </DataArray>\n')180 output_file.write(' </Verts>\n')181 # <Lines>182 output_file.write(' <Lines>\n')183 output_file.write(184 ' <DataArray type="Int64" Name="connectivity" format="ascii" RangeMin="0" RangeMax="{}">\n'.format(185 number_polys - 
1))186 output_file.write(' </DataArray>\n')187 output_file.write(188 ' <DataArray type="Int64" Name="offsets" format="ascii" RangeMin="1" RangeMax="{}">\n'.format(189 number_polys))190 output_file.write(' </DataArray>\n')191 output_file.write(' </Lines>\n')192 # <Strips>193 output_file.write(' <Strips>\n')194 output_file.write(195 ' <DataArray type="Int64" Name="connectivity" format="ascii" RangeMin="0" RangeMax="{}">\n'.format(196 number_polys - 1))197 output_file.write(' </DataArray>\n')198 output_file.write(199 ' <DataArray type="Int64" Name="offsets" format="ascii" RangeMin="1" RangeMax="{}">\n'.format(200 number_polys))201 output_file.write(' </DataArray>\n')202 output_file.write(' </Strips>\n')203 # <Polys>204 output_file.write(' <Polys>\n')205 output_file.write(206 ' <DataArray type="Int64" Name="connectivity" format="ascii" RangeMin="0" RangeMax="{}">\n'.format(207 number_polys - 1))208 if (show_polys):209 polycount = 0210 for column_count in range(poly_size):211 stride_value = column_count * matrix_size212 for row_count in range(poly_size):213 temp_index = stride_value + row_count214 if (polycount % 2) is 0:215 output_file.write(' ')216 output_file.write('{} {} {} {}'.format(temp_index, (temp_index + 1), (temp_index + matrix_size + 1),217 (temp_index + matrix_size)))218 if (polycount % 2) is 1:219 output_file.write('\n')220 else:221 output_file.write(' ')222 polycount += 1223 if (polycount % 2) is 1:224 output_file.write('\n')225 output_file.write(' </DataArray>\n')226 output_file.write(227 ' <DataArray type="Int64" Name="offsets" format="ascii" RangeMin="1" RangeMax="{}">\n'.format(228 number_polys))229 if (show_polys):230 for polycount in range(number_polys):231 if (polycount % 6) is 0:232 output_file.write(' ')233 output_file.write('{}'.format((polycount + 1) * 4))234 if (polycount % 6) is 5:235 output_file.write('\n')236 else:237 output_file.write(' ')238 if (polycount % 6) is not 5:239 output_file.write('\n')240 output_file.write(' </DataArray>\n')241 output_file.write(' </Polys>\n')242 output_file.write(' </Piece>\n')243 output_file.write(' </PolyData>\n')244 output_file.write('</VTKFile>\n')245 output_file.write('')246 output_file.close()247 print("Done with file:{}".format(vtp_file))248if __name__ == '__main__':249 parser = argparse.ArgumentParser(250 description='Convert h5 file to XML-based VTK file that can be opened with ParaView')251 parser.add_argument('--surf_file', '-f', default='', help='The h5 file that contains surface values')252 parser.add_argument('--surf_name', default='train_loss',253 help='The type of surface to plot: train_loss | test_loss | train_acc | test_acc ')254 parser.add_argument('--zmax', default=-1, type=float, help='Maximum z value to map')255 parser.add_argument('--interp', default=-1, type=int,256 help='Interpolate the surface to this resolution (1000 recommended)')257 parser.add_argument('--log', action='store_true', default=False, help='log scale')258 args = parser.parse_args()...
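A minimal call sketch (the surf.h5 filename is a placeholder, assumed here to contain xcoordinates, ycoordinates, and a train_loss dataset as the function expects):

# Hypothetical: clip the surface at z=10, log-scale it, and upsample to a
# 1000x1000 grid before writing the ParaView file. Following the naming
# logic above, this writes surf.h5_train_loss_zmax=10_log.vtp.
h5_to_vtp('surf.h5', surf_name='train_loss', log=True, zmax=10, interp=1000)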