Best Python code snippet using autotest_python
populate_from_mentions.py
Source:populate_from_mentions.py
'''
Populate the staging area graph from the software mention imported documents
'''
import os
import json
from arango import ArangoClient
from populate_staging_area import StagingArea
import logging
import logging.handlers
from tqdm import tqdm

# number of documents fetched per AQL query when paging over the "documents" collection
_PAGE_SIZE = 1000


def populate(stagingArea):
    '''
    Entry point: switch the staging area to the "mentions" import database and load its
    content into the staging graph.

    If the "mentions" database does not exist, an error is logged and nothing is imported.
    '''
    database_name_mentions = "mentions"
    print("Populate staging area from software mention import")
    if not stagingArea.sys_db.has_database(database_name_mentions):
        logging.error("Software mention import database does not exist: you need to first import the software mention resources")
        # nothing can be read from a missing database, so stop here instead of failing on the first query
        return
    stagingArea.db = stagingArea.client.db(
        database_name_mentions,
        username=stagingArea.config['arangodb']['arango_user'],
        password=stagingArea.config['arangodb']['arango_pwd']
    )
    populate_mentions(stagingArea, stagingArea.get_source(database_name_mentions))


def populate_mentions(stagingArea, source_ref):
    '''
    Software mentions at this stage are all represented as independent software entity (very light-weight
    and with the few extracted attributes). The information related to the mention in context are
    represented with the edge relation "citations", with a "quotes work" (P6166) property to store the
    software (=work) mentioned and "quotation" (P7081) for storing the whole context of mention (the
    target sentence).
    The other relation built here is "references" (citing document -> cited document). A funding relation
    (via Crossref funders) is planned but not built yet, see the note in the loop below.

    stagingArea: staging area object, its `db` attribute must point to the "mentions" import database
    source_ref: source description attached as reference to every claim created here

    Raises RuntimeError when an entity or relation cannot be initialized from its default template.
    '''
    # count on the server side: the documents themselves are not needed to know how many pages to read
    total_results = stagingArea.db.aql.execute('RETURN LENGTH(documents)').next()
    nb_pages = (total_results + _PAGE_SIZE - 1) // _PAGE_SIZE
    print("entries:", total_results, ", nb. steps:", nb_pages)

    # given the possible number of documents, we use pagination rather than a large ttl
    # NOTE(review): LIMIT paging without a SORT relies on a stable scan order of the collection - confirm
    for page_rank in tqdm(range(nb_pages)):
        cursor = stagingArea.db.aql.execute(
            'FOR doc IN documents LIMIT @offset, @count RETURN doc',
            bind_vars={'offset': page_rank * _PAGE_SIZE, 'count': _PAGE_SIZE},
            ttl=3600
        )
        for document in cursor:
            local_doc = _insert_document(stagingArea, document, source_ref)

            # two more relations could be built at this level, they are not for the moment:
            # - authorship based on "author" metadata field (edge "actor" from "persons" to "documents")
            #   -> as we consider here text-mined documents, we might better not import every authors as
            #      entities at this stage and keep only authors from key references cited together with
            #      software in mention
            # - funding based on crossref "funder" metadata field (edge "funding" from "organizations" to
            #   "documents"): a Crossref funder has a 'name', a 'DOI' such as 10.13039/100004440 where the
            #   part after the slash is the Crossref funder ID (P3153), and optional 'award' values

            _process_annotations(stagingArea, local_doc, source_ref)


def _make_claim(value, datatype, source_ref, qualifiers=None):
    ''' Build one claim value with its source reference; "qualifiers" is only set when non-empty. '''
    claim = {"value": value, "datatype": datatype, "references": [source_ref]}
    if qualifiers:
        claim["qualifiers"] = qualifiers
    return claim


def _make_qualifier(property_name, value):
    ''' Build one qualifier entry holding a string-typed value under the given property. '''
    return {property_name: {"value": value, "datatype": "string"}}


def _bounding_box_qualifiers(holder):
    '''
    Return the qualifier list carrying the bounding boxes of `holder`, or None when it has none.
    Relevant property is "relative position within image" (P2677).
    '''
    if "boundingBoxes" in holder:
        return [_make_qualifier("P2677", holder["boundingBoxes"])]
    return None


def _component_claim(component, datatype, source_ref):
    ''' Claim for one annotation component (software name, version, ...) as extracted, with its bounding boxes. '''
    return _make_claim(component["normalizedForm"], datatype, source_ref,
                       qualifiers=_bounding_box_qualifiers(component))


def _insert_document(stagingArea, document, source_ref):
    ''' Create the document vertex for an imported document, insert it if not yet present and return it. '''
    local_doc = stagingArea.init_entity_from_template("document", source=source_ref)
    if local_doc is None:
        raise RuntimeError("cannot init document entity from default template")
    local_doc['_key'] = document["_key"]
    local_doc['_id'] = "documents/" + document["_key"]

    # document metadata stays as they are (e.g. full CrossRef record)
    metadata = document['metadata']
    local_doc['metadata'] = metadata

    # unfortunately the casing of the key DOI field is unreliable; "doi" wins when both are present
    for doi_field in ("DOI", "doi"):
        if doi_field in metadata:
            local_doc['index_doi'] = metadata[doi_field].lower()

    if metadata.get("title") and metadata.get("author"):
        local_title_author_key = stagingArea.title_author_key(metadata['title'], metadata['author'])
        if local_title_author_key:
            local_doc['index_title_author'] = local_title_author_key

    if not stagingArea.staging_graph.has_vertex(local_doc["_id"]):
        stagingArea.staging_graph.insert_vertex("documents", local_doc)
    return local_doc


def _process_annotations(stagingArea, local_doc, source_ref):
    '''
    Process all the annotations of one document: aggregate the software entities, then create the
    "citations" edge of each annotation and the "references" edges attached to it.
    '''
    # we process all the annotations from this document, which makes possible some (modest) optimizations
    cursor_annot = stagingArea.db.aql.execute(
        "FOR doc IN annotations FILTER doc.document.$oid == @doc_key RETURN doc",
        bind_vars={'doc_key': local_doc['_key']},
        ttl=60
    )
    # software entities already created for this document, by normalized software name
    software_name_processed = {}
    for index_annot, annotation in enumerate(cursor_annot):
        software = _aggregate_software(stagingArea, annotation, software_name_processed, source_ref)

        # relations to be built at this level:
        # - citations based on software mention in a document, which will include context sentence,
        #   coordinates, etc. here document are fully specified (with PDF hash, page coordinates, etc.)
        #   because it has been "text-mined"
        # - references, which relate a software or a document (where the reference is expressed) to a
        #   document (and less frequently to a software), the document here can be simply a set of
        #   bibliographical metadata or a fully specified document
        relation = _build_citation(stagingArea, annotation, source_ref)
        relation["_from"] = "documents/" + local_doc["_key"]
        relation["_to"] = "software/" + software['_key']
        relation["_key"] = local_doc["_key"] + "_" + software['_key'] + "_" + str(index_annot)
        relation["_id"] = "citations/" + relation["_key"]
        stagingArea.staging_graph.insert_edge("citations", edge=relation)

        # bibliographical reference attached to the citation context, this will be represented as
        # a reference relation, from the citing document to the cited document, with related software
        # information in the relation
        if "references" in annotation:
            _process_references(stagingArea, annotation, relation, software, local_doc, index_annot, source_ref)


def _aggregate_software(stagingArea, annotation, software_name_processed, source_ref):
    '''
    Return the software entity for this annotation, creating or updating its vertex as needed.

    Annotations from the same document lead to a set of new software entity (to be further
    disambiguated). Software with the same name in the same document are considered as the same entity
    and what is extracted for each annotation is aggregated in this single entity.
    '''
    software_name = annotation["software-name"]["normalizedForm"]
    new_entity = software_name not in software_name_processed
    if new_entity:
        software = stagingArea.init_entity_from_template("software", source=source_ref)
        if software is None:
            raise RuntimeError("cannot init software entity from default template")
        software['labels'] = software_name
    else:
        # otherwise get the existing entity for this software
        software = software_name_processed[software_name]

    # (property, value as it is stored, datatype): the duplicate check must use the stored form
    candidates = []
    if "version" in annotation:
        # version info (P348)
        candidates.append(("P348", annotation["version"]["normalizedForm"], "string"))
    if "publisher" in annotation:
        # publisher (P123)
        candidates.append(("P123", annotation["publisher"]["normalizedForm"], "string"))
    if "url" in annotation:
        # reference URL (P854)
        candidates.append(("P854", annotation["url"]["normalizedForm"].replace(" ", ""), "url"))
    # the predicted wikidata entity and Wikipedia english page for the software are represented with
    # property "said to be the same" (P460), which is defined as "said to be the same as that item,
    # but it's uncertain or disputed"
    if "wikipediaExternalRef" in annotation:
        wikipedia_url = "https://en.wikipedia.org/?curid=" + str(annotation["wikipediaExternalRef"])
        candidates.append(("P460", wikipedia_url, "url"))
    if "wikidataId" in annotation:
        candidates.append(("P460", annotation["wikidataId"], "wikibase-item"))

    # reset for every annotation, so that an unchanged entity is not updated again
    changed = False
    claims = software["claims"]
    for property_name, value, datatype in candidates:
        if check_value_exists(claims, property_name, value):
            continue
        claims.setdefault(property_name, []).append(_make_claim(value, datatype, source_ref))
        if datatype == "wikibase-item":
            software["index_entity"] = value
        changed = True

    # bibliographical references associated to the software could be aggregated here, possibly with
    # count information -> to be reviewed
    if new_entity:
        local_id = stagingArea.get_uid()
        software['_key'] = local_id
        software['_id'] = "software/" + local_id
        stagingArea.staging_graph.insert_vertex("software", software)
        software_name_processed[software_name] = software
    elif changed:
        stagingArea.staging_graph.update_vertex(software)
    return software


def _build_citation(stagingArea, annotation, source_ref):
    '''
    Build the citation relation of one annotation (without its _from/_to/_key/_id fields), storing
    all original attributes in this citation relation, as they are in this annotation.
    '''
    relation = stagingArea.init_entity_from_template("citation", source=source_ref)
    if relation is None:
        raise RuntimeError("cannot init citation relation from default template")
    claims = relation["claims"]

    # store original software name string - always present normally
    # we use property P6166 ("quote work", here the work is the mentioned software)
    if "software-name" in annotation:
        claims["P6166"] = [_component_claim(annotation["software-name"], "string", source_ref)]

    # version info (P348), publisher (P123), reference URL (P854)
    for field, property_name, datatype in (("version", "P348", "string"),
                                           ("publisher", "P123", "string"),
                                           ("url", "P854", "url")):
        if field in annotation:
            claims[property_name] = [_component_claim(annotation[field], datatype, source_ref)]

    # both predicted identifiers go under "said to be the same" (P460), one must not erase the other
    same_as = []
    if "wikipediaExternalRef" in annotation:
        same_as.append(_make_claim(annotation["wikipediaExternalRef"], "url", source_ref))
    if "wikidataId" in annotation:
        same_as.append(_make_claim(annotation["wikidataId"], "wikibase-item", source_ref))
    if same_as:
        claims["P460"] = same_as

    if "context" in annotation:
        # quotation or excerpt (P7081)
        # note: currently bounding box for the context sentence not outputted by the software-mention
        # module, but we can load it if present for the future
        claims["P7081"] = [_make_claim(annotation["context"], "string", source_ref,
                                       qualifiers=_bounding_box_qualifiers(annotation))]
    return relation


def _process_references(stagingArea, annotation, relation, software, local_doc, index_annot, source_ref):
    '''
    For every bibliographical reference of the annotation: make sure the cited document exists as
    vertex, add a "cites work" (P2860) claim to the citation `relation` and insert a "references" edge
    from the citing document to the cited one. The citation edge is updated once at the end.
    '''
    for reference in annotation["references"]:
        reference_key = reference["reference_id"]["$oid"]

        # store the referenced document as document vertex (it will be deduplicated in a further stage)
        # if not already present
        referenced_document = None
        cursor_ref = stagingArea.documents.find({'_key': reference_key}, skip=0, limit=1)
        if cursor_ref.count() > 0:
            referenced_document = cursor_ref.next()

        if referenced_document is None:
            # this is the usual case. we create a new document entity from the extracted
            # bibliographical reference metadata
            referenced_document = stagingArea.init_entity_from_template("document", source=source_ref)
            if referenced_document is None:
                raise RuntimeError("cannot init document entity from default template")
            referenced_document['_key'] = reference_key
            referenced_document['_id'] = "documents/" + reference_key

            # get the metadata from the mentions database
            mention_reference = stagingArea.db.collection('references').get({'_key': reference_key})
            if mention_reference is None:
                logging.warning("warning: reference object indicated in an annotation does not exist, _key: " + reference_key)
                continue

            metadata = stagingArea.tei2json(mention_reference['tei'])
            referenced_document['metadata'] = metadata
            # DOI index
            if "DOI" in metadata:
                referenced_document["index_doi"] = metadata['DOI'].lower()
            # title/first author last name index
            if "title" in metadata and "author" in metadata:
                local_key = stagingArea.title_author_key(metadata['title'], metadata['author'])
                if local_key is not None:
                    referenced_document["index_title_author"] = local_key

            if not stagingArea.staging_graph.has_vertex(referenced_document["_id"]):
                stagingArea.staging_graph.insert_vertex("documents", referenced_document)

        # property is "cites work" (P2860) that we can include in the citation edge
        qualifiers = _bounding_box_qualifiers(reference) or []
        # refkey in qualifier (number of the reference in the full bibliographical section of the citing paper)
        if "refkey" in reference:
            qualifiers.append(_make_qualifier("PA02", reference["refkey"]))
        # label is the reference marker used by the citing paper as call-out to the full reference entry
        if "label" in reference:
            qualifiers.append(_make_qualifier("PA03", reference["label"]))
        relation["claims"].setdefault("P2860", []).append(
            _make_claim(reference_key, "external-id", source_ref, qualifiers=qualifiers)
        )

        # reference relation with specific edge: "P2860" property "cites work", holding the software
        # associated to the citation context
        software_id = "software/" + software['_key']
        relation_ref = {}
        relation_ref["claims"] = {"P2860": [_make_claim(software_id, "external-id", source_ref)]}
        # we add an index to the software identifier, which will be useful when filtering the
        # references related to a given software
        relation_ref["index_software"] = software_id
        relation_ref["_from"] = local_doc['_id']
        relation_ref["_to"] = referenced_document['_id']
        relation_ref["_key"] = local_doc["_key"] + "_" + referenced_document['_key'] + "_" + str(index_annot)
        relation_ref["_id"] = "references/" + relation_ref["_key"]
        if not stagingArea.staging_graph.has_edge(relation_ref["_id"]):
            stagingArea.staging_graph.insert_edge("references", edge=relation_ref)

    # update citation edge document with the added reference information
    stagingArea.staging_graph.update_edge(relation)


def check_value_exists(claim, property_name, value):
    '''
    Check in the claim if a property is present and if the property has the given value

    claim: mapping from property name to a list of claim values (each a dict with a "value" field)
    Returns True when one of the values stored under `property_name` equals `value`, False otherwise.
    '''
    if property_name not in claim:
        return False
    return any(claim_value["value"] == value for claim_value in claim[property_name])
populate_from_r.py
Source:populate_from_r.py
1'''2Populate the staging area graph from CRAN and rOpenSci imported documents3'''4import os5import json6import pybtex7from arango import ArangoClient8from populate_staging_area import StagingArea9import logging10import logging.handlers11from tqdm import tqdm12def populate(stagingArea):13 database_name_rOpenSci = "rOpenSci"14 database_name_cran = "CRAN" 15 print("Populate staging area from rOpenSci")16 if not stagingArea.sys_db.has_database(database_name_rOpenSci):17 print("rOpenSci import database does not exist: you need to first import rOpenSci resources")18 stagingArea.db = stagingArea.client.db(database_name_rOpenSci, username=stagingArea.config['arangodb']['arango_user'], password=stagingArea.config['arangodb']['arango_pwd'])19 packages = stagingArea.db.collection('packages')20 populate_r(stagingArea, packages, stagingArea.get_source(database_name_rOpenSci))21 print("Populate staging area from CRAN")22 if not stagingArea.sys_db.has_database(database_name_cran):23 print("CRAN import database does not exist: you need to first import CRAN resources")24 stagingArea.db = stagingArea.client.db(database_name_cran, username=stagingArea.config['arangodb']['arango_user'], password=stagingArea.config['arangodb']['arango_pwd'])25 packages = stagingArea.db.collection('packages')26 populate_r(stagingArea, packages, stagingArea.get_source(database_name_cran))27 # we set the dependencies in a second pass, having all the packages entities put in relation now set28 print("dependencies rOpenSci...")29 stagingArea.db = stagingArea.client.db(database_name_rOpenSci, username=stagingArea.config['arangodb']['arango_user'], password=stagingArea.config['arangodb']['arango_pwd'])30 packages = stagingArea.db.collection('packages')31 set_dependencies(stagingArea, packages, stagingArea.get_source(database_name_rOpenSci))32 print("dependencies CRAN...")33 stagingArea.db = stagingArea.client.db(database_name_cran, username=stagingArea.config['arangodb']['arango_user'], 
password=stagingArea.config['arangodb']['arango_pwd'])34 packages = stagingArea.db.collection('packages')35 set_dependencies(stagingArea, packages, stagingArea.get_source(database_name_cran))36 37def populate_r(stagingArea, collection, source_ref):38 relator_file = os.path.join("data", "resources", "relator_code_cran.json")39 if not os.path.isfile(relator_file): 40 logging.error("Error when loading relator code: " + relator_file)41 return None42 with open(relator_file) as relator_f:43 relator_code_cran = json.load(relator_f)44 cursor = stagingArea.db.aql.execute(45 'FOR doc IN packages RETURN doc', ttl=3600, full_count=True46 )47 stats = cursor.statistics()48 total_packages = 049 if 'fullCount' in stats:50 total_packages = stats['fullCount']51 pbar = tqdm(total=total_packages)52 for package in cursor:53 #print(package['Package'], "...")54 # package as software vertex collection55 software = stagingArea.init_entity_from_template("software", source=source_ref)56 if software is None:57 raise("cannot init software entity from default template")58 software['labels'] = package['Package']59 # wikidata description are short phrase, so it correspond to R package title, 60 if "Title" in package:61 software['descriptions'] = package['Title']62 # for the actual package description, there is no "content summary" property, so we introduce a field "summary"63 if 'Description' in package:64 software['summary'] = package['Description']65 #software['id'] = package['_key']66 if stagingArea.db.name == "CRAN":67 # for CRAN we don't have random ID, so we have to create one - to be consistent with MongoDB ones68 # used for most of the others sources, we use an hexa identifier of length 24 69 local_id = stagingArea.get_uid()70 software['_key'] = local_id71 software['_id'] = "software/" + local_id72 else:73 software['_key'] = package['_key']74 software['_id'] = "software/" + package['_key']75 if "git_repository" in package:76 local_value = {}77 local_value["value"] = 
package["git_repository"]78 local_value["datatype"] = "url"79 local_value["references"] = []80 local_value["references"].append(source_ref)81 software["claims"]["P1324"] = []82 software["claims"]["P1324"].append(local_value)83 # programming language (P277) is always R (Q206904) here84 local_value = {}85 local_value["value"] = "Q206904"86 local_value["datatype"] = "wikibase-item"87 local_value["references"] = []88 local_value["references"].append(source_ref)89 software["claims"]["P277"] = []90 software["claims"]["P277"].append(local_value)91 # copyright license is P27592 if "License" in package:93 local_value = {}94 local_value["value"] = package["License"]95 local_value["datatype"] = "string"96 local_value["references"] = []97 local_value["references"].append(source_ref)98 # this will be the object of a further disambiguation to match the license entity99 software["claims"]["P275"] = []100 software["claims"]["P275"].append(local_value)101 # version info (P348)102 if "Version" in package:103 local_value = {}104 local_value["value"] = package["Version"]105 local_value["datatype"] = "string"106 local_value["references"] = []107 local_value["references"].append(source_ref)108 software["claims"]["P348"] = []109 software["claims"]["P348"].append(local_value)110 # official website url is P856 and usuer manual/documentation url is P2078111 # for rOpenSci manual/doc is always with https://docs.ropensci.org/ prefix112 # for CRAN we have a distinct manual field usually pointing to a PDF, 113 # URL being for the official website (or git repo)114 if "Manual" in package:115 local_value = {}116 local_value["value"] = package["Manual"]117 local_value["datatype"] = "url"118 local_value["references"] = []119 local_value["references"].append(source_ref)120 software["claims"]["P2078"] = []121 software["claims"]["P2078"].append(local_value)122 if "URL" in package and len(package["URL"]) > 0:123 for url in package["URL"]:124 local_value = {}125 local_value["value"] = url126 
local_value["datatype"] = "url"127 local_value["references"] = []128 local_value["references"].append(source_ref)129 if not "P2078" in software["claims"]:130 software["claims"]["P2078"] = []131 software["claims"]["P2078"].append(local_value)132 # original identifier133 local_value = {}134 local_value["value"] = package["_key"]135 local_value["datatype"] = "external-id"136 if stagingArea.db.name == "CRAN":137 software["claims"]["P5565"] = []138 software["claims"]["P5565"].append(local_value)139 else:140 software["claims"]["PA1"] = []141 software["claims"]["PA1"].append(local_value)142 replaced = False143 if stagingArea.db.name == "CRAN":144 # we check if the package is not already in the KB, and aggregate/merge with this existing one if yes145 cursor = stagingArea.software.find({'labels': package["Package"]}, skip=0, limit=1)146 if cursor.count()>0:147 existing_software = cursor.next()148 existing_software = stagingArea.aggregate_with_merge(existing_software, software)149 #del existing_software["_rev"]150 #print(existing_software)151 stagingArea.staging_graph.update_vertex(existing_software)152 software = existing_software153 replaced = True154 if not replaced:155 stagingArea.staging_graph.insert_vertex("software", software)156 maintainer = None157 if "Maintainer" in package:158 maintainer = package["Maintainer"]159 # authors 160 if "Authors@R" in package:161 for author in package["Authors@R"]:162 maintainer_consumed = process_author(stagingArea, author, software['_key'], relator_code_cran, source_ref, maintainer)163 if maintainer_consumed:164 maintainer = None165 elif "Authors" in package:166 # author field is relevant only if Authors@R is not 167 for author in package["Authors"]:168 maintainer_consumed = process_author(stagingArea, author, software['_key'], relator_code_cran, source_ref, maintainer)169 if maintainer_consumed:170 maintainer = None171 elif "Author" in package:172 # author field is relevant only if Authors@R is not 173 for author in 
package["Author"]:174 maintainer_consumed = process_author(stagingArea, author, software['_key'], relator_code_cran, source_ref, maintainer)175 if maintainer_consumed:176 maintainer = None177 if "References" in package:178 for reference in package["References"]:179 stagingArea.process_reference_block(package["References"], software, source_ref)180 # this will add "references" relation between the software and the referenced documents181 pbar.update(1)182 pbar.close()183 184def set_dependencies(stagingArea, collection, source_ref):185 # we use an AQL query to avoid limited life of cursor that cannot be changed otherwise186 cursor = stagingArea.db.aql.execute(187 'FOR doc IN packages RETURN doc', ttl=600188 )189 # this pass will set the dependencies190 for package in cursor:191 if not "_hard_deps" in package and not "_soft_deps" in package:192 continue193 # get the first software entry194 cursor = stagingArea.software.find({'labels': package["Package"]}, skip=0, limit=1)195 if cursor.count()>0:196 software1 = cursor.next()197 else:198 continue199 # hard dependencies are edge relations200 if "_hard_deps" in package:201 for dependency in package["_hard_deps"]:202 # relation are via the dependencies edge collection, they relate two software (or packages or libraries)203 # property is "depends on software" (P1547) 204 cursor = stagingArea.software.find({'labels': dependency["package"]}, skip=0, limit=1)205 if cursor.count()>0:206 software2 = cursor.next()207 add_dependency(stagingArea, dependency, software1, software2, source_ref, the_type="hard")208 else:209 continue210 # soft dependencies211 if "_soft_deps" in package:212 for dependency in package["_soft_deps"]:213 # relation are via the dependencies edge collection, they relate two software (or packages or libraries)214 # property is "depends on software" (P1547) 215 cursor = stagingArea.software.find({'labels': dependency["package"]}, skip=0, limit=1)216 if cursor.count()>0:217 software2 = cursor.next()218 
add_dependency(stagingArea, dependency, software1, software2, source_ref, the_type="soft")219 else:220 continue 221def add_dependency(stagingArea, dependency, software1, software2, source_ref, the_type=None):222 # relation are via the dependencies edge collection, they relate two software (or packages or libraries)223 # property is "depends on software" (P1547) 224 relation = {}225 relation["claims"] = {}226 relation["claims"]["P1547"] = []227 # add version (P348) if present228 if "version" in dependency:229 local_value = {}230 local_value["value"] = dependency["version"]231 local_value["datatype"] = "string"232 local_value["references"] = []233 local_value["references"].append(source_ref)234 relation["claims"]["P348"] = []235 relation["claims"]["P348"].append(local_value)236 # indicate hard or soft dependencies, we express it as qualitfier of the P1547 property,237 # with the property "has quality" (P1552) with string value (normally it's an item value, 238 # but we have to relax somewhere to express that easily)239 local_value = {}240 local_value["value"] = the_type241 local_value["datatype"] = "string"242 local_value["references"] = []243 local_value["references"].append(source_ref)244 relation["claims"]["P1547"].append({"qualifiers": {}})245 relation["claims"]["P1547"][0]["qualifiers"]["P1552"] = []246 relation["claims"]["P1547"][0]["qualifiers"]["P1552"].append(local_value)247 relation["_from"] = software1['_id']248 # find the target dependency software with its unique package name249 relation["_to"] = software2['_id']250 relation["_key"] = software1["_key"] + "_" + software2['_key'] + "_hard"251 relation["_id"] = "dependencies/" + relation["_key"]252 if not stagingArea.staging_graph.has_edge(relation["_id"]):253 stagingArea.staging_graph.insert_edge("dependencies", edge=relation)254def process_author(stagingArea, author, software_key, relator_code_cran, source_ref, maintainer=None):255 '''256 Process an author in the Author or Author@R fields257 If the role 
is funder (fnd), we normally don't have a person but an organization and the relation258 should be an edge "funding".259 If the role is "copyright holder" (cph), the relation is the edge "copyrights". In this case, and 260 also observed for authorship, we could habe an organization and not always a person. 261 '''262 # check role case funder263 if "roles" in author:264 if isinstance(author["roles"], str):265 author["roles"] = [ author["roles"] ]266 if "fnd" in author["roles"]:267 # this is an organization268 org_name = None269 if 'given' in author:270 org_name = author['given']271 elif 'full_name' in author:272 org_name = author['full_name']273 if org_name == None:274 return False275 organization = stagingArea.init_entity_from_template("organization", source=source_ref)276 if organization is None:277 raise("cannot init organization entity from default template")278 279 organization["labels"] = org_name280 local_org_id = stagingArea.get_uid()281 organization["_key"] = local_org_id282 organization["_id"] = "organizations/" + organization["_key"]283 stagingArea.staging_graph.insert_vertex("organizations", organization)284 # funding relation285 relation = {}286 relation["claims"] = {}287 relation["claims"]['P8324'] = [ {"references": [ source_ref ] } ]288 relation["_from"] = organization["_id"]289 relation["_to"] = "software/" + software_key290 relation["_id"] = "funding/" + organization["_key"] + "_" + software_key291 stagingArea.staging_graph.insert_edge("funding", edge=relation)292 if "cph" in author["roles"]:293 # the organization is also a copyright holder, so we had a copyrights relation too294 relation = {}295 relation["claims"] = {}296 relation["claims"]['P8324'] = [ {"references": [ source_ref ] } ]297 relation["_from"] = organization["_id"]298 relation["_to"] = "software/" + software_key299 relation["_id"] = "copyrights/" + organization["_key"] + "_" + software_key300 stagingArea.staging_graph.insert_edge("copyrights", edge=relation)301 return False302 person 
= stagingArea.init_entity_from_template("person", source=source_ref)303 if person is None:304 raise("cannot init person entity from default template")305 if "full_name" in author:306 person["labels"] = author['full_name']307 elif 'given' in author and 'family' in author:308 if isinstance(author['given'], str):309 person["labels"] = author['given'] + " " + author['family']310 else:311 for giv in author['given']:312 person["labels"] += giv + " "313 person["labels"] += author['family']314 elif 'given' in author:315 person["labels"] = author['given']316 else:317 # there is no name part available318 return None319 if 'given' in author:320 # "given name" -> P735321 local_value = {}322 local_value["value"] = author['given']323 local_value["datatype"] = "string"324 local_value["references"] = []325 local_value["references"].append(source_ref)326 person["claims"]["P735"] = []327 person["claims"]["P735"].append(local_value)328 329 if 'family' in author:330 # "family name" -> P734331 local_value = {}332 local_value["value"] = author['family']333 local_value["datatype"] = "string"334 local_value["references"] = []335 local_value["references"].append(source_ref)336 person["claims"]["P734"] = []337 person["claims"]["P734"].append(local_value)338 if 'orcid' in author:339 # P496340 local_value = {}341 local_value["value"] = author['orcid']342 local_value["datatype"] = "external-id"343 local_value["references"] = []344 local_value["references"].append(source_ref)345 person["claims"]["P496"] = []346 person["claims"]["P496"].append(local_value)347 person["index_orcid"] = author['orcid']348 if 'email' in author:349 # P968350 local_value = {}351 local_value["value"] = author['email']352 local_value["datatype"] = "url"353 local_value["references"] = []354 local_value["references"].append(source_ref)355 person["claims"]["P968"] = []356 person["claims"]["P968"].append(local_value)357 # github identifier P2037358 # Google Scholar author ID P1960359 # if only full_name is available, we 
would need grobid to further parse the name360 # check orcid duplicate361 matched_person = None362 if 'orcid' in author:363 cursor = stagingArea.persons.find({'index_orcid': author['orcid']}, skip=0, limit=1)364 if cursor.count()>0:365 matched_person = cursor.next()366 if matched_person != None:367 person = stagingArea.aggregate_with_merge(matched_person, person)368 stagingArea.staging_graph.update_vertex(person)369 else:370 local_id = stagingArea.get_uid()371 person["_key"] = local_id372 person["_id"] = "persons/" + person["_key"]373 stagingArea.staging_graph.insert_vertex("persons", person)374 if 'roles' in author:375 for role in author["roles"]:376 # relation based on role, via the actor edge collection377 if not role in relator_code_cran:378 # try some cleaning379 role = role.replace(")", "")380 role = role.strip("\"")381 if not role in relator_code_cran:382 logging.warning("Error unknown role " + role + " defaulting to Contributor")383 role = "ctb"384 wikidata_property = relator_code_cran[role]["wikidata"]385 set_role(stagingArea, wikidata_property, person, software_key, relator_code_cran[role]["marc_term"].replace(" ", "_"), source_ref)386 else:387 # role is undefined, we default to contributor (maybe not be the best choice?)388 set_role(stagingArea, relator_code_cran['ctb']["wikidata"], person, software_key, "Contributor", source_ref)389 if maintainer is None:390 return True391 match = False392 if "full_name" in maintainer:393 if "full_name" in person:394 if maintainer["full_name"] == person["full_name"]:395 match = True396 elif "given" in person:397 if maintainer["full_name"].find(person["given"]) != -1:398 if "family" in person:399 if maintainer["full_name"].find(person["given"]) != -1:400 match = True401 else:402 match = True403 if match == True:404 # if email not present add it405 if "email" in maintainer and not "email" in person:406 person["email"] = maintainer["email"]407 if "full_name" in maintainer and not "full_name" in person:408 
person["full_name"] = maintainer["full_name"]409 # add maintainer role, maintained by (P126)410 relation = {}411 relation["claims"] = {}412 relation["claims"]["P126"] = [ {"references": [ source_ref ] } ]413 relation["_from"] = "person/" + person["_key"]414 relation["_to"] = "software/" + software_key415 relation["_key"] = person["_key"] + "_" + software_key + "maintainer"416 stagingArea.staging_graph.insert_edge(stagingArea.actors, edge=relation)417 return True418 return False419def set_role(stagingArea, wikidata_property, person, software_key, role_term, source_ref):420 relation = {}421 relation["claims"] = {}422 relation["claims"][wikidata_property] = [ {"references": [ source_ref ] } ]423 relation["_from"] = person["_id"]424 relation["_to"] = "software/" + software_key425 relation["_id"] = "actors/" + person["_key"] + "_" + software_key + "_" + role_term426 # check if not already there (conservative check ;)427 if not stagingArea.staging_graph.has_edge(relation["_id"]):...
test_nonlocal_statement.py
Source:test_nonlocal_statement.py
from unittest import TestCase

__all__ = ['NonlocalStatement']

# Module-level value; reset in setUp and referenced by name in the commented-out
# "nonlocal GLOBAL_VALUE" example below.
GLOBAL_VALUE: int = 0


class NonlocalStatement(TestCase):
    """Tests showing how nested functions read and rebind variables of enclosing scopes.

    Each test defines a variable in the test method, a nested function that
    touches it, and then asserts what the enclosing variable looks like (or
    which exception is raised) with and without the ``nonlocal`` statement.
    """

    def setUp(self) -> None:
        """Reset the module-level ``GLOBAL_VALUE`` to 0 before every test."""
        # pylint: disable=global-statement
        global GLOBAL_VALUE
        GLOBAL_VALUE = 0

    def test_access_nonlocal_without_nonlocal_statement(self) -> None:
        """A nested function can read an enclosing variable without ``nonlocal``."""
        local_value: int = 0

        def read() -> int:
            # Only reads the name, so it resolves to the enclosing variable.
            return local_value

        self.assertEqual(local_value, read(), 'We can access nonlocal variable without "nonlocal" statement usage. '
                                              'In this case "local_value" is a free variable')

    def test_redefine_free_variable(self) -> None:
        """Assigning to a name anywhere in a function makes it local for the whole function.

        Reading it before that assignment therefore raises ``UnboundLocalError``
        instead of falling back to the enclosing variable.
        """
        # pylint: disable=unused-variable
        free_value: int = 0

        def read() -> int:
            # pylint: disable=used-before-assignment
            value: int = free_value  # local variable 'free_value' referenced before assignment
            free_value: int = value
            return free_value

        with self.assertRaises(UnboundLocalError):
            read()

    def test_modify_nonlocal_without_nonlocal_statement(self) -> None:
        """Plain assignment in a nested function does not change the enclosing variable."""
        local_value: int = 0

        def modify() -> None:
            # pylint: disable=unused-variable
            # Binds a new name local to modify(); the outer local_value stays 0.
            local_value = 42

        modify()
        self.assertEqual(local_value, 0, 'We cannot modify nonlocal variable without "nonlocal" statement usage.'
                                         'Shadowing local variable will be just declared.')

    def test_modify_nonlocal_with_nonlocal_statement(self) -> None:
        """With ``nonlocal``, assignment in the nested function rebinds the enclosing variable.

        The commented-out lines record variants that are rejected with a
        ``SyntaxError`` (or are otherwise not allowed), as noted on each line.
        """
        local_value: int = 0

        # def modify(local_value)  # Names listed in a nonlocal statement must not be defined as formal parameters
        def modify() -> None:
            # v = local_value  # SyntaxError: name 'local_value' is used prior to nonlocal declaration
            # local_value = -1  # SyntaxError: name 'local_value' is assigned to before nonlocal declaration
            # nonlocal GLOBAL_VALUE  # SyntaxError: no binding for nonlocal 'GLOBAL_VALUE' found
            nonlocal local_value
            local_value = 42

        modify()
        self.assertEqual(local_value, 42, 'We can modify nonlocal variable with "nonlocal" statement usage.')

    def test_modify_multiple_scopes_single_nonlocal(self) -> None:
        """``nonlocal`` reaches through an intermediate function that has no binding of the name."""
        local_value: int = 0

        def modify_lvl1() -> None:
            def modify_lvl2() -> None:
                # modify_lvl1 does not bind local_value, so this refers to the
                # variable of the test method two levels up.
                nonlocal local_value
                local_value = 42

            modify_lvl2()

        modify_lvl1()
        self.assertEqual(local_value, 42, 'We can modify nonlocal variable with "nonlocal" statement usage '
                                          'even a few level above.')

    def test_modify_multiple_scopes_multiple_nonlocal(self) -> None:
        """``nonlocal`` binds to the nearest enclosing scope that defines the name."""
        local_value: int = 0

        def modify_lvl1() -> None:
            # This binding shadows the test method's local_value for modify_lvl2.
            local_value: int = -1

            def modify_lvl2():
                nonlocal local_value
                local_value = 42

            modify_lvl2()

        modify_lvl1()
        # The outermost variable is untouched: only modify_lvl1's copy was rebound.
        self.assertEqual(local_value, 0, 'We can modify nonlocal variable with "nonlocal" statement usage '
                                         'only from the nearest enclosing scope')

    def test_create_nonlocal_with_nonlocal_statement(self) -> None:
        """Placeholder: the example is kept as comments because it is a ``SyntaxError``.

        ``nonlocal`` cannot introduce a new name; it needs an existing binding
        in an enclosing function scope.
        """
        # def create():
        #     nonlocal local_value  # SyntaxError: no binding for nonlocal 'local_value' found (the scope in which a
        #                           # new binding should be created
        #                           # cannot be determined unambiguously)
        #     local_value = 42
        #
        # create()
        pass

    def test_read_free_variable_in_exec_without_context(self) -> None:
        """Code run by ``exec`` without explicit namespaces cannot see the enclosing variable.

        ``read()`` itself never references ``local_variable``; the exec'd string
        does, and the lookup fails with ``NameError``.
        """
        # pylint: disable=unused-variable
        local_variable: int = 0

        def read() -> None:
            # pylint: disable=exec-used
            exec("value = local_variable")

        with self.assertRaises(NameError):
            read()

    def test_read_free_variable_in_exec_with_context(self) -> None:
        """Hand the enclosing variable to ``exec`` through an explicit namespace dict.

        NOTE(review): the snippet is cut off right after ``read()`` is defined,
        so the call and the assertions of this test are not visible here.
        """
        local_variable: int = 0

        def read() -> None:
            # pylint: disable=exec-used
            exec("value = local_variable", {'local_variable': local_variable})
        ...
Learn to execute automation testing from scratch with the LambdaTest Learning Hub: from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, such as Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!