Best Python code snippet using fMBT_python
mets.py
Source:mets.py
1"""Code for parsing METS files."""2from __future__ import annotations3from collections import defaultdict4from dataclasses import dataclass5from typing import Dict, Iterable, List, Mapping, Optional6import requests7import shortuuid8from lxml import etree9#: Namespaces that are going to be used during XML parsing10NAMESPACES = {11 'mets': 'http://www.loc.gov/METS/',12 'mods': 'http://www.loc.gov/mods/v3',13 'dv': 'http://dfg-viewer.de/',14 'xlink': 'http://www.w3.org/1999/xlink'}15# Utility datatypes16@dataclass17class ImageInfo:18 """Metadata about an image."""19 id: str20 url: str21 mimetype: Optional[str] = None22 width: Optional[int] = None23 height: Optional[int] = None24@dataclass25class PhysicalItem:26 """A METS physical item (most often a page)."""27 ident: str28 label: str29 files: Iterable[ImageInfo]30 image_ident: Optional[str] = None31 @property32 def max_dimensions(self):33 """Maximum dimensions in pixels."""34 return max((f.width, f.height) for f in self.files)35 @property36 def min_dimensions(self):37 """Minimum dimensions in pixels."""38 return min((f.width, f.height) for f in self.files)39@dataclass40class TocEntry:41 """A table of contents entry."""42 children: List[TocEntry] # pylint:disable=undefined-variable43 physical_ids: List[str]44 logical_id: str45 label: str46 type: str47class MetsParseError(Exception):48 """An exception that occured while parsing a METS file."""49 def __init__(self, message: str, debug_info: dict = None) -> None:50 super().__init__(message)51 self.debug_info = debug_info52class MetsDocument:53 """A parsed METS document.54 Note that this is not a generic METS parser, but currently highly coupled55 to both the METS/MODS profile required by the German 'DFG Viewer' and the56 later use as IIIF metadata (which is **not** intended to be57 machine-readable)58 """59 _tree: etree.ElementTree60 _mods_root: etree.Element61 identifiers: Dict[str, str]62 primary_id: str63 physical_items: Dict[str, PhysicalItem]64 toc_entries: 
Iterable[TocEntry]65 metadata: dict66 files: Dict[str, ImageInfo]67 def __init__(self, mets_tree: etree.ElementTree, url: str = None,68 primary_id: str = None) -> None:69 """Parse a METS document.70 This will read the metadata, table of contents and general71 file informations. The width and heights of the images is72 not determined.73 """74 self.url = url75 self._tree = mets_tree76 self._mods_root = self._xpath("(.//mods:mods)[1]")[0]77 self.identifiers = {78 e.get('type'): e.text79 for e in self._xpath("./mods:identifier", self._mods_root)}80 recordid_elem = self._find(81 ".//mods:recordInfo/mods:recordIdentifier", self._mods_root)82 if recordid_elem is not None:83 key = recordid_elem.get('source')84 self.identifiers[key] = recordid_elem.text85 self.primary_id = primary_id or self._get_unique_identifier()86 self.metadata = self._read_metadata()87 self.files = self._read_files()88 if not self.files:89 raise MetsParseError(90 f"METS at {self.url} does not reference any JPEG images")91 self.physical_items = self._read_physical_items()92 self.toc_entries = self._read_toc_entries()93 def _xpath(self, xpath: str,94 elem: etree.Element = None) -> List[etree.Element]:95 elem = elem if elem is not None else self._tree96 return elem.xpath(xpath, namespaces=NAMESPACES)97 def _find(self, path: str, elem: etree.Element = None) -> etree.Element:98 elem = elem if elem is not None else self._tree99 return elem.find(path, namespaces=NAMESPACES)100 def _findall(self, path: str,101 elem: etree.Element = None) -> List[etree.Element]:102 elem = elem if elem is not None else self._tree103 return elem.findall(path, namespaces=NAMESPACES)104 def _findtext(self, path: str, elem: etree.Element = None) -> str:105 elem = elem if elem is not None else self._tree106 return elem.findtext(path, namespaces=NAMESPACES)107 def _parse_title(self, title_elem: etree.Element) -> str:108 title = self._findtext(".//mods:title", title_elem)109 nonsort = self._findtext(".//mods:nonSort", title_elem)110 if 
nonsort:111 title = nonsort + title112 subtitle = self._findtext(".//mods:subTitle", title_elem)113 if subtitle:114 title = f"{title}. {subtitle}"115 return title116 def _parse_name(self, name_elem: etree.Element) -> str:117 name = self._findtext("./mods:displayForm", name_elem)118 if not name:119 name = " ".join(120 e.text for e in self._findall("./mods:namePart", name_elem))121 return name122 def _read_persons(self) -> Mapping[str, List[str]]:123 persons: Mapping[str, List[str]] = defaultdict(list)124 name_elems = self._xpath("./mods:name", self._mods_root)125 for e in name_elems:126 name = self._parse_name(e)127 role = self._findtext("./mods:role/mods:roleTerm", e)128 if role == 'aut':129 persons['creator'].append(name)130 else:131 persons['other_persons'].append(name)132 return persons133 def _read_origin(self) -> Mapping[str, str]:134 info_elem = self._find("./mods:originInfo", self._mods_root)135 return {136 'publisher': self._findtext("./mods:publisher", info_elem),137 'pub_place': self._findtext("./mods:place/mods:placeTerm",138 info_elem),139 'pub_date': self._findtext("./mods:dateIssued", info_elem)}140 def _get_unique_identifier(self) -> str:141 identifier = ''142 for type_ in ('oai', 'urn'):143 # Identifiers that are intended to be globally unique144 if identifier:145 break146 identifier = self._findtext(f"./mods:identifier[@type='{type_}']",147 self._mods_root)148 if not identifier:149 # MODS recordIdentifier, usually available on ZVDD documents150 identifier = self._findtext(151 "./mods:recordInfo/mods:recordIdentifier",152 self._mods_root)153 if not identifier:154 # Random identifier155 identifier = shortuuid.uuid()156 return identifier157 def _read_titles(self) -> List[str]:158 title_elems = self._findall("./mods:titleInfo", self._mods_root)159 if not title_elems:160 # For items with no title of their own that are part of a larger161 # multi-volume work162 title_elems = [self._findall(163 ".//mods:relatedItem[@type='host']/mods:titleInfo")[0]]164 # 
TODO: Use information from table of contents to find out about165 # titles of multi-volume work166 titles = [self._parse_title(e) for e in title_elems]167 part_number = self._findtext(".//mods:part/mods:detail/mods:number")168 if part_number:169 titles = [f"{title} ({part_number})" for title in titles]170 return titles171 def _read_metadata(self) -> dict:172 metadata: dict = {}173 metadata.update(self._read_persons())174 metadata.update(self._read_origin())175 metadata['title'] = self._read_titles()176 owner_url = self._findtext(".//mets:rightsMD//dv:ownerSiteURL")177 owner = self._findtext(".//mets:rightsMD//dv:owner")178 if owner_url:179 metadata['attribution'] = (180 f"<a href='{owner_url}'>{owner or owner_url}</a>")181 elif owner:182 metadata['attribution'] = owner183 else:184 metadata['attribution'] = 'Unknown'185 metadata['logo'] = self._findtext(186 ".//mets:rightsMD//dv:ownerLogo")187 metadata['see_also'] = []188 if self.url:189 metadata['see_also'].append(190 [{'@id': self.url, 'format': 'text/xml',191 'profile': 'http://www.loc.gov/METS/'}])192 pdf_url = self._xpath(193 ".//mets:fileGrp[@USE='DOWNLOAD']/"194 "mets:file[@MIMETYPE='application/pdf']/"195 "mets:FLocat/@xlink:href")196 if pdf_url and len(pdf_url) == 1:197 metadata['see_also'].append(198 {'@id': pdf_url, 'format': 'application/pdf'})199 metadata['related'] = self._findtext(200 ".//mets:digiprovMD//dv:presentation")201 license_ = self._findtext(".//dv:rights/dv:license")202 if not license_:203 license_ = self._findtext(".//mods:accessCondition")204 if not license_:205 license_ = 'reserved'206 metadata['license'] = license_207 # TODO: mods:physicalDescription208 metadata['language'] = self._findtext(209 ".//mods:languageTerm[@type='text']")210 metadata['genre'] = self._findtext(".//mods:genre")211 metadata['description'] = self._findtext(".//mods:abstract") or ""212 # TODO: Add mods:notes to description213 return metadata214 def _read_files(self) -> Dict[str, ImageInfo]:215 img_specs = 
(self._get_image_specs(e)216 for e in self._findall(".//mets:file"))217 return {info.id: info for info in img_specs218 if info.url and info.url.startswith('http')219 and info.mimetype == 'image/jpeg'}220 def _read_physical_items(self) -> Dict[str, PhysicalItem]:221 """Create a map from physical IDs to (label, image_info) pairs."""222 if self.files is None:223 raise ValueError(224 "Can't read physical items before files have been read.")225 physical_items = {}226 pages = self._findall(227 ".//mets:structMap[@TYPE='PHYSICAL']"228 "/mets:div[@TYPE='physSequence']"229 "/mets:div[@TYPE='page']")230 for page_elem in sorted(pages, key=lambda e: int(e.get('ORDER'))):231 page_id = page_elem.get('ID')232 for label_attr in ('LABEL', 'ORDERLABEL', 'ORDER'):233 label = page_elem.get(label_attr)234 if label:235 break236 if not label:237 label = '?'238 files = [239 self.files[ptr.get('FILEID')]240 for ptr in self._findall("./mets:fptr", page_elem)241 if ptr.get('FILEID') in self.files]242 physical_items[page_id] = PhysicalItem(243 page_id, label, [f for f in files if f is not None])244 return physical_items245 def _parse_tocentry(self, toc_elem: etree.Element,246 lmap: Mapping[str, List[str]]) -> TocEntry:247 """Parse a toc entry subtree to a MetsTocEntry object."""248 log_id = toc_elem.get('ID')249 entry = TocEntry(250 children=[], physical_ids=lmap.get(log_id, []),251 type=toc_elem.get('TYPE'),252 logical_id=log_id, label=toc_elem.get('LABEL'))253 for e in self._findall("./mets:div", toc_elem):254 entry.children.append(self._parse_tocentry(e, lmap))255 return entry256 def _read_toc_entries(self) -> List[TocEntry]:257 """Create trees of TocEntries from the METS."""258 toc_entries = []259 lmap: Dict[str, List[str]] = {}260 mappings = [261 (e.get('{%s}from' % NAMESPACES['xlink']),262 e.get('{%s}to' % NAMESPACES['xlink']))263 for e in self._findall(".//mets:structLink//mets:smLink")]264 for logical_id, physical_id in mappings:265 if logical_id not in lmap:266 lmap[logical_id] = 
[]267 lmap[logical_id].append(physical_id)268 for e in self._xpath(".//mets:structMap[@TYPE='LOGICAL']/mets:div"):269 toc_entries.append(self._parse_tocentry(e, lmap))270 return toc_entries271 def _get_image_specs(self, file_elem: etree.Element) -> ImageInfo:272 image_id = file_elem.get('ID')273 mimetype = file_elem.get('MIMETYPE').replace('jpg', 'jpeg')274 location = self._xpath(275 "./mets:FLocat[@LOCTYPE='URL']/@xlink:href", file_elem)276 return ImageInfo(image_id, location[0] if location else None, mimetype)277def get_basic_info(mets_url):278 from .iiif import make_label279 xml = requests.get(mets_url, allow_redirects=True).content280 tree = etree.fromstring(xml)281 doc = MetsDocument(tree, url=mets_url)282 thumb_urls = doc._xpath(283 ".//mets:file[@MIMETYPE='image/jpeg']/mets:FLocat/@xlink:href")284 if not thumb_urls:285 thumb_urls = doc._xpath(286 ".//mets:file[@MIMETYPE='image/jpg']/mets:FLocat/@xlink:href")287 return {288 'metsurl': mets_url,289 'label': make_label(doc.metadata),290 'thumbnail': thumb_urls[0] if thumb_urls else None,291 'attribution': {292 'logo': doc.metadata['logo'],293 'owner': doc.metadata['attribution']294 }...
SourceViewDialog.py
Source:SourceViewDialog.py
__version__ = "$Id$"

import ViewDialog
import windowinterface, usercmd
from usercmd import *
import features


class SourceViewDialog(ViewDialog.ViewDialog):
    """Dialog showing the document source in a text window.

    Provides copy/cut/paste/undo commands and find/replace support. The
    dialog is read-only unless the SOURCE_VIEW_EDIT feature is enabled.
    """
    readonly = features.SOURCE_VIEW_EDIT not in features.feature_set

    def __init__(self):
        ViewDialog.ViewDialog.__init__(self, 'sourceview_')
        self.__textwindow = None
        self.__setCommonCommandList()
        self._findReplaceDlg = None
        self._findText = ''
        self._replaceText = None
        self._findOptions = (0,0)
        # set when the selection changed since the last successful find
        self.__selChanged = 1
        # set while a replace operation moves the selection itself
        self.__replacing = 0

    def __setCommonCommandList(self):
        """Build the command lists that do not depend on the window state."""
        self._commonCommandList = [SELECTNODE_FROM_SOURCE(callback = (self.onRetrieveNode, ())),
                                   FIND(callback = (self.onFind, ())),
                                   FINDNEXT(callback = (self.onFindNext, ())),
                                   ]
        self._copyCommand = [COPY(callback = (self.onCopy, ()))]
        # NOTE(review): '_pasteComamnd' is misspelled; the name is kept
        # because code outside this listing may refer to it.
        if self.readonly:
            self._undoCommand = []
            self._cutCommand = []
            self._pasteComamnd = []
        else:
            self._commonCommandList.append(REPLACE(callback = (self.onReplace, ())))
            self._undoCommand = [UNDO(callback = (self.onUndo, ()))]
            # cut is offered together with copy, i.e. when text is selected
            self._copyCommand.append(CUT(callback = (self.onCut, ())))
            self._pasteComamnd = [PASTE(callback = (self.onPaste, ()))]

    def destroy(self):
        self.__textwindow = None

    def show(self):
        """Create the text window, or pop it up if it already exists."""
        self.load_geometry()
        if not self.__textwindow:
            import MenuTemplate
            self.window = self.__textwindow = \
                    self.toplevel.window.textwindow("", readonly=self.readonly, xywh=self.last_geometry, title='Source')
            self.__textwindow.set_mother(self)
            self.setpopup(MenuTemplate.POPUP_SOURCEVIEW)
        else:
            # Pop it up
            self.pop()
        self.__updateCommandList()
        self.__textwindow.setListener(self)

    def get_geometry(self):
        """Return the window geometry in pixels (last known if hidden)."""
        if self.__textwindow:
            self.last_geometry = self.__textwindow.getgeometry(windowinterface.UNIT_PXL)
            return self.last_geometry

    def pop(self):
        if self.__textwindow is not None:
            self.__textwindow.pop()

    def __updateCommandList(self):
        """Recompute the enabled commands from the text window state."""
        commandList = []

        # undo
        if not self.readonly and self.__textwindow.canUndo():
            commandList.append(UNDO(callback = (self.onUndo, ())))

        # copy/paste/cut
        if self.__textwindow.isSelected():
            commandList = commandList + self._copyCommand
        if not self.readonly and not self.__textwindow.isClipboardEmpty():
            commandList = commandList + self._pasteComamnd

        # other operations are always active
        commandList = commandList+self._commonCommandList

        self.setcommandlist(commandList)

    def is_showing(self):
        """Return 1 if the text window exists, 0 otherwise."""
        if self.__textwindow:
            return 1
        else:
            return 0

    def hide(self):
        """Close the text window and any open find/replace dialog."""
        self.save_geometry()
        if self.__textwindow:
            self.__textwindow.removeListener()
            self.__textwindow.close()
            self.__textwindow = None
        if self._findReplaceDlg:
            self._findReplaceDlg.hide()
            self._findReplaceDlg = None

    def setcommandlist(self, commandlist):
        if self.__textwindow:
            self.__textwindow.set_commandlist(commandlist)

    def setpopup(self, template):
        if self.__textwindow:
            self.__textwindow.setpopupmenu(template)

    def get_text(self):
        """Return the text of the window; None if there is no window."""
        if self.__textwindow:
            return self.__textwindow.gettext()
        else:
            # single-argument form prints the same under Python 2 and 3
            print("ERROR (get_text): No text window.")

    def set_text(self, text, colors=None):
        """Set the text of the window.

        colors defaults to a fresh empty list on every call, so no list
        object is shared between calls.
        """
        if colors is None:
            colors = []
        if self.__textwindow:
            return self.__textwindow.settext(text, colors)
        else:
            print("ERROR (set text): No text window")

    def is_changed(self):
        """Return whether the text was modified; always 0 when read-only."""
        if self.readonly:
            return 0
        if self.__textwindow:
            return self.__textwindow.isChanged()

    def select_chars(self, s, e):
        if self.__textwindow:
            self.__textwindow.select_chars(s,e)

    def select_lines(self, sLine, eLine):
        # NOTE(review): only sLine is used, eLine is ignored -- confirm
        # whether selecting a range of lines was intended.
        if self.__textwindow:
            self.__textwindow.select_line(sLine)

    # return the current char index pointed to by the caret
    def getCurrentCharIndex(self):
        if self.__textwindow:
            return self.__textwindow.getCurrentCharIndex()
        return -1

    # return the line number
    def getLineNumber(self):
        if self.__textwindow:
            return self.__textwindow.getLineNumber()
        return 0

    #
    # text window listener interface
    #

    # this callback is called when the selection changes
    def onSelChanged(self):
        self.__updateCommandList()
        self.__selChanged = 1
        # a selection change not caused by a replace invalidates the
        # current match, so a direct replace is no longer possible
        if not self.__replacing and self._findReplaceDlg is not None:
            self._findReplaceDlg.enableReplace(0)

    # this callback is called when the content of the clipboard changes (or may have changed)
    def onClipboardChanged(self):
        self.__updateCommandList()

    #
    # command listener interface
    #

    # note: these copy, paste and cut operations don't use the GRiNS clipboard
    def onCopy(self):
        self.__textwindow.Copy()

    def onCut(self):
        # cutting from a read-only view degrades to a copy
        if self.readonly:
            self.__textwindow.Copy()
        else:
            self.__textwindow.Cut()

    def onPaste(self):
        if not self.readonly:
            self.__textwindow.Paste()

    def onUndo(self):
        if not self.readonly:
            self.__textwindow.Undo()

    #
    # find/replace support
    #

    def onFind(self):
        """Open the find dialog, replacing any find/replace dialog shown."""
        # if another dlg was shown, we hide it
        if self._findReplaceDlg is not None:
            self._findReplaceDlg.hide()
            self._findReplaceDlg = None

        import win32dialog
        self._findReplaceDlg = win32dialog.FindDialog(self.doFindNext, self._findText, self._findOptions, self.window)
        self._findReplaceDlg.show()

    def onFindNext(self):
        if self._findText is not None:
            self.doFindNext(self._findText, self._findOptions)

    def onReplace(self):
        """Open the replace dialog; does nothing when read-only."""
        if self.readonly:
            return
        # if another dlg was shown, we hide it
        if self._findReplaceDlg is not None:
            self._findReplaceDlg.hide()
            self._findReplaceDlg = None

        import win32dialog
        self._findReplaceDlg = win32dialog.ReplaceDialog(self.doFindNext, self.doReplace, self.doReplaceAll, \
                                                         self._findText, self._replaceText, self._findOptions, self.window)
        self._findReplaceDlg.show()
        # the selection is still the last match: replace is possible at once
        if not self.__selChanged:
            self._findReplaceDlg.enableReplace(1)

    def doFindNext(self, text, options):
        """Find the next occurrence of text, wrapping around to the start."""
        if not self.__textwindow:
            return

        # save the text and options for the next time
        self._findText = text
        self._findOptions = options

        found = 0
        # first search from the current position
        begin = self.getCurrentCharIndex()
        if self.__textwindow.findNext(begin, text, options) is None:
            # not found, search from the beginning
            ret = self.__textwindow.findNext(0, text, options)
            if ret is None:
                windowinterface.showmessage("No more occurrences.", mtype = 'error')
            else:
                found = 1
        else:
            found = 1

        if self._findReplaceDlg is not None:
            self._findReplaceDlg.enableReplace(found)
        # reset the flag which tells whether a direct replace is possible
        self.__selChanged = 0

    def doReplace(self, text, options, replaceText):
        """Replace the current match and move on to the next occurrence."""
        if self.readonly or not self.__textwindow:
            return

        # save the options for the next time
        self._findOptions = options

        if self._findText != text:
            # the findwhat value has changed since the last find
            # in this special case, we jump to the next value without automatic replace
            self._findText = text
            self.onFindNext()
            return

        self.__replacing = 1
        # save the replacement text for the next time
        self._replaceText = replaceText

        self.__textwindow.replaceSel(replaceText)
        # seek on the next occurrence found
        self.onFindNext()
        self.__replacing = 0

    def doReplaceAll(self, text, options, replaceText):
        """Replace every occurrence of text, searching from the start."""
        if self.readonly or not self.__textwindow:
            return

        self.__replacing = 1
        # save the text and options for the next time
        self._findText = text
        self._findOptions = options

        begin = 0
        nocc = 0
        lastPos = None
        pos = self.__textwindow.findNext(begin, self._findText, self._findOptions)
        # stop when nothing is found or the search stops advancing
        while pos is not None and lastPos != pos:
            lastPos = pos
            self.__textwindow.replaceSel(replaceText)
            begin = self.getCurrentCharIndex()
            nocc = nocc + 1
            pos = self.__textwindow.findNext(begin, self._findText, self._findOptions)
        self.__replacing = 0
        # NOTE(review): the source listing is truncated after this point;
        # 'nocc' is presumably used by code that is not visible here.
ResultsReader.py
Source:ResultsReader.py
...16 for s in map(_ns, path):17 xpath += s + '/'18 xpath = xpath.rstrip('/')19 return xpath20def _findText(root, xpath) -> str:21 try:22 return root.find(xpath).text23 except AttributeError:24 return None25def _findNodes(root, xpath):26 try:27 return root.findall(xpath)28 except AttributeError:29 return None30class ResultListReader:31 def __init__(self, file:SpooledTemporaryFile):32 self.file = file33 with self.file as f:34 tree = parse(f)35 root = tree.getroot()36 if root.tag != _ns('ResultList'):37 raise TypeError(f'{root.tag} is not an ResultList')38 self.root = root39 self.race_name = _findText(self.root, _nspath(["Event", "Name"]))40 self.class_results = _findNodes(self.root, _nspath(["ClassResult"]))41class ClassResultReader:42 def __init__(self, el):43 if el.tag != _ns('ClassResult'):44 raise TypeError(f'{el.tag} is not a ClassResult')45 self.root = el46 self.class_name = _findText(self.root, _nspath(["Class", "Name"]))47 self.class_name_short = _findText(self.root, _nspath(["Class", "ShortName"]))48 self.course_name = _findText(self.root, _nspath(["Course", "Name"]))49 self.course_id = _findText(self.root, _nspath(["Course", "Id"]))50 self.course_length = _findText(self.root, _nspath(["Course", "Length"]))51 self.course_climb = _findText(self.root, _nspath(["Course", "Climb"]))52 self.course_controls = _findText(self.root, _nspath(["Course", "NumberOfControls"]))53 self.result_nodes = _findNodes(self.root, _nspath(["PersonResult"]))54class PersonResultReader:55 def __init__(self, el):56 if el.tag != _ns('PersonResult'):57 raise TypeError(f'{el.tag} is not a PersonResult')58 self.root = el59 self.first_name = _findText(self.root, _nspath(["Person", "Name", "Given"]))60 self.last_name = _findText(self.root, _nspath(["Person", "Name", "Family"]))61 self.bib = _findText(self.root, _nspath(["Result", "BibNumber"]))62 self.start_time = _findText(self.root, _nspath(["Result", "StartTime"]))63 if self.start_time is not None:64 try:65 self.start_time = 
datetime.strptime(self.start_time, TIMEFORMAT)66 except ValueError:67 self.start_time = datetime.strptime(self.start_time[:-6], TIMEFORMAT)68 self.finish_time = _findText(self.root, _nspath(["Result", "FinishTime"]))69 if self.finish_time is not None:70 try:71 self.finish_time = datetime.strptime(self.finish_time, TIMEFORMAT)72 except ValueError:73 self.finish_time = datetime.strptime(self.finish_time[:-6], TIMEFORMAT)74 self.time = _findText(self.root, _nspath(["Result", "Time"]))75 self.status = _findText(self.root, _nspath(["Result", "Status"]))76 self.control_card = _findText(self.root, _nspath(["Result", "ControlCard"]))77 self.club_name = _findText(self.root, _nspath(["Organisation", "Name"]))78 if self.club_name == 'None':79 self.club_name = None80 self.club_name_short = _findText(self.root, _nspath(["Organisation", "ShortName"]))81 if self.club_name_short == 'None':...
Learn to execute automation testing from scratch with LambdaTest Learning Hub: from setting up the prerequisites and running your first automation test, to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!