Best JavaScript code snippet using playwright-internal
Henan.py
Source:Henan.py
1import json2import scrapy3from Testchen.items import TestchenItem4import re5from scrapy import cmdline, selector6class JiangxiSpider(scrapy.Spider):7 name = 'Jiangxi'8 allowed_domains = ['hndzzbtb.hndrc.gov.cn']9 start_urls = ['http://hndzzbtb.hndrc.gov.cn/services/hl/getSelect?response=application/json&pageIndex=1&pageSize=22&day=&sheng=x1&qu=&xian=&title=×tart=&timeend=&categorynum=002001001&siteguid=3955b792-fb32-4dc1-8935-49ad516ae6db']10 def parse(self, response):11 # print(response.text)12 paydata = json.loads(response.text)13 data = paydata['return']14 data_json = json.loads(data)15 for i in data_json['Table']:16 # print('http://hndzzbtb.hndrc.gov.cn' + i['href'])17 sourceUrl ='http://hndzzbtb.hndrc.gov.cn' + i['href']18 yield scrapy.Request("http://hndzzbtb.hndrc.gov.cn" + i['href'],meta={'sourceUrl':sourceUrl}, callback=self.parse2)19 if 'num' in response.meta:20 x = int(response.meta['num'])21 else:22 x = 123 if x<=2:24 x = x + 125 print(x)26 url ="http://hndzzbtb.hndrc.gov.cn/services/hl/getSelect?response=application/json&pageIndex="+str(x)+"&pageSize=22&day=&sheng=x1&qu=&xian=&title=×tart=&timeend=&categorynum=002001001&siteguid=3955b792-fb32-4dc1-8935-49ad516ae6db"27 # print(url)28 yield scrapy.Request(url,meta={'num':str(x)},callback=self.parse)29 def parse2(self, response):30 item = TestchenItem()31 sourceUrl = response.request.meta['sourceUrl']32 print(sourceUrl)33 #item['sourceUrl'] = sourceUrl34 nums = 'æ '35 a = ''36 content = response.xpath('//div[@class="ewb-container ewb-mt24"]')37 for node in content:38 p = node.xpath('.//p')39 for d in p:40 text = d.xpath('.//text()').extract()41 text_str = "".join(text).replace("\n", "")42 a += text_str + '\n'43 a = a.replace("\xa0", "")44 a = a.replace(" ", "")45 a = a.replace(" ", "")46 a = a.replace("建设å°ç¹ï¼\n", "建设å°ç¹ï¼")47 # print(a)48 pNo = re.findall('项ç®ç¼å·ï¼(.*?)\n', a, re.S)49 if pNo:50 pNo = pNo[0]51 pNo = "".join(pNo.split())52 else:53 pNo = re.findall('å·¥ç¨ç¼å·ï¼(.*?)\n', a, re.S)54 if 
pNo:55 pNo = pNo[0]56 pNo = "".join(pNo.split())57 else:58 pNo = re.findall('ææ ç¼å·ï¼(.*?)\n|ææ ç¼å·ï¼[0-9A-Z]{7}-[0-9A-Z]{8}\n', a, re.S)59 if pNo:60 pNo = pNo[0]61 pNo = "".join(pNo.split())62 else:63 pNo = re.findall('éè´ç¼å·ï¼(.*?)\n', a, re.S)64 if pNo:65 pNo = pNo[0]66 pNo = "".join(pNo.split())67 else:68 pNo = str(nums)69 pName = re.findall('éè´é¡¹ç®å称ï¼ï¼(.*?)\n', a, re.S)70 if pName:71 pName = pName[0]72 pName = "".join(pName.split())73 else:74 pName = re.findall('项ç®å称ï¼(.*?)\n', a, re.S)75 if pName:76 pName = pName[0]77 pName = "".join(pName.split())78 else:79 pName = re.findall('å·¥ç¨å称ï¼(.*?)\n', a, re.S)80 if pName:81 pName = pName[0]82 pName = "".join(pName.split())83 else:84 pName = re.findall('项ç®(.*?)å·²', a, re.S)85 if pName:86 pName = pName[0]87 pName = "".join(pName.split())88 else:89 pName = str(nums)90 entName = re.findall('ææ 人ï¼(.*?)\n', a, re.S)91 if entName:92 entName = entName[0]93 entName = "".join(entName.split())94 else:95 entName = str(nums)96 pAddr = re.findall('建设å°ç¹ï¼(.*?)\n', a, re.S)97 if pAddr:98 pAddr = pAddr[0]99 pAddr = "".join(pAddr.split())100 else:101 pAddr = re.findall('å·¥ç¨å°ç¹ï¼(.*?)\n', a, re.S)102 if pAddr:103 pAddr = pAddr[0]104 pAddr = "".join(pAddr.split())105 else:106 pAddr = str(nums)107 pApprovalName = str(nums)108 pApproveOrg = str(nums)109 pSupervision = re.findall('è¡æ¿çç£é¨é¨ï¼(.*?)\n', a, re.S)110 if pSupervision:111 pSupervision = pSupervision[0]112 pSupervision = "".join(pSupervision.split())113 pSupervision = str(nums)114 pubTime = str(nums)115 pBudget = re.findall('æ»æèµï¼(.*?)\n', a, re.S)116 if pBudget:117 pBudget = pBudget[0]118 pBudget = "".join(pBudget.split())119 else:120 pBudget = re.findall('ä¼°ç®ä»·ï¼(.*?)\n', a, re.S)121 if pBudget:122 pBudget = pBudget[0]123 pBudget = "".join(pBudget.split())124 else:125 pBudget = re.findall('ææ æ§å¶ä»·ï¼(.*?)\n', a, re.S)126 if pBudget:127 pBudget = pBudget[0]128 pBudget = "".join(pBudget.split())129 else:130 pBudget = re.findall('é¢ç®éé¢ï¼(.*?)\n', a, 
re.S)131 if pBudget:132 pBudget = pBudget[0]133 pBudget = "".join(pBudget.split())134 else:135 pBudget = re.findall('èµéæ»é¢ï¼(.*?)\n', a, re.S)136 if pBudget:137 pBudget = pBudget[0]138 pBudget = "".join(pBudget.split())139 else:140 pBudget = re.findall('æèµéé¢ï¼(.*?)\n', a, re.S)141 if pBudget:142 pBudget = pBudget[0]143 pBudget = "".join(pBudget.split())144 else:145 pBudget = str(nums)146 linkman = re.findall('è系人ï¼[\u3400-\u9FFF]{3}|è系人ï¼[\u3400-\u9FFF]{2}', a, re.S)147 if linkman:148 linkman = linkman[0]149 linkman = "".join(linkman.split())150 else:151 linkman = re.findall('项ç®è´è´£äººï¼(.*?)\n', a, re.S)152 if linkman:153 linkman = linkman[0]154 linkman = "".join(linkman.split())155 else:156 linkman = re.findall('è系人ï¼(.*?)\n', a, re.S)157 if linkman:158 linkman = linkman[0]159 linkman = "".join(linkman.split())160 else:161 linkman = str(nums)162 tel = re.findall('çµè¯ï¼(.*?)\n', a, re.S)163 if tel:164 tel = tel[0]165 tel = "".join(tel.split())166 else:167 tel = re.findall('èç³»æ¹å¼ï¼(.*?)\n', a, re.S)168 if tel:169 tel = tel[0]170 tel = "".join(tel.split())171 else:172 tel = str(nums)173 Mobile = tel174 email = re.findall('çµåé®ä»¶ï¼(.*?)\n', a, re.S)175 if email:176 email = email[0]177 email = "".join(email.split())178 else:179 email = str(nums)180 fox = re.findall('ä¼ çï¼(.*?)\n', a, re.S)181 if fox:182 fox = fox[0]183 fox = "".join(fox.split())184 else:185 fox = str(nums)186 agentName = re.findall('ææ 代çæºæï¼(.*?)\n', a, re.S)187 if agentName:188 agentName = agentName[0]189 agentName = "".join(agentName.split())190 else:191 agentName = str(nums)192 agentAddr = re.findall('å°åï¼(.*?)\n', a, re.S)193 if agentAddr:194 agentAddr = agentAddr[1]195 agentAddr = "".join(agentAddr.split())196 else:197 agentAddr = re.findall('å°ç¹ï¼(.*?)\nè系人', a, re.S)198 if agentAddr:199 agentAddr = agentAddr[1]200 agentAddr = "".join(agentAddr.split())201 else:202 agentAddr = str(nums)203 agentLinkman = re.findall('è系人ï¼[\u3400-\u9FFF]{3}|è系人ï¼[\u3400-\u9FFF]{2}', a, re.S)204 
if agentLinkman:205 agentLinkman = agentLinkman[1]206 agentLinkman = "".join(agentLinkman.split())207 else:208 agentLinkman = re.findall('项ç®è´è´£äººï¼(.*?)\n', a, re.S)209 if agentLinkman:210 agentLinkman = agentLinkman[1]211 agentLinkman = "".join(agentLinkman.split())212 else:213 agentLinkman = re.findall('è系人ï¼(.*?)\n', a, re.S)214 if agentLinkman:215 agentLinkman = agentLinkman[1]216 agentLinkman = "".join(agentLinkman.split())217 else:218 agentLinkman = str(nums)219 agentTel = re.findall('çµè¯ï¼(.*?)\n', a, re.S)220 if agentTel:221 agentTel = agentTel[1]222 agentTel = "".join(agentTel.split())223 else:224 agentTel = re.findall('èç³»æ¹å¼ï¼(.*?)\n', a, re.S)225 if agentTel:226 agentTel = agentTel[1]227 agentTel = "".join(agentTel.split())228 else:229 agentTel = str(nums)230 agentMobile = agentTel231 agentEmail = re.findall('çµåé®ä»¶ï¼(.*?)\n', a, re.S)232 if agentEmail:233 agentEmail = agentEmail[1]234 agentEmail = "".join(agentEmail.split())235 else:236 agentEmail = str(nums)237 agentFox = re.findall('ä¼ çï¼(.*?)\n', a, re.S)238 if agentFox:239 agentFox = agentFox[0]240 agentFox = "".join(agentFox.split())241 else:242 agentFox = str(nums)243 bidTime = re.findall('å¼æ æ¶é´(.*?)å', a, re.S)244 if bidTime:245 bidTime = bidTime[0]246 bidTime = "".join(bidTime.split())247 else:248 bidTime = re.findall('å¼æ æ¶é´ï¼(.*?)\n', a, re.S)249 if bidTime:250 bidTime = bidTime[0]251 bidTime = "".join(bidTime.split())252 else:253 bidTime = re.findall('å¼æ æ¶é´åå°ç¹\n1.æ¶é´ï¼(.*?)\n', a, re.S)254 if bidTime:255 bidTime = bidTime[0]256 bidTime = "".join(bidTime.split())257 else:258 bidTime = str(nums)259 bidAddr = re.findall('线ä¸å¼æ å°ç¹ï¼(.*?)ã', a, re.S)260 if bidAddr:261 bidAddr = bidAddr[0]262 bidAddr = "".join(bidAddr.split())263 else:264 bidAddr = re.findall('å¼æ å°ç¹ï¼(.*?)\n', a, re.S)265 if bidAddr:266 bidAddr = bidAddr[0]267 bidAddr = "".join(bidAddr.split())268 else:269 bidAddr = re.findall('åãå°ç¹(.*?)ã', a, re.S)270 if bidAddr:271 bidAddr = bidAddr[0]272 bidAddr = 
"".join(bidAddr.split())273 else:274 bidAddr = re.findall('ï¼å°ç¹(.*?)ã', a, re.S)275 if bidAddr:276 bidAddr = bidAddr[0]277 bidAddr = "".join(bidAddr.split())278 else:279 bidAddr = '线ä¸'280 getfileStartTime = re.findall('ææ æ件æ¶é´ï¼(.*?)\n', a, re.S)281 if getfileStartTime:282 getfileStartTime = getfileStartTime[0]283 getfileStartTime = "".join(getfileStartTime.split())284 else:285 getfileStartTime = re.findall('æ件åºå®æ¶é´ï¼(.*?)\n', a, re.S)286 if getfileStartTime:287 getfileStartTime = getfileStartTime[0]288 getfileStartTime = "".join(getfileStartTime.split())289 else:290 getfileStartTime = re.findall('å¡ææåå ææ è
ï¼è¯·äº(.*?)æ¥', a, re.S)291 if getfileStartTime:292 getfileStartTime = getfileStartTime[0]293 getfileStartTime = "".join(getfileStartTime.split())294 else:295 getfileStartTime = re.findall('è·åç£åæ件æ¶é´ï¼(.*?)\n', a, re.S)296 if getfileStartTime:297 getfileStartTime = getfileStartTime[0]298 getfileStartTime = "".join(getfileStartTime.split())299 else:300 getfileStartTime = re.findall('è·åææ æ件åæ¥åæ¶é´\n1.æ¶é´ï¼(.*?)\n', a, re.S)301 if getfileStartTime:302 getfileStartTime = getfileStartTime[0]303 getfileStartTime = "".join(getfileStartTime.split())304 else:305 getfileStartTime = re.findall('ææ æ件è·åæ¶é´ï¼(.*?)\n', a, re.S)306 if getfileStartTime:307 getfileStartTime = getfileStartTime[0]308 getfileStartTime = "".join(getfileStartTime.split())309 else:310 getfileStartTime = str(nums)311 getfileEndTime = re.findall('è·åæ¶é´(.*?)\n', a, re.S)312 if getfileEndTime:313 getfileEndTime = getfileEndTime[0]314 getfileEndTime = "".join(getfileEndTime.split())315 else:316 getfileEndTime = str(nums)317 getfileTimeDesc = re.findall('ææ æ件é交çæªæ¢æ¶é´ï¼ææ æªæ¢æ¶é´ï¼ä¸åï¼(.*?)ï¼', a, re.S)318 if getfileTimeDesc:319 getfileTimeDesc = getfileTimeDesc[0]320 getfileTimeDesc = "".join(getfileTimeDesc.split())321 else:322 getfileTimeDesc = re.findall('æ æ件é交çæªæ¢æ¶é´(å¼æ æ¶é´)ï¼(.*?)\n', a, re.S)323 if getfileTimeDesc:324 getfileTimeDesc = getfileTimeDesc[0]325 getfileTimeDesc = "".join(getfileTimeDesc.split())326 else:327 getfileTimeDesc = re.findall('é交æªæ¢æ¶é´ï¼(.*?)\n', a, re.S)328 if getfileTimeDesc:329 getfileTimeDesc = getfileTimeDesc[0]330 getfileTimeDesc = "".join(getfileTimeDesc.split())331 else:332 getfileTimeDesc = str(nums)333 files = str(nums)334 text = a335 print(pName) # 项ç®å称336 print(entName) # 项ç®æ³äºº337 print(pAddr) # 项ç®å°å338 print(pApprovalName) # 项ç®æ¹æå称339 print(pApproveOrg) # 审æ¹åä½340 print(pSupervision) # ç管é¨é¨341 print(pubTime) # 项ç®å»ºç«æ¶é´342 print(pNo) # 项ç®ç¼å·343 print(pBudget) # 项ç®é¢ç®344 print(linkman) # ææ 人345 print(tel) # èç³»çµè¯346 
print(Mobile) # èç³»çµè¯347 print(email) # çµåé®ç®±348 print(fox) # ä¼ ç349 print(agentName) # 代çæºæå称350 print(agentAddr) # 代çæºæå°å351 print(agentLinkman) # 代çæºæè系人352 print(agentTel) # 代çæºæçµè¯353 print(agentMobile) # 代çæºæçµè¯354 print(agentEmail) # 代çæºæçµåé®ç®±355 print(agentFox) # 代çæºæä¼ ç356 print(files) # é件357 print(bidTime) # å¼æ æ¶é´358 print(bidAddr) # å¼æ å°ç¹359 print(getfileStartTime) # è·åæ¶é´360 #print(getfileEndTime) # è·åæ¶é´361 print(getfileTimeDesc) # æ¶é´è¯´æ362 #print(text) # ææ ææ¬363if __name__ == '__main__':...
hunan.py
Source:hunan.py
1import json2import scrapy3import re4from scrapy import cmdline, selector5from Testchen.items import TestchenItem6class HunanSpider(scrapy.Spider):7 name = 'hunan'8 start_urls = ['https://www.hnsggzy.com/queryContent_20-jygk.jspx?title=&origin=&inDates=&channelId=845&ext=%E6%8B%9B%E6%A0%87/%E8%B5%84%E5%AE%A1%E5%85%AC%E5%91%8A&beginTime=&endTime=']9 def parse(self, response, **kwargs):10 a = response.xpath('//div[@class="article-content"]/ul/li')11 for node in a:12 url = node.xpath('./div/a/@href').extract()13 for c in url:14 sourceUrl = c15 yield scrapy.Request(sourceUrl, meta={'sourceUrl':sourceUrl}, callback=self.parse2)16 if 'num' in response.meta:17 x = int(response.meta['num'])18 else:19 x = 2020 if x < 40:21 x = x + 122 url = "https://www.hnsggzy.com/queryContent_" + str(x) + "-jygk.jspx?title=&origin=&inDates=&channelId=845&ext=%E6%8B%9B%E6%A0%87/%E8%B5%84%E5%AE%A1%E5%85%AC%E5%91%8A&beginTime=&endTime="23 yield scrapy.Request(url, meta={'num': str(x)}, callback=self.parse)24 def parse2(self, response):25 item = TestchenItem()26 sourceUrl = response.request.meta['sourceUrl']27 a = ''28 nums = ''29 content = response.xpath('//div[@class="content-article"]')30 for node in content:31 p = node.xpath('./div//p')32 for d in p:33 text = d.xpath('.//text()').getall()34 text_str = "".join(text).replace("\n", "")35 a += text_str + '\n'36 a = a.replace("\xa0", "")37 a = a.replace(" ", "")38 a = a.replace(" ", "")39 a = a.replace("\u3000", "")40 a = a.replace("\r", "")41 a = a.replace("\t", "")42 pName = re.findall('å·¥ç¨å称ï¼(.*?)\s', a, re.S)43 if pName:44 pName = pName[0]45 pName = "".join(pName.split())46 else:47 pName = re.findall('项ç®å称ï¼(.*?)\s', a, re.S)48 if pName:49 pName = pName[0]50 pName = "".join(pName.split())51 else:52 pName = re.findall('项ç®å称:(.*?)\s', a, re.S)53 if pName:54 pName = pName[0]55 pName = "".join(pName.split())56 else:57 pName = re.findall('\ï¼å称ï¼(.*?)\s', a, re.S)58 if pName:59 pName = pName[0]60 pName = "".join(pName.split())61 else:62 
pName = str(nums)63 if len(pName) > 50:64 pName = str(nums)65 else:66 pName = pName67 entName = re.findall('ææ 人ï¼(.*?)\s', a, re.S)68 if entName:69 entName = entName[0]70 entName = "".join(entName.split())71 else:72 entName = re.findall('éè´äººï¼(.*?)\s', a, re.S)73 if entName:74 entName = entName[0]75 entName = "".join(entName.split())76 else:77 entName = re.findall('ææ 人ï¼\s(.*?)\så°å', a, re.S)78 if entName:79 entName = entName[0]80 entName = "".join(entName.split())81 else:82 entName = str(nums)83 pAddr = re.findall('建设å°ç¹ï¼(.*?)\s', a, re.S)84 if pAddr:85 pAddr = pAddr[0]86 pAddr = "".join(pAddr.split())87 else:88 pAddr = str(nums)89 pNo = re.findall('ææ 代çç¼å·ï¼(.*?)\s', a, re.S)90 if pNo:91 pNo = pNo[0]92 pNo = "".join(pNo.split())93 else:94 pNo = re.findall('ææ ç¼å·ï¼(.*?)\s', a, re.S)95 if pNo:96 pNo = pNo[0]97 pNo = "".join(pNo.split())98 else:99 pNo = str(nums)100 pBudget = re.findall('ææ éé¢ä¸º(.*?)å
', a, re.S)101 if pBudget:102 pBudget = pBudget[0]103 pBudget = "".join(pBudget.split())104 pBudget = pBudget + 'å
'105 else:106 pBudget = re.findall('项ç®æ»æèµ(.*?)å
', a, re.S)107 if pBudget:108 pBudget = pBudget[0]109 pBudget = "".join(pBudget.split())110 pBudget = pBudget + 'å
'111 else:112 pBudget = str(nums)113 linkman = re.findall('è系人:(.*?)\s', a, re.S)114 if linkman:115 linkman = linkman[0]116 linkman = "".join(linkman.split())117 else:118 linkman = re.findall('è系人ï¼(.*?)\s', a, re.S)119 if linkman:120 linkman = linkman[0]121 linkman = "".join(linkman.split())122 else:123 linkman = re.findall('è系人ï¼\s(.*?)\sçµè¯', a, re.S)124 if linkman:125 linkman = linkman[0]126 linkman = "".join(linkman.split())127 else:128 linkman = str(nums)129 if len(linkman) > 8:130 linkman = str(nums)131 else:132 linkman = linkman133 if linkman:134 midtel = "(?s){}[\s]+çµè¯ï¼(.*?)\s".format(linkman)135 tel = re.findall(midtel, a, re.S)136 if tel:137 tel = tel[0]138 tel = "".join(tel.split())139 else:140 tel = str(nums)141 else:142 tel = str(nums)143 if tel == str(nums):144 tel = re.findall('çµè¯ï¼(.*?)\s', a, re.S)145 if tel:146 tel = tel[0]147 tel = "".join(tel.split())148 else:149 tel = re.findall('çµè¯ï¼\s(.*?)\sä¼ ç', a, re.S)150 if tel:151 tel = tel[0]152 tel = "".join(tel.split())153 else:154 tel = str(nums)155 else:156 tel = tel157 if len(tel) > 20:158 tel = str(nums)159 else:160 tel = tel161 agentName = re.findall('ææ 代çæºæï¼(.*?)\s', a, re.S)162 if agentName:163 agentName = agentName[0]164 agentName = "".join(agentName.split())165 else:166 agentName = re.findall('代çæºæï¼(.*?)\s', a, re.S)167 if agentName:168 agentName = agentName[0]169 agentName = "".join(agentName.split())170 else:171 agentName = re.findall('代çæºæï¼\s(.*?)\så°å', a, re.S)172 if agentName:173 agentName = agentName[0]174 agentName = "".join(agentName.split())175 else:176 agentName = str(nums)177 agentAddr = re.findall('å°åï¼(.*?)\s', a, re.S)178 if agentAddr:179 try:180 agentAddr = agentAddr[1]181 agentAddr = "".join(agentAddr.split())182 except Exception as e:183 print('æ ', e)184 agentAddr = re.findall('å°åï¼(.*?)\s', a, re.S)185 if agentAddr:186 agentAddr = agentAddr[0]187 agentAddr = "".join(agentAddr.split())188 else:189 agentAddr = str(nums)190 pass191 else:192 agentAddr = 
str(nums)193 if len(agentAddr) > 50:194 agentAddr = str(nums)195 else:196 agentAddr = agentAddr197 agentLinkman = re.findall('è系人ï¼(.*?)\s', a, re.S)198 if agentLinkman:199 try:200 agentLinkman = agentLinkman[1]201 agentLinkman = "".join(agentLinkman.split())202 except Exception as e:203 print('æ ', e)204 agentLinkman = re.findall('è系人:(.*?)\s', a, re.S)205 if agentLinkman:206 try:207 agentLinkman = agentLinkman[1]208 agentLinkman = "".join(agentLinkman.split())209 except Exception as e:210 print('æ ', e)211 agentLinkman = str(nums)212 pass213 else:214 agentLinkman = str(nums)215 pass216 else:217 agentLinkman = str(nums)218 if agentLinkman != str(nums):219 agentLinkman = agentLinkman220 else:221 agentLinkman = re.findall('è系人ï¼(.*?)\s', a, re.S)222 if agentLinkman:223 try:224 agentLinkman = agentLinkman[0]225 agentLinkman = "".join(agentLinkman.split())226 except Exception as e:227 print('æ ', e)228 agentLinkman = re.findall('è系人:(.*?)\s', a, re.S)229 if agentLinkman:230 try:231 agentLinkman = agentLinkman[0]232 agentLinkman = "".join(agentLinkman.split())233 except Exception as e:234 print('æ ', e)235 agentLinkman = str(nums)236 pass237 else:238 agentLinkman = str(nums)239 pass240 else:241 agentLinkman = str(nums)242 if len(agentLinkman) > 10:243 agentLinkman = str(nums)244 else:245 agentLinkman = agentLinkman246 if agentLinkman:247 midagentTel = "(?s){}[\s]+çµè¯ï¼(.*?)\s".format(agentLinkman)248 agentTel = re.findall(midagentTel, a, re.S)249 if agentTel:250 agentTel = agentTel[0]251 agentTel = "".join(agentTel.split())252 else:253 agentTel = re.findall('èç³»æ¹å¼ï¼(.*?)\s', a, re.S)254 if agentTel:255 try:256 agentTel = agentTel[1]257 agentTel = "".join(agentTel.split())258 except Exception as e:259 print('æ ', e)260 agentTel = re.findall('èç³»æ¹å¼ï¼(.*?)\s', a, re.S)261 if agentTel:262 agentTel = agentTel[0]263 agentTel = "".join(agentTel.split())264 else:265 agentTel = str(nums)266 pass267 else:268 agentTel = re.findall('çµè¯ï¼(.*?)\s', a, re.S)269 if 
agentTel:270 try:271 agentTel = agentTel[1]272 agentTel = "".join(agentTel.split())273 except Exception as e:274 print('æ ', e)275 agentTel = re.findall('çµè¯ï¼(.*?)\s', a, re.S)276 if agentTel:277 agentTel = agentTel[0]278 agentTel = "".join(agentTel.split())279 else:280 agentTel = str(nums)281 pass282 else:283 agentTel = str(nums)284 else:285 agentTel = str(nums)286 if len(agentTel) > 30:287 agentTel = str(nums)288 else:289 agentTel = agentTel290 agentEmail = re.findall('é®ç®±ï¼(.*?)\s', a, re.S)291 if agentEmail:292 agentEmail = agentEmail[0]293 agentEmail = "".join(agentEmail.split())294 else:295 agentEmail = str(nums)296 if len(agentEmail) > 30:297 agentEmail = str(nums)298 else:299 agentEmail = agentEmail300 agentFax = re.findall('ä¼ çï¼(.*?)\s', a, re.S)301 if agentFax:302 agentFax = agentFax[0]303 agentFax = "".join(agentFax.split())304 agentFax = agentFax.replace('ä¼ çï¼', '')305 else:306 agentFax = str(nums)307 if len(agentFax) > 30:308 agentFax = str(nums)309 else:310 agentFax = agentFax311 bidTime = re.findall('å¼æ æ¶é´(.*?)å', a, re.S)312 if bidTime:313 bidTime = bidTime[0]314 bidTime = "".join(bidTime.split())315 bidTime = bidTime.replace("ï¼", "")316 bidTime = bidTime + 'å'317 else:318 bidTime = re.findall('ï¼ä¸å\ï¼(.*?)å', a, re.S)319 if bidTime:320 bidTime = bidTime[0]321 bidTime = "".join(bidTime.split())322 bidTime = bidTime + 'å'323 else:324 bidTime = str(nums)325 if len(bidTime) > 50:326 bidTime = str(nums)327 else:328 bidTime = bidTime329 bidAddr = re.findall('å¼æ å°ç¹ï¼(.*?)ï¼', a, re.S)330 if bidAddr:331 bidAddr = bidAddr[0]332 bidAddr = "".join(bidAddr.split())333 else:334 bidAddr = re.findall('é交å°ç¹ä¸º(.*?)\s', a, re.S)335 if bidAddr:336 bidAddr = bidAddr[0]337 bidAddr = "".join(bidAddr.split())338 else:339 bidAddr = str(nums)340 if len(bidAddr) > 70:341 bidAddr = str(nums)342 else:343 bidAddr = bidAddr344 getfileStartTime = re.findall('请äº(.*?)æ¢', a, re.S)345 if getfileStartTime:346 getfileStartTime = getfileStartTime[0]347 
getfileStartTime = "".join(getfileStartTime.split())348 else:349 getfileStartTime = re.findall('ææ 人ä»(.*?)æ¢', a, re.S)350 if getfileStartTime:351 getfileStartTime = getfileStartTime[0]352 getfileStartTime = "".join(getfileStartTime.split())353 else:354 getfileStartTime = re.findall('请ä»(.*?)\ï¼å京æ¶é´', a, re.S)355 if getfileStartTime:356 getfileStartTime = getfileStartTime[0]357 getfileStartTime = "".join(getfileStartTime.split())358 else:359 getfileStartTime = re.findall('请ä»(.*?)å', a, re.S)360 if getfileStartTime:361 getfileStartTime = getfileStartTime[0]362 getfileStartTime = "".join(getfileStartTime.split())363 getfileStartTime = getfileStartTime + 'å'364 else:365 getfileStartTime = str(nums)366 if len(getfileStartTime) > 50:367 getfileStartTime = str(nums)368 else:369 getfileStartTime = getfileStartTime370 getfileTimeDesc = re.findall('å¼æ æ¶é´(.*?)å', a, re.S)371 if getfileTimeDesc:372 getfileTimeDesc = getfileTimeDesc[0]373 getfileTimeDesc = "".join(getfileTimeDesc.split())374 getfileTimeDesc = getfileTimeDesc.replace("ï¼", "")375 getfileTimeDesc = getfileTimeDesc + 'å'376 else:377 getfileTimeDesc = re.findall('ï¼ä¸å\ï¼(.*?)å', a, re.S)378 if getfileTimeDesc:379 getfileTimeDesc = getfileTimeDesc[0]380 getfileTimeDesc = "".join(getfileTimeDesc.split())381 getfileTimeDesc = getfileTimeDesc + 'å'382 else:383 getfileTimeDesc = str(nums)384 if len(getfileTimeDesc) > 40:385 getfileTimeDesc = str(nums)386 else:387 getfileTimeDesc = getfileTimeDesc388 mobile = tel389 email = ''390 fax = ''391 agentMobile = agentTel392 text = a393 getfileEndTime = ''394 prov = ''395 city = ''396 district = ''397 spider = 'hunan'398 source = ' æ¹åçå
Œ
±èµæºäº¤ææå¡å¹³å°'399 pApprovalName = ''400 pApproveOrg = ''401 pSupervision = ''402 pubTime = ''403 files = ''404 item['entName'] = entName405 item['pNo'] = pNo406 item['pName'] = pName407 item['pAddr'] = pAddr408 item['pApprovalName'] = pApprovalName409 item['pApproveOrg'] = pApproveOrg410 item['pSupervision'] = pSupervision411 item['pubTime'] = pubTime412 item['pBudget'] = pBudget413 item['linkman'] = linkman414 item['tel'] = tel415 item['mobile'] = mobile416 item['email'] = email417 item['fax'] = fax418 item['bidTime'] = bidTime419 item['bidAddr'] = bidAddr420 item['agentName'] = agentName421 item['agentAddr'] = agentAddr422 item['agentLinkman'] = agentLinkman423 item['agentTel'] = agentTel424 item['agentMobile'] = agentMobile425 item['agentEmail'] = agentEmail426 item['agentFax'] = agentFax427 item['prov'] = prov428 item['city'] = city429 item['district'] = district430 item['spider'] = spider431 item['source'] = source432 item['sourceUrl'] = sourceUrl433 item['getfileStartTime'] = getfileStartTime434 item['getfileEndTime'] = getfileEndTime435 item['getfileTimeDesc'] = getfileTimeDesc436 item['text'] = text437 item['files'] = files438 # print(item)439 yield item440if __name__ == '__main__':...
beijing.py
Source:beijing.py
1import json2import scrapy3from Testchen.items import TestchenItem4import re5from scrapy import cmdline, selector6import time7class BeijingSpider(scrapy.Spider):8 name = 'beijing'9 start_urls = ['http://www.ccgp-beijing.gov.cn/xxgg/sjzfcggg/sjzbgg/index_1.html']10 def parse(self, response, **kwargs):11 # print(response.text)12 a = response.xpath('//ul[@class="xinxi_ul"]')13 for node in a:14 url = node.xpath('./li/a/@href').extract()15 for c in url:16 c = c.replace('./','/')17 # print(c)18 sourceUrl = 'http://www.ccgp-beijing.gov.cn/xxgg/sjzfcggg/sjzbgg' + c19 yield scrapy.Request(sourceUrl, meta={'sourceUrl': sourceUrl}, callback=self.parse2)20 if 'num' in response.meta:21 x = int(response.meta['num'])22 else:23 x = 124 if x < 143:25 x = x + 126 url ="http://www.ccgp-beijing.gov.cn/xxgg/sjzfcggg/sjzbgg/index_" + str(x) + ".html"27 yield scrapy.Request(url,meta={'num':str(x)},callback=self.parse)28 def parse2(self, response):29 item = TestchenItem()30 sourceUrl = response.request.meta['sourceUrl']31 a = ''32 nums = ''33 content = response.xpath('//div[@align="left"]')34 for node in content:35 p = node.xpath('.//p')36 for d in p:37 text = d.xpath('.//text()').getall()38 text_str = "".join(text).replace("\n", "")39 a += text_str + '\n'40 a = a.replace("\xa0", "")41 a = a.replace(" ", "")42 a = a.replace(" ", "")43 a = a.replace("\u3000", "")44 a = a.replace("\r", "")45 a = a.replace("\t", "")46 pName = re.findall('项ç®å称ï¼(.*?)\s', a, re.S)47 if pName:48 pName = pName[0]49 pName = "".join(pName.split())50 else:51 pName = str(nums)52 if len(pName) > 50:53 pName = str(nums)54 else:55 pName = pName56 entName = re.findall('éè´äººä¿¡æ¯\så称ï¼(.*?)\s', a, re.S)57 if entName:58 entName = entName[0]59 entName = "".join(entName.split())60 else:61 entName = re.findall('éè´äººå称ï¼(.*?)\s', a, re.S)62 if entName:63 entName = entName[0]64 entName = "".join(entName.split())65 else:66 entName = str(nums)67 pAddr = re.findall('项ç®å°ç¹ï¼(.*?)\s', a, re.S)68 if pAddr:69 pAddr = 
pAddr[0]70 pAddr = "".join(pAddr.split())71 else:72 midpAddr = "(?s){}[\s]+å°åï¼(.*?)\s".format(entName)73 pAddr = re.findall(midpAddr, a, re.S)74 if pAddr:75 pAddr = pAddr[0]76 pAddr = "".join(pAddr.split())77 else:78 pAddr = str(nums)79 pNo = re.findall('项ç®ç¼å·ï¼(.*?)\s', a, re.S)80 if pNo:81 pNo = pNo[0]82 pNo = "".join(pNo.split())83 else:84 pNo = re.findall('æ 段ç¼å·:(.*?)\s', a, re.S)85 if pNo:86 pNo = pNo[0]87 pNo = "".join(pNo.split())88 else:89 pNo = str(nums)90 pBudget = re.findall('é¢ç®éé¢ï¼(.*?)\s', a, re.S)91 if pBudget:92 pBudget = pBudget[0]93 pBudget = "".join(pBudget.split())94 else:95 pBudget = re.findall('èµéæ¥æºå项ç®æèµä¼°ç®é¢ï¼(.*?)\s', a, re.S)96 if pBudget:97 pBudget = pBudget[0]98 pBudget = "".join(pBudget.split())99 else:100 pBudget = str(nums)101 if len(pBudget)>20:102 pBudget = str(nums)103 else:104 pBudget = pBudget105 midlinkman = re.findall('èç³»æ¹å¼ï¼(.*?)\s', a, re.S)106 if midlinkman:107 midlinkman = midlinkman[0]108 midlinkman = "".join(midlinkman.split())109 linkman = re.findall('[\u4e00-\u9fa5]+', midlinkman, re.S)110 if linkman:111 linkman = linkman[0]112 linkman = "".join(linkman.split())113 else:114 linkman = str(nums)115 else:116 linkman = re.findall('è系人ï¼(.*?)\s', a, re.S)117 if linkman:118 linkman = linkman[0]119 linkman = "".join(linkman.split())120 else:121 linkman = str(nums)122 if len(linkman) > 8:123 linkman = str(nums)124 else:125 linkman = linkman126 if linkman:127 midtel = "(?s){}+,(.*?)\s".format(linkman)128 tel = re.findall(midtel, a, re.S)129 if tel:130 tel = tel[0]131 tel = "".join(tel.split())132 else:133 tel = str(nums)134 else:135 tel = re.findall('çµè¯ï¼(.*?)\s', a, re.S)136 if tel:137 tel = tel[0]138 tel = "".join(tel.split())139 else:140 tel = str(nums)141 if len(tel) > 20:142 tel = str(nums)143 else:144 tel = tel145 fax = re.findall('ä¼ çï¼(.*?)\s', a, re.S)146 if fax:147 fax = fax[0]148 fax = "".join(fax.split())149 else:150 fax = str(nums)151 if len(fax) > 20:152 fax = str(nums)153 else:154 fax = fax155 
agentName = re.findall('ææ 代çæºæï¼(.*?)\s', a, re.S)156 if agentName:157 agentName = agentName[0]158 agentName = "".join(agentName.split())159 else:160 agentName = re.findall('éè´ä»£çæºæä¿¡æ¯\så称ï¼(.*?)\s', a, re.S)161 if agentName:162 agentName = agentName[0]163 agentName = "".join(agentName.split())164 else:165 agentName = str(nums)166 midagentAddr = "(?s){}[\s]+å°åï¼(.*?)\s".format(agentName)167 agentAddr = re.findall(midagentAddr, a, re.S)168 if agentAddr:169 agentAddr = agentAddr[0]170 agentAddr = "".join(agentAddr.split())171 else:172 agentAddr = re.findall('å°åï¼(.*?)\s', a, re.S)173 if agentAddr:174 try:175 agentAddr = agentAddr[1]176 agentAddr = "".join(agentAddr.split())177 except Exception as e:178 print('æ ', e)179 agentAddr = str(nums)180 else:181 agentAddr = str(nums)182 if len(agentAddr) > 50:183 agentAddr = str(nums)184 else:185 agentAddr = agentAddr186 agentLinkman = re.findall('项ç®è系人ï¼(.*?)\s', a, re.S)187 if agentLinkman:188 agentLinkman = agentLinkman[0]189 agentLinkman = "".join(agentLinkman.split())190 else:191 agentLinkman = re.findall('è系人ï¼(.*?)\s', a, re.S)192 if agentLinkman:193 try:194 agentLinkman = agentLinkman[1]195 agentLinkman = "".join(agentLinkman.split())196 except Exception as e:197 print('æ ', e)198 agentLinkman = str(nums)199 pass200 else:201 agentLinkman = str(nums)202 if len(agentLinkman) > 10:203 agentLinkman = str(nums)204 else:205 agentLinkman = agentLinkman206 midagentTel = re.findall('çµè¯ï¼(.*?)\s', a, re.S)207 if midagentTel:208 midagentTel = midagentTel[0]209 midagentTel = "".join(midagentTel.split())210 agentTel = re.findall(r'\d{4}-\d{7}|\d{3}-\d{8}|d{11}', a, re.S)211 if agentTel:212 agentTel = agentTel[0]213 agentTel = "".join(agentTel.split())214 else:215 agentTel = str(nums)216 else:217 agentTel = str(nums)218 if len(agentTel) > 30:219 agentTel = str(nums)220 else:221 agentTel = agentTel222 agentEmail = re.findall('é®ç®±ï¼(.*?)\s', a, re.S)223 if agentEmail:224 agentEmail = agentEmail[0]225 agentEmail = 
"".join(agentEmail.split())226 else:227 agentEmail = str(nums)228 if len(agentEmail) > 30:229 agentEmail = str(nums)230 else:231 agentEmail = agentEmail232 agentFax = re.findall('ä¼ çï¼(.*?)\s', a, re.S)233 if agentFax:234 try:235 agentFax = agentFax[1]236 agentFax = "".join(agentFax.split())237 except Exception as e:238 print('æ ', e)239 agentFax = str(nums)240 pass241 else:242 agentFax = str(nums)243 if len(agentFax) > 30:244 agentFax = str(nums)245 else:246 agentFax = agentFax247 bidTime = re.findall('å¼å¯\sæ¶é´ï¼(.*?)\s', a, re.S)248 if bidTime:249 bidTime = bidTime[0]250 bidTime = "".join(bidTime.split())251 else:252 bidTime = re.findall('å¼æ æ¶é´:(.*?)\s', a, re.S)253 if bidTime:254 bidTime = bidTime[0]255 bidTime = "".join(bidTime.split())256 else:257 bidTime = re.findall('æ交ææ æ件æªæ¢æ¶é´ãå¼æ æ¶é´åå°ç¹\s(.*?)\s', a, re.S)258 if bidTime:259 bidTime = bidTime[0]260 bidTime = "".join(bidTime.split())261 else:262 bidTime = str(nums)263 midbidAddr = "(?s){}[\s]+å°ç¹ï¼(.*?)\s".format(bidTime)264 bidAddr = re.findall(midbidAddr, a, re.S)265 if bidAddr:266 bidAddr = bidAddr[0]267 bidAddr = "".join(bidAddr.split())268 else:269 bidAddr = re.findall('å¼æ å°ç¹:(.*?)\s', a, re.S)270 if bidAddr:271 bidAddr = bidAddr[0]272 bidAddr = "".join(bidAddr.split())273 else:274 bidAddr = str(nums)275 if len(bidAddr) > 70:276 bidAddr = str(nums)277 else:278 bidAddr = bidAddr279 getfileStartTime = re.findall('è·åææ æ件\sæ¶é´ï¼(.*?)\s', a, re.S)280 if getfileStartTime:281 getfileStartTime = getfileStartTime[0]282 getfileStartTime = "".join(getfileStartTime.split())283 else:284 getfileStartTime = re.findall('è·åéè´æ件\sæ¶é´ï¼(.*?)\s', a, re.S)285 if getfileStartTime:286 getfileStartTime = getfileStartTime[0]287 getfileStartTime = "".join(getfileStartTime.split())288 else:289 getfileStartTime = re.findall('è·åç«äºæ§ç£åæ件çæ¶é´ï¼(.*?)\s', a, re.S)290 if getfileStartTime:291 getfileStartTime = getfileStartTime[0]292 getfileStartTime = "".join(getfileStartTime.split())293 else:294 
getfileStartTime = str(nums)295 if len(getfileStartTime) > 80:296 getfileStartTime = str(nums)297 else:298 getfileStartTime = getfileStartTime299 getfileTimeDesc = re.findall('å
¬åæé\s(.*?)\s', a, re.S)300 if getfileTimeDesc:301 getfileTimeDesc = getfileTimeDesc[0]302 getfileTimeDesc = "".join(getfileTimeDesc.split())303 else:304 getfileTimeDesc = re.findall('é交ææ æ件çæªæ¢æ¶é´ä¸º(.*?)ã', a, re.S)305 if getfileTimeDesc:306 getfileTimeDesc = getfileTimeDesc[0]307 getfileTimeDesc = "".join(getfileTimeDesc.split())308 else:309 getfileTimeDesc = str(nums)310 if len(getfileTimeDesc) > 40:311 getfileTimeDesc = str(nums)312 else:313 getfileTimeDesc = getfileTimeDesc314 mobile = tel315 email = ''316 agentMobile = agentTel317 text = a318 getfileEndTime = ''319 prov = ''320 city = ''321 district = ''322 spider = 'beijing'323 source = 'å京æ¿åºéè´ç½'324 pApprovalName = ''325 pApproveOrg = ''326 pSupervision = ''327 pubTime = ''328 files = ''329 item['pNo'] = pNo330 item['pName'] = pName331 item['entName'] = entName332 item['pAddr'] = pAddr333 item['pApprovalName'] = pApprovalName334 item['pApproveOrg'] = pApproveOrg335 item['pSupervision'] = pSupervision336 item['pubTime'] = pubTime337 item['pBudget'] = pBudget338 item['linkman'] = linkman339 item['tel'] = tel340 item['mobile'] = mobile341 item['email'] = email342 item['fax'] = fax343 item['bidTime'] = bidTime344 item['bidAddr'] = bidAddr345 item['agentName'] = agentName346 item['agentAddr'] = agentAddr347 item['agentLinkman'] = agentLinkman348 item['agentTel'] = agentTel349 item['agentMobile'] = agentMobile350 item['agentEmail'] = agentEmail351 item['agentFax'] = agentFax352 item['prov'] = prov353 item['city'] = city354 item['district'] = district355 item['spider'] = spider356 item['source'] = source357 item['sourceUrl'] = sourceUrl358 item['getfileStartTime'] = getfileStartTime359 item['getfileEndTime'] = getfileEndTime360 item['getfileTimeDesc'] = getfileTimeDesc361 item['text'] = text362 item['files'] = files363 # print(item)364 yield item365if __name__ == '__main__':...
builder.js
Source:builder.js
2let _ = require('lodash');3let fs = require('fs');4let path = require('path');5module.exports = builder;6function getFile(locale, file) {7 if (locale === 'en-DK' || fs.existsSync(path.join(__dirname, `${file}.json`))) {8 return require(file);9 } else {10 console.log(`!Attention: Translation file ${file}.json not found. Falling back to en-DK (developers original text)`);11 return {};12 }13}14function builder(locale, folder, keepEmptyStrings) {15 folder = folder || 'translations';16 keepEmptyStrings = keepEmptyStrings || false;17 locale = locale || 'en';18 let translations = _.merge(19 {},20 getFile(locale, `./${folder}/${locale}/enums/mixs`),21 getFile(locale, `./${folder}/${locale}/enums/basisOfRecord`),22 getFile(locale, `./${folder}/${locale}/enums/cms`),23 getFile(locale, `./${folder}/${locale}/enums/collectionPreservationType`),24 getFile(locale, `./${folder}/${locale}/enums/collectionContentType`),25 getFile(locale, `./${folder}/${locale}/enums/collectionAccessionStatus`),26 getFile(locale, `./${folder}/${locale}/enums/continent`),27 getFile(locale, `./${folder}/${locale}/enums/country`),28 getFile(locale, `./${folder}/${locale}/enums/discipline`),29 getFile(locale, `./${folder}/${locale}/enums/downloadFormat`),30 getFile(locale, `./${folder}/${locale}/enums/dwcaExtension`),31 getFile(locale, `./${folder}/${locale}/enums/endpointType`),32 getFile(locale, `./${folder}/${locale}/enums/establishmentMeans`),33 getFile(locale, `./${folder}/${locale}/enums/identifierType`),34 getFile(locale, `./${folder}/${locale}/enums/installationType`),35 getFile(locale, `./${folder}/${locale}/enums/institutionType`),36 getFile(locale, `./${folder}/${locale}/enums/institutionGovernance`),37 getFile(locale, `./${folder}/${locale}/enums/integerEnums`),38 getFile(locale, `./${folder}/${locale}/enums/issueEnum`),39 getFile(locale, `./${folder}/${locale}/enums/issueHelp`),40 getFile(locale, `./${folder}/${locale}/enums/iucnRedListCategory`),41 getFile(locale, 
`./${folder}/${locale}/enums/threatStatus`),42 getFile(locale, `./${folder}/${locale}/enums/language`),43 getFile(locale, `./${folder}/${locale}/enums/license`),44 getFile(locale, `./${folder}/${locale}/enums/mediaType`),45 getFile(locale, `./${folder}/${locale}/enums/nameTypeEnum`),46 getFile(locale, `./${folder}/${locale}/enums/nameUsageOrigin`),47 getFile(locale, `./${folder}/${locale}/enums/occurrenceIssue`),48 getFile(locale, `./${folder}/${locale}/enums/originEnum`),49 getFile(locale, `./${folder}/${locale}/enums/projections`),50 getFile(locale, `./${folder}/${locale}/enums/region`),51 getFile(locale, `./${folder}/${locale}/enums/role`),52 getFile(locale, `./${folder}/${locale}/enums/taxonomicStatus`),53 getFile(locale, `./${folder}/${locale}/enums/participationStatus`),54 getFile(locale, `./${folder}/${locale}/enums/taxonRank`),55 getFile(locale, `./${folder}/${locale}/enums/typeStatus`),56 getFile(locale, `./${folder}/${locale}/enums/extension`),57 getFile(locale, `./${folder}/${locale}/enums/error`),58 getFile(locale, `./${folder}/${locale}/enums/datasetType`),59 getFile(locale, `./${folder}/${locale}/components/grscicoll`),60 getFile(locale, `./${folder}/${locale}/components/counts`),61 getFile(locale, `./${folder}/${locale}/components/feedback`),62 getFile(locale, `./${folder}/${locale}/components/filters`),63 getFile(locale, `./${folder}/${locale}/components/galleryBar`),64 getFile(locale, `./${folder}/${locale}/components/nameParser`),65 getFile(locale, `./${folder}/${locale}/components/pagination`),66 getFile(locale, `./${folder}/${locale}/components/footer`),67 getFile(locale, `./${folder}/${locale}/components/filterNames`),68 getFile(locale, `./${folder}/${locale}/components/ocurrenceFieldNames`),69 getFile(locale, `./${folder}/${locale}/components/occurrenceKey`),70 getFile(locale, `./${folder}/${locale}/components/occurrenceSearch`),71 getFile(locale, `./${folder}/${locale}/components/downloadReport`),72 getFile(locale, 
`./${folder}/${locale}/components/validation`),73 getFile(locale, `./${folder}/${locale}/components/healthDetails`),74 getFile(locale, `./${folder}/${locale}/components/healthSummary`),75 getFile(locale, `./${folder}/${locale}/components/installation`),76 getFile(locale, `./${folder}/${locale}/components/gbifNetwork`),77 getFile(locale, `./${folder}/${locale}/components/network`),78 getFile(locale, `./${folder}/${locale}/components/terms`),79 getFile(locale, `./${folder}/${locale}/components/participationStatus`),80 getFile(locale, `./${folder}/${locale}/components/participant`),81 getFile(locale, `./${folder}/${locale}/components/map`),82 getFile(locale, `./${folder}/${locale}/components/species`),83 getFile(locale, `./${folder}/${locale}/components/speciesSearch`),84 getFile(locale, `./${folder}/${locale}/components/homepage`),85 getFile(locale, `./${folder}/${locale}/components/profile`),86 getFile(locale, `./${folder}/${locale}/components/cms`),87 getFile(locale, `./${folder}/${locale}/components/downloads`),88 getFile(locale, `./${folder}/${locale}/components/directory`),89 getFile(locale, `./${folder}/${locale}/components/contactUs`),90 getFile(locale, `./${folder}/${locale}/components/metrics`),91 getFile(locale, `./${folder}/${locale}/components/resource`),92 getFile(locale, `./${folder}/${locale}/components/resourceSearch`),93 getFile(locale, `./${folder}/${locale}/components/dataset`),94 getFile(locale, `./${folder}/${locale}/components/datasetRegistry`),95 getFile(locale, `./${folder}/${locale}/components/tools`),96 getFile(locale, `./${folder}/${locale}/components/search`),97 getFile(locale, `./${folder}/${locale}/components/publisher`),98 getFile(locale, `./${folder}/${locale}/components/country`),99 getFile(locale, `./${folder}/${locale}/components/trends`),100 getFile(locale, `./${folder}/${locale}/components/eoi`),101 getFile(locale, `./${folder}/${locale}/components/intervals`),102 getFile(locale, `./${folder}/${locale}/components/phrases`),103 
getFile(locale, `./${folder}/${locale}/components/downloadUsage`)104 );105 if (!keepEmptyStrings) {106 removeEmptyStrings(translations);107 }108 return translations;109}110function removeEmptyStrings(obj) {111 _.each(obj, (val, key) => {112 if (typeof val === 'string' && val === '') {113 delete obj[key];114 } else if (typeof (val) === 'object') {115 removeEmptyStrings(obj[key]);116 }117 });...
op-restricted-names.js
Source:op-restricted-names.js
...6 {fullPath:'/b', isDirectory:true},7 {fullPath:'/c', isDirectory:true}8 ],9 tests: [10 function(helper) { helper.getFile('/', '.', {create:true}, FileError.SECURITY_ERR); },11 function(helper) { helper.getFile('/', '..', {create:true}, FileError.SECURITY_ERR); },12 function(helper) { helper.getFile('/', 'con', {create:true}, 0); },13 function(helper) { helper.getFile('/', 'CON', {create:true}, 0); },14 function(helper) { helper.getFile('/', 'Con', {create:true}, 0); },15 function(helper) { helper.getFile('/', 'cOn.txt', {create:true}, 0); },16 function(helper) { helper.getFile('/', 'a/coN', {create:true}, 0); },17 function(helper) { helper.getFile('/', 'prn', {create:true}, 0); },18 function(helper) { helper.getFile('/', 'PRN', {create:true}, 0); },19 function(helper) { helper.getFile('/', 'Prn', {create:true}, 0); },20 function(helper) { helper.getFile('/', 'pRn.txt', {create:true}, 0); },21 function(helper) { helper.getFile('/', 'a/prN', {create:true}, 0); },22 function(helper) { helper.getFile('/', 'aux', {create:true}, 0); },23 function(helper) { helper.getFile('/', 'AUX', {create:true}, 0); },24 function(helper) { helper.getFile('/', 'Aux', {create:true}, 0); },25 function(helper) { helper.getFile('/', 'aUx.txt', {create:true}, 0); },26 function(helper) { helper.getFile('/', 'a/auX', {create:true}, 0); },27 function(helper) { helper.getFile('/', 'nul', {create:true}, 0); },28 function(helper) { helper.getFile('/', 'NUL', {create:true}, 0); },29 function(helper) { helper.getFile('/', 'Nul', {create:true}, 0); },30 function(helper) { helper.getFile('/', 'nUl.txt', {create:true}, 0); },31 function(helper) { helper.getFile('/', 'a/nuL', {create:true}, 0); },32 function(helper) { helper.getFile('/', 'com1', {create:true}, 0); },33 function(helper) { helper.getFile('/', 'COM2', {create:true}, 0); },34 function(helper) { helper.getFile('/', 'Com4', {create:true}, 0); },35 function(helper) { helper.getFile('/', 'cOM7.foo', {create:true}, 0); },36 function(helper) 
{ helper.getFile('/', 'a/coM9', {create:true}, 0); },37 function(helper) { helper.getFile('/', 'lpt1', {create:true}, 0); },38 function(helper) { helper.getFile('/', 'LPT2', {create:true}, 0); },39 function(helper) { helper.getFile('/', 'Lpt4', {create:true}, 0); },40 function(helper) { helper.getFile('/', 'lPT7.foo', {create:true}, 0); },41 function(helper) { helper.getFile('/', 'a/lpT9', {create:true}, 0); },42 function(helper) { helper.getFile('/', 'foo ', {create:true}, 0); },43 function(helper) { helper.getFile('/', 'foo\n', {create:true}, 0); },44 function(helper) { helper.getFile('/', 'foo.', {create:true}, 0); },45 function(helper) { helper.copy('/a', '/', 'foo ', 0); },46 function(helper) { helper.copy('/a', '/', 'foo\t', 0); },47 function(helper) { helper.copy('/a', '/', 'foo..', 0); },48 function(helper) { helper.move('/a', '/', 'foo ', 0); },49 function(helper) { helper.move('/b', '/', 'foo\t\t', 0); },50 function(helper) { helper.move('/c', '/', 'foo.....', 0); },51 ],52 postcondition: [53 {fullPath:'/foo ', isDirectory:true},54 {fullPath:'/foo\t\t', isDirectory:true},55 {fullPath:'/foo.....', isDirectory:true}56 ],57 },58];
op-restricted-unicode.js
Source:op-restricted-unicode.js
2 {3 name: 'RestrictedUnicodeChars',4 precondition: [ ],5 tests: [6 function(helper) { helper.getFile('/', 'ab', {create:true}); },7 // Embedded NULs aren't allowed; anything else in the first 32 is fair game.8 function(helper) { helper.getFile('/', 'a\u0000b', {create:true}, FileError.INVALID_MODIFICATION_ERR); },9 function(helper) { helper.getFile('/', 'a\u0001b', {create:true}, 0); },10 function(helper) { helper.getFile('/', 'a\u0002b', {create:true}, 0); },11 function(helper) { helper.getFile('/', 'a\u0003b', {create:true}, 0); },12 function(helper) { helper.getFile('/', 'a\u0004b', {create:true}, 0); },13 function(helper) { helper.getFile('/', 'a\u0005b', {create:true}, 0); },14 function(helper) { helper.getFile('/', 'a\u0006b', {create:true}, 0); },15 function(helper) { helper.getFile('/', 'a\u0007b', {create:true}, 0); },16 function(helper) { helper.getFile('/', 'a\u0008b', {create:true}, 0); },17 function(helper) { helper.getFile('/', 'a\u0009b', {create:true}, 0); },18 function(helper) { helper.getFile('/', 'a\u000ab', {create:true}, 0); },19 function(helper) { helper.getFile('/', 'a\u000bb', {create:true}, 0); },20 function(helper) { helper.getFile('/', 'a\u000cb', {create:true}, 0); },21 function(helper) { helper.getFile('/', 'a\u000db', {create:true}, 0); },22 function(helper) { helper.getFile('/', 'a\u000eb', {create:true}, 0); },23 function(helper) { helper.getFile('/', 'a\u000fb', {create:true}, 0); },24 function(helper) { helper.getFile('/', 'a\u0010b', {create:true}, 0); },25 function(helper) { helper.getFile('/', 'a\u0011b', {create:true}, 0); },26 function(helper) { helper.getFile('/', 'a\u0012b', {create:true}, 0); },27 function(helper) { helper.getFile('/', 'a\u0013b', {create:true}, 0); },28 function(helper) { helper.getFile('/', 'a\u0014b', {create:true}, 0); },29 function(helper) { helper.getFile('/', 'a\u0015b', {create:true}, 0); },30 function(helper) { helper.getFile('/', 'a\u0016b', {create:true}, 0); },31 function(helper) { 
helper.getFile('/', 'a\u0017b', {create:true}, 0); },32 function(helper) { helper.getFile('/', 'a\u0018b', {create:true}, 0); },33 function(helper) { helper.getFile('/', 'a\u0019b', {create:true}, 0); },34 function(helper) { helper.getFile('/', 'a\u001ab', {create:true}, 0); },35 function(helper) { helper.getFile('/', 'a\u001bb', {create:true}, 0); },36 function(helper) { helper.getFile('/', 'a\u001cb', {create:true}, 0); },37 function(helper) { helper.getFile('/', 'a\u001db', {create:true}, 0); },38 function(helper) { helper.getFile('/', 'a\u001eb', {create:true}, 0); }39 ],40 postcondition: [41 {fullPath:'/ab'},42 ],43 },...
op-restricted-chars.js
Source:op-restricted-chars.js
...5 {fullPath:'/ab', isDirectory:true},6 ],7 tests: [8 // Test for difficult characters in 'path' parameters.9 function(helper) { helper.getFile('/', 'a<b', {create:true}, 0); },10 function(helper) { helper.getFile('/', 'a>b', {create:true}, 0); },11 function(helper) { helper.getFile('/', 'a:b', {create:true}, 0); },12 function(helper) { helper.getFile('/', 'a?b', {create:true}, 0); },13 function(helper) { helper.getFile('/', 'a*b', {create:true}, 0); },14 function(helper) { helper.getFile('/', 'a"b', {create:true}, 0); },15 function(helper) { helper.getFile('/', 'a|b', {create:true}, 0); },16 function(helper) { helper.getFile('/', '<ab', {create:true}, 0); },17 function(helper) { helper.getFile('/', ':ab', {create:true}, 0); },18 function(helper) { helper.getFile('/', '?ab', {create:true}, 0); },19 function(helper) { helper.getFile('/', '*ab', {create:true}, 0); },20 function(helper) { helper.getFile('/', '"ab', {create:true}, 0); },21 function(helper) { helper.getFile('/', '|ab', {create:true}, 0); },22 function(helper) { helper.getFile('/', 'ab<', {create:true}, 0); },23 function(helper) { helper.getFile('/', 'ab:', {create:true}, 0); },24 function(helper) { helper.getFile('/', 'ab?', {create:true}, 0); },25 function(helper) { helper.getFile('/', 'ab*', {create:true}, 0); },26 function(helper) { helper.getFile('/', 'ab"', {create:true}, 0); },27 function(helper) { helper.getFile('/', 'ab|', {create:true}, 0); },28 // Only '\\' is disallowed.29 function(helper) { helper.getFile('/', 'a\\b', {create:true}, FileError.INVALID_MODIFICATION_ERR); },30 // Test for difficult characters in 'name' parameters.31 function(helper) { helper.copy('/ab', '/', ' a<b', 0); },32 function(helper) { helper.copy('/ab', '/', ' a:b', 0); },33 function(helper) { helper.copy('/ab', '/', ' a?b', 0); },34 function(helper) { helper.copy('/ab', '/', ' a*b', 0); },35 function(helper) { helper.copy('/ab', '/', ' a"b', 0); },36 function(helper) { helper.copy('/ab', '/', ' a|b', 0); },37 // 
'Name' parameter cannot contain '/' or '\\'.38 function(helper) { helper.copy('/ab', '/', 'a/b', FileError.INVALID_MODIFICATION_ERR); },39 function(helper) { helper.copy('/ab', '/', 'a\\b', FileError.INVALID_MODIFICATION_ERR); },40 ],41 postcondition: [42 {fullPath:'/ab', isDirectory:true},43 {fullPath:'a<b', isDirectory:false},...
Using AI Code Generation
// Example script: launch Chromium, open a page, fetch a file through
// `getFile`, and write the result to disk next to this script.
const fs = require('fs');
const path = require('path');
const { chromium } = require('playwright');
// NOTE(review): `getFile` is imported from a Playwright-internal module path,
// not from the public API; confirm this export exists in the installed version.
const { getFile } = require('playwright/lib/server/browserContext');

(async () => {
  const browser = await chromium.launch();
  try {
    const context = await browser.newContext();
    const page = await context.newPage();
    const data = await getFile(page, '/tmp/download.pdf');
    // path.join() concatenates the segments, so the output lands in
    // <script dir>/tmp/download.pdf rather than the system-wide /tmp.
    fs.writeFileSync(path.join(__dirname, '/tmp/download.pdf'), data);
  } finally {
    // Always shut the launched browser down, even when a step above throws;
    // previously an error skipped browser.close() entirely.
    await browser.close();
  }
})();
Using AI Code Generation
1const fs = require('fs');2const path = require('path');3const playwright = require('playwright');4const { getFile } = require('playwright/lib/server/browserContext');5const { chromium } = require('playwright');6(async () => {7 const browser = await chromium.launch({ headless: false });8 const page = await browser.newPage();9 await page.screenshot({ path: 'google.png' });10 const file = await getFile(page, 'google.png');11 const newfilePath = path.join(__dirname, 'google.png');12 fs.writeFileSync(newfilePath, file);13 await browser.close();14})();15{16 "scripts": {17 },18 "dependencies": {19 }20}
Using AI Code Generation
1const { getFile } = require('playwright/lib/utils/utils');2const path = require('path');3(async () => {4 const filePath = path.join(__dirname, 'test.txt');5 const file = await getFile(filePath);6 console.log(file);7})();8{ name: 'test.txt',9 buffer: <Buffer 48 65 6c 6c 6f 20 57 6f 72 6c 64 21> }
Using AI Code Generation
1const { getFile } = require('playwright/lib/server/chromium/crNetworkManager');2const fs = require('fs');3(async () => {4 fs.writeFileSync('example.html', file);5})();6const { chromium } = require('playwright');7(async () => {8 const browser = await chromium.launch();9 const page = await browser.newPage();10 const file = await page._delegate._browserContext._browser._downloadManager._downloads[0].path();11 console.log(file);12})();
Using AI Code Generation
1const { getFile } = require('playwright/lib/server/chromium/crNetworkManager');2const fs = require('fs');3(async () => {4 fs.writeFileSync('./image.png', file);5})();6{7 "scripts": {8 },9 "dependencies": {10 }11}
Using AI Code Generation
1const { getFile } = require('playwright/lib/server/supplements/recorder/recorderSupplement');2const file = await getFile('test.txt');3console.log(file);4const { writeFile } = require('playwright/lib/server/supplements/recorder/recorderSupplement');5await writeFile('test.txt', 'This is a test file');6const { deleteFile } = require('playwright/lib/server/supplements/recorder/recorderSupplement');7await deleteFile('test.txt');8import { getFile } from 'playwright/lib/server/supplements/recorder/recorderSupplement';9const file = await getFile('test.txt');10console.log(file);11import { writeFile } from 'playwright/lib/server/supplements/recorder/recorderSupplement';12await writeFile('test.txt', 'This is a test file');13import { deleteFile } from 'playwright/lib/server/supplements/recorder/recorderSupplement';14await deleteFile('test.txt');15Your name to display (optional):16Your name to display (optional):
Using AI Code Generation
1const { test, expect } = require('@playwright/test');2test('My test', async ({ page }) => {3 const file = await page._context._browserContext._browser._downloadManager._downloads[0]._downloadPath;4 const fileName = await page._context._browserContext._browser._downloadManager._downloads[0]._downloadPath.split("/").pop();5 const fileSize = await page._context._browserContext._browser._downloadManager._downloads[0]._downloadSize;6 console.log(file);7 console.log(fileName);8 console.log(fileSize);9});10Your name to display (optional):11Your name to display (optional):
LambdaTest’s Playwright tutorial gives you a broad overview of the Playwright automation framework, its unique features, and its use cases, with examples to deepen your understanding of Playwright testing. The tutorial offers A-to-Z guidance, from installing the Playwright framework to best practices and advanced concepts.
Get 100 minutes of automation testing FREE!!