test_find_link.py
Source: test_find_link.py
# coding=utf-8
import os
import re
import json
import unittest
import responses
import find_link
import urllib.parse
from unittest.mock import patch


def wiki_url(params):
    default = {
        'action': 'query',
        'formatversion': 2,
        'format': 'json',
    }
    base = 'https://en.wikipedia.org/w/api.php?'
    url = base + urllib.parse.urlencode({**default, **params})
    print(url)
    return url


def one_page(page):
    return json.dumps({'query': {'pages': [page]}})


def json_query(query):
    return json.dumps({'query': query})


class TestFindLink(unittest.TestCase):
    @responses.activate
    def test_get_case_from_content(self):
        title = 'London congestion charge'
        url = wiki_url({
            'prop': 'revisions|info',
            'rvprop': 'content|timestamp',
            'titles': title,
        })
        body = one_page({
            "revisions": [{
                "timestamp": "2015-08-07T15:37:03Z",
                "content": ("The '''London congestion charge''' is a fee "
                            "charged on most motor vehicles operating within "
                            "the Congestion Charge Zone (CCZ)")
            }]
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        self.assertEqual(find_link.core.get_case_from_content(title), title)

        article = 'MyCar is exempt from the London Congestion Charge, road tax and parking charges.'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func('London congestion charge', article)
            self.assertEqual(r, 'London congestion charge')

    @responses.activate
    def test_get_wiki_info(self):
        body = one_page({
            'pageid': 312605,
            'ns': 0,
            'title': 'Government budget deficit',
            'touched': '2011-11-24T22:06:21Z',
            'lastrevid': 462258859,
            'counter': '',
            'length': 14071
        })
        url = wiki_url({
            'redirects': '',
            'titles': 'government budget deficit',
            'prop': 'info',
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        redirect = find_link.api.get_wiki_info('government budget deficit')
        self.assertIsNone(redirect)

        body = one_page({
            'ns': 0,
            'title': 'Government budget deficits',
            'missing': True
        })
        url = wiki_url({
            'redirects': '',
            'titles': 'government budget deficits',
            'prop': 'info',
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        self.assertRaises(find_link.api.Missing,
                          find_link.api.get_wiki_info,
                          'government budget deficits')

    @responses.activate
    def test_cat_start(self):
        body = json.dumps({"query": {"allpages": []}})
        url = 'https://en.wikipedia.org/w/api.php'
        responses.add(responses.GET, url, body=body)
        self.assertEqual(find_link.api.cat_start('test123'), [])

    @responses.activate
    def test_all_pages(self):
        title = 'Government budget deficit'
        body = json_query({
            "allpages": [{"pageid": 312605, "ns": 0, "title": title}]
        })
        url = wiki_url({'apfilterredir': 'nonredirects',
                        'apprefix': title,
                        'list': 'allpages',
                        'apnamespace': 0,
                        'aplimit': 500})
        responses.add(responses.GET, url, body=body, match_querystring=True)
        result = find_link.api.all_pages(title)
        self.assertListEqual(result, [])

    @responses.activate
    def test_categorymembers(self):
        body = json_query({"categorymembers": []})
        url = wiki_url({
            'cmnamespace': 0,
            'list': 'categorymembers',
            'cmlimit': 500,
            'cmtitle': 'Test123'
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        self.assertListEqual(find_link.core.categorymembers('test123'), [])

    @responses.activate
    def test_is_redirect_to(self):
        title_from = 'Bread maker'
        title_to = 'Bread machine'
        body = one_page({
            'ns': 0,
            'title': 'Bread maker',
            'touched': '2015-06-21T15:12:00Z',
            'length': 27,
            'redirect': True
        })
        url = wiki_url({'titles': 'Bread maker', 'prop': 'info'})
        responses.add(responses.GET, url, body=body, match_querystring=True)
        url = wiki_url({'titles': 'Bread maker', 'prop': 'revisions', 'rvprop': 'content'})
        body = one_page({
            'ns': 0,
            'title': 'Bread maker',
            'revisions': [{
                'contentformat': 'text/x-wiki',
                'contentmodel': 'wikitext',
                'content': '#REDIRECT [[Bread machine]]'
            }]
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        self.assertTrue(find_link.core.is_redirect_to(title_from, title_to))

        title_from = 'Sugarlump'
        title_to = 'Sugar'
        url = wiki_url({'prop': 'info', 'titles': 'Sugarlump'})
        body = one_page({
            'ns': 0,
            'title': 'Sugarlump',
            'missing': True,
            'contentmodel': 'wikitext',
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        self.assertFalse(find_link.core.is_redirect_to(title_from, title_to))

    @responses.activate
    def test_wiki_redirects(self):
        url = wiki_url({
            'blfilterredir': 'redirects',
            'bllimit': 500,
            'bltitle': 'market town',
            'list': 'backlinks',
            'blnamespace': 0,
        })
        body = json_query({
            'backlinks': [
                {'title': 'Market-town', 'redirect': ''},
                {'title': 'Market towns', 'redirect': ''},
                {'title': 'Marktgemeinde', 'redirect': ''},
                {'title': 'Market right', 'redirect': ''},
                {'title': 'Market towns in England', 'redirect': ''},
                {'title': 'Market rights', 'redirect': ''},
                {'title': 'Market charter', 'redirect': ''},
                {'title': 'Market town privileges', 'redirect': ''}
            ]
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        result = find_link.api.wiki_redirects('market town')
        self.assertTrue(all(isinstance(title, str) for title in result))

    def test_en_dash(self):
        title = u'obsessive\u2013compulsive disorder'
        content = 'This is a obsessive-compulsive disorder test'
        (c, r) = find_link.match.find_link_in_content(title, content)
        self.assertEqual(r, title)
        self.assertEqual(c, u'This is a [[obsessive\u2013compulsive disorder]] test')
        (c, r) = find_link.match.find_link_in_content(title, content)
        self.assertEqual(r, title)
        self.assertEqual(c, u'This is a [[obsessive\u2013compulsive disorder]] test')

        content = 'This is a [[obsessive-compulsive]] disorder test'
        (c, r) = find_link.match.find_link_in_content(title, content)
        self.assertEqual(r, title)
        self.assertEqual(c, u'This is a [[obsessive\u2013compulsive disorder]] test')
        (c, r) = find_link.match.find_link_in_content(title, content)
        self.assertEqual(r, title)
        self.assertEqual(c, u'This is a [[obsessive\u2013compulsive disorder]] test')

    @responses.activate
    def test_wiki_search(self):
        url = wiki_url({
            'list': 'search',
            'srlimit': 50,
            'srsearch': '"hedge"',
            'continue': '',
            'srwhat': 'text'
        })
        body = json_query({
            "searchinfo": {"totalhits": 444},
            "search": [{
                "ns": 0,
                "title": "Coaching inn",
                "snippet": "approximately the mid-17th century for a period of about 200 years, the <span class=\"searchmatch\">coaching</span> <span class=\"searchmatch\">inn</span>, sometimes called a coaching house or staging inn, was a vital part of",
                "size": 4918,
                "wordcount": 561,
                "timestamp": "2015-08-04T13:20:24Z"
            }, {
                "ns": 0,
                "title": "Varbuse",
                "snippet": "Estonian Road Museum is located in the former Varbuse <span class=\"searchmatch\">coaching</span> <span class=\"searchmatch\">inn</span>. Varbuse <span class=\"searchmatch\">coaching</span> <span class=\"searchmatch\">inn</span> Estonian Road Museum \"Population by place\"",
                "size": 2350,
                "wordcount": 96,
                "timestamp": "2015-01-02T23:23:10Z"
            }]
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        url = wiki_url({
            'continue': '',
            'srsearch': '"coaching inn"',
            'list': 'search',
            'srlimit': 50,
            'srwhat': 'text',
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        totalhits, results = find_link.api.wiki_search('coaching inn')
        self.assertGreater(totalhits, 0)
        totalhits, results = find_link.api.wiki_search('hedge (finance)')
        self.assertGreater(totalhits, 0)

    @responses.activate
    def test_do_search(self):
        url = wiki_url({
            'continue': '',
            'action': 'query',
            'srsearch': '"market town"',
            'srwhat': 'text',
            'format': 'json',
            'list': 'search',
            'srlimit': '50',
        })
        body = '{"query":{"searchinfo":{"totalhits":3593},"search":[{"ns":0,"title":"Market town","snippet":"<span class=\\"searchmatch\\">Market</span> <span class=\\"searchmatch\\">town</span> or market right is a legal term, originating in the medieval period, for a European settlement that has the right to host markets, distinguishing","size":10527,"wordcount":1362,"timestamp":"2015-06-25T18:19:23Z"},{"ns":0,"title":"V\\u011btrn\\u00fd Jen\\u00edkov","snippet":"V\\u011btrn\\u00fd Jen\\u00edkov (Czech pronunciation: [\\u02c8vj\\u025btr\\u0329ni\\u02d0\\u02c8j\\u025b\\u0272i\\u02d0kof]) is a <span class=\\"searchmatch\\">market</span> <span class=\\"searchmatch\\">town</span> in the Jihlava District, Vyso\\u010dina Region of the Czech Republic. About 582","size":833,"wordcount":76,"timestamp":"2013-02-28T19:49:34Z"}]}}'
        responses.add(responses.GET, url, body=body, match_querystring=True)

        url = wiki_url({
            'continue': '',
            'blnamespace': '0',
            'bllimit': '500',
            'action': 'query',
            'format': 'json',
            'list': 'backlinks',
            'bltitle': 'market town'
        })
        body = '{"query":{"backlinks":[{"pageid":1038,"ns":0,"title":"Aarhus"},{"pageid":1208,"ns":0,"title":"Alan Turing"},{"pageid":2715,"ns":0,"title":"Abergavenny"},{"pageid":4856,"ns":0,"title":"Borough"},{"pageid":5391,"ns":0,"title":"City"},{"pageid":6916,"ns":0,"title":"Colony"},{"pageid":8166,"ns":0,"title":"Devon"},{"pageid":13616,"ns":0,"title":"Howard Carter"},{"pageid":13861,"ns":0,"title":"Hampshire"},{"pageid":13986,"ns":0,"title":"Hertfordshire"},{"pageid":16143,"ns":0,"title":"John Locke"},{"pageid":16876,"ns":0,"title":"Kingston upon Thames"},{"pageid":19038,"ns":0,"title":"Municipality"},{"pageid":20206,"ns":0,"title":"Manchester"},{"pageid":22309,"ns":0,"title":"Oslo"},{"pageid":22422,"ns":0,"title":"Olney Hymns"},{"pageid":23241,"ns":0,"title":"Telecommunications in China"},{"pageid":25798,"ns":0,"title":"Reykjav\\u00edk"},{"pageid":25897,"ns":0,"title":"Road"},{"pageid":26316,"ns":0,"title":"Racial segregation"}]}}'
        responses.add(responses.GET, url, body=body, match_querystring=True)

        url = wiki_url({
            'apnamespace': '14',
            'aplimit': '500',
            'format': 'json',
            'action': 'query',
            'apprefix': 'market town',
            'apfilterredir': 'nonredirects',
            'list': 'allpages'
        })
        body = '{"query":{"allpages":[{"pageid":27601242,"ns":14,"title":"Category:Market towns"}]}}'
        responses.add(responses.GET, url, body=body, match_querystring=True)

        url = wiki_url({
            'cmlimit': '500',
            'action': 'query',
            'cmtitle': 'Category:Market town',
            'cmnamespace': '0',
            'format': 'json',
            'list': 'categorymembers'
        })
        body = '{"query":{"categorymembers":[]}}'
        responses.add(responses.GET, url, body=body, match_querystring=True)

        url = wiki_url({
            'cmlimit': '500',
            'action': 'query',
            'cmtitle': 'Category:Market towns',
            'cmnamespace': '0',
            'format': 'json',
            'list': 'categorymembers'
        })
        body = '{"query":{"categorymembers":[]}}'
        responses.add(responses.GET, url, body=body, match_querystring=True)

        url = wiki_url({
            'apfilterredir': 'nonredirects',
            'apprefix': 'Market town',
            'list': 'allpages',
            'apnamespace': 0,
            'aplimit': 500,
        })
        body = '{"query":{"allpages":[{"pageid":145965,"ns":0,"title":"Market town"},{"pageid":13941316,"ns":0,"title":"Market towns of Buskerud county"}]}}'
        responses.add(responses.GET, url, body=body, match_querystring=True)

        url = wiki_url({
            'prop': 'templates',
            'continue': '',
            'tlnamespace': 10,
            'titles': 'V\u011btrn\u00fd Jen\u00edkov',
            'tllimit': 500,
        })
        body = '{"query":{"pages":[{"pageid":17087711,"ns":0,"title":"V\u011btrn\u00fd Jen\u00edkov","templates":[{"ns":10,"title":"Template:Asbox"},{"ns":10,"title":"Template:Commons"}]}]}}'
        responses.add(responses.GET, url, body=body, match_querystring=True)

        reply = find_link.core.do_search('market town', None)
        self.assertIsInstance(reply, dict)
        self.assertSetEqual(set(reply.keys()), {'totalhits', 'results', 'longer'})
        self.assertGreater(reply['totalhits'], 0)
        self.assertIsInstance(reply['results'], list)
        self.assertGreater(len(reply['results']), 0)
        self.assertTrue(any(title.startswith('Market towns of') for title in reply['longer']))

    def test_parse_cite(self):
        bindir = os.path.abspath(os.path.dirname(__file__))
        filename = os.path.join(bindir, 'cite_parse_error')
        sample = open(filename).read()
        found_duty = False
        for a, b in find_link.match.parse_cite(sample):
            if 'duty' in b.lower():
                found_duty = True
        self.assertTrue(found_duty)

    def test_avoid_link_in_cite(self):
        tp = 'magic'
        content = 'test <ref>{{cite web|title=Magic|url=http://magic.com}}</ref>'
        (c, r) = find_link.match.find_link_in_content(tp, content + ' ' + tp)
        self.assertEqual(c, content + ' [[' + tp + ']]')
        self.assertEqual(r, tp)
        self.assertRaises(find_link.match.NoMatch, find_link.match.find_link_in_content, tp, content)

        tp = 'abc'
        content = '==Early life==\n<ref>{{cite news|}}</ref>abc'
        (c, r) = find_link.match.find_link_in_content(tp, content)
        self.assertEqual(c, content.replace(tp, '[[' + tp + ']]'))
        self.assertEqual(r, tp)

    def test_coastal_sage_scrub(self):
        sample = '''Depend on a [[habitat]] that has shown substantial historical or recent declines in size. This criterion infers the population viability of a species based on trends in the habitats upon which it specializes. Coastal [[wetland]]s, particularly in the urbanized [[San Francisco Bay]] and south-coastal areas, alluvial fan [[sage (plant)|sage]] [[scrubland|scrub]] and coastal sage scrub in the southern coastal basins, and arid scrub in the [[San Joaquin Valley]], are examples of California habitats that have seen dramatic reductions in size in recent history. Species that specialize in these habitats generally meet the criteria for Threatened or Endangered status or Special Concern status;'''
        (c, r) = find_link.match.find_link_in_chunk('coastal sage scrub', sample)
        self.assertEqual(c, sample.replace('coastal sage scrub', '[[coastal sage scrub]]'))
        self.assertEqual(r, 'coastal sage scrub')

    def test_section_iter(self):
        result = list(find_link.match.section_iter('test'))
        self.assertListEqual(result, [(None, 'test')])
        text = '''==Heading 1 ==
Paragraph 1.
==Heading 2 ==
Paragraph 2.
'''
        expect = [
            ('==Heading 1 ==\n', 'Paragraph 1.\n'),
            ('==Heading 2 ==\n', 'Paragraph 2.\n')
        ]
        self.assertListEqual(list(find_link.match.section_iter(text)), expect)

    def test_get_subsections(self):
        text = '''==Heading 1 ==
Paragraph 1.
==Heading 2 ==
Paragraph 2.
===Level 2===
Paragraph 3.
==Heading 4==
Paragraph 4.
'''
        self.assertEqual(find_link.match.get_subsections(text, 4), '')

    @responses.activate
    def test_match_found(self):
        url = wiki_url({'prop': 'revisions|info', 'titles': 'payment protection insurance', 'rvprop': 'content|timestamp'})
        content = "{{multiple issues|\n{{Globalize|2=the United Kingdom|date=July 2011}}\n{{Original research|date=April 2009}}\n}}\n'''Payment protection insurance''' ('''PPI'''), also known as '''credit insurance''', '''credit protection insurance''', or '''loan repayment insurance''', is an insurance product that enables consumers to insure repayment of credit if the borrower dies, becomes ill or disabled, loses a job, or faces other circumstances that may prevent them from earning income to service the debt. It is not to be confused with [[income protection insurance]], which is not specific to a debt but covers any income. PPI was widely sold by banks and other credit providers as an add-on to the loan or overdraft product.<ref>{{cite web | url=http://www.fsa.gov.uk/consumerinformation/product_news/insurance/payment_protection_insurance_/what-is-ppi | title=What is payment protection insurance? | accessdate=17 February 2014}}</ref>"
        body = json.dumps({
            "query": {
                "pages": [
                    {
                        "title": "Payment protection insurance",
                        "revisions": [{"timestamp": "2016-03-26T17:56:25Z", "content": content}]
                    }
                ]
            }
        })
        responses.add(responses.GET, url, body=body, match_querystring=True)
        l = 'payment protection insurance'
        l2 = 'payment Protection Insurance'
        m = re.compile('(P)' + l[1:], re.I).match('P' + l2[1:])
        self.assertEqual(find_link.match.match_found(m, l, None), l)

    def test_avoid_link_in_heading(self):
        tp = 'test phrase'
        content = '''
=== Test phrase ===
This sentence contains the test phrase.'''
        (c, r) = find_link.match.find_link_in_content(tp, content)
        self.assertEqual(c, content.replace(tp, '[[' + tp + ']]'))
        self.assertEqual(r, tp)

    @responses.activate
    @patch('find_link.match.get_case_from_content', lambda s: None)
    def test_find_link_in_content(self):  # this test is slow
        # orig_get_case_from_content = find_link.core.get_case_from_content
        # find_link.core.get_case_from_content = lambda s: None
        self.assertRaises(find_link.match.NoMatch, find_link.match.find_link_in_content, 'foo', 'bar')

        input_content = 'Able to find this test\n\nphrase in an article.'
        self.assertRaises(find_link.match.NoMatch,
                          find_link.match.find_link_in_content,
                          'test phrase', input_content)
        input_content = 'Able to find this test \n \n phrase in an article.'
        self.assertRaises(find_link.match.NoMatch,
                          find_link.match.find_link_in_content,
                          'test phrase', input_content)

        otrain = 'Ticketing on the O-Train works entirely on a proof-of-payment basis; there are no ticket barriers or turnstiles, and the driver does not check fares.'
        (c, r) = find_link.match.find_link_in_content('ticket barriers', otrain, linkto='turnstile')
        self.assertEqual(c, otrain.replace('turnstile', '[[turnstile]]'))
        self.assertEqual(r, 'turnstile')

        sample = """On April 26, 2006, Snoop Dogg and members of his entourage were arrested after being turned away from [[British Airways]]' first class lounge at [[Heathrow Airport]]. Snoop and his party were not allowed to enter the lounge because some of the entourage were flying first class, other members in economy class. After the group was escorted outside, they vandalized a duty-free shop by throwing whiskey bottles. Seven police officers were injured in the midst of the disturbance. After a night in prison, Snoop and the other men were released on bail on April 27, but he was unable to perform at the Premier Foods People's Concert in [[Johannesburg]] on the same day. As part of his bail conditions, he had to return to the police station in May. The group has been banned by British Airways for "the foreseeable future."<ref>{{cite news|url=http://news.bbc.co.uk/1/hi/entertainment/4949430.stm |title=Rapper Snoop Dogg freed on bail |publisher=BBC News |date=April 27, 2006 |accessdate=January 9, 2011}}</ref><ref>{{cite news|url=http://news.bbc.co.uk/1/hi/entertainment/4953538.stm |title=Rap star to leave UK after arrest |publisher=BBC News |date=April 28, 2006 |accessdate=January 9, 2011}}</ref> When Snoop Dogg appeared at a London police station on May 11, he was cautioned for [[affray]] under [[Fear or Provocation of Violence|Section 4]] of the [[Public Order Act 1986|Public Order Act]] for use of threatening words or behavior.<ref>{{cite news|url=http://newsvote.bbc.co.uk/1/hi/entertainment/4761553.stm|title=Rap star is cautioned over brawl |date=May 11, 2006|publisher=BBC News |accessdate=July 30, 2009}}</ref> On May 15, the [[Home Office]] decided that Snoop Dogg should be denied entry to the United Kingdom for the foreseeable future due to the incident at Heathrow as well as his previous convictions in the United States for drugs and firearms offenses.<ref>{{cite web|url=http://soundslam.com/articles/news/news.php?news=060516_snoopb |title=Soundslam News |publisher=Soundslam.com |date=May 16, 2006 |accessdate=January 9, 2011}}</ref><ref>{{cite web|url=http://uk.news.launch.yahoo.com/dyna/article.html?a=/060516/340/gbrj1.html&e=l_news_dm |title=Snoop 'banned from UK' |publisher=Uk.news.launch.yahoo.com |accessdate=January 9, 2011}}</ref> Snoop Dogg's visa card was rejected by local authorities on March 24, 2007 because of the Heathrow incident.<ref>{{cite news |first=VOA News |title=Rapper Snoop Dogg Arrested in UK |date=April 27, 2006 |publisher=Voice of America |url=http://classic-web.archive.org/web/20060603120934/http://voanews.com/english/archive/2006-04/2006-04-27-voa17.cfm |work=VOA News |accessdate=December 31, 2008}}</ref> A concert at London's Wembley Arena on March 27 went ahead with Diddy (with whom he toured Europe) and the rest of the show."""
        (c, r) = find_link.match.find_link_in_content('duty-free shop', sample)
        self.assertEqual(c, sample.replace('duty-free shop', '[[duty-free shop]]'))
        self.assertEqual(r, 'duty-free shop')

        sample = '[[Retriever]]s are typically used when [[waterfowl]] hunting. Since a majority of waterfowl hunting employs the use of small boats'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func('waterfowl hunting', sample)
            self.assertEqual(c, sample.replace(']] hunting', ' hunting]]'))
            self.assertEqual(r, 'waterfowl hunting')

        sample = 'abc [[File:Lufschiffhafen Jambol.jpg|thumb|right|Jamboli airship hangar in Bulgaria]] abc'
        q = 'airship hangar'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func(q, sample)
            self.assertEqual(c, sample.replace(q, '[[' + q + ']]'))
            self.assertEqual(r, q)

        sample = 'It is relatively easy for insiders to capture insider-trading like gains through the use of "open market repurchases." Such transactions are legal and generally encouraged by regulators through safeharbours against insider trading liability.'
        q = 'insider trading'
        q = 'ski mountaineering'  # Germán Cerezo Alonso
        sample = 'started ski mountaineering in 1994 and competed first in the 1997 Catalunyan Championship. He finished fifth in the relay event of the [[2005 European Championship of Ski Mountaineering]].'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func(q, sample)
            self.assertEqual(c, sample.replace(q, '[[' + q + ']]'))
            self.assertEqual(r, q)

        q = 'fall of the Iron Curtain'
        linkto = 'revolutions of 1989'
        sample = 'With the fall of the [[Iron Curtain]] and the associated'
        # search_for_link = mk_link_matcher(q)
        # m = search_for_link(sample)
        # replacement = match_found(m, q, linkto)
        # self.assertEqual(replacement, 'revolutions of 1989|fall of the Iron Curtain]]')
        (c, r) = find_link.match.find_link_in_chunk(q, sample, linkto=linkto)
        self.assertEqual(c, sample.replace('fall of the [[', '[[revolutions of 1989|fall of the '))
        self.assertEqual(r, 'revolutions of 1989|fall of the Iron Curtain')

        q = 'religious conversion'
        sample = 'There were no reports of [[forced religious conversion]], including of minor U.S. citizens'
        self.assertRaises(find_link.match.LinkReplace, find_link.match.find_link_in_chunk, q, sample)

        q = 'two-factor authentication'
        sample = "Two factor authentication is a 'strong authentication' method as it"
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func(q, sample)
            self.assertEqual(c, "[[Two-factor authentication]] is a 'strong authentication' method as it")
            self.assertEqual(r, q[0].upper() + q[1:])

        q = 'spherical trigonometry'
        sample = 'also presents the spherical trigonometrical formulae'
        (c, r) = find_link.match.find_link_in_content('spherical trig', sample, linkto=q)
        self.assertEqual(c, 'also presents the [[spherical trigonometry|spherical trigonometrical]] formulae')
        self.assertEqual(r, 'spherical trigonometry|spherical trigonometrical')

        q = 'post-World War II baby boom'
        sample = 'huge boost during the post World War II [[Baby Boomer|Baby Boom]].'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func(q, sample)
            self.assertEqual(c, 'huge boost during the [[post-World War II baby boom]].')
            self.assertEqual(r, q)

        q = 'existence of God'
        sample = 'with "je pense donc je suis" or "[[cogito ergo sum]]" or "I think, therefore I am", argued that "the self" is something that we can know exists with [[epistemology|epistemological]] certainty. Descartes argued further that this knowledge could lead to a proof of the certainty of the existence of [[God]], using the [[ontological argument]] that had been formulated first by [[Anselm of Canterbury]].{{Citation needed|date=January 2012}}'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func(q, sample)
            self.assertEqual(c, sample.replace('existence of [[God', '[[existence of God'))
            self.assertEqual(r, q)

        q = 'virtual machine'
        sample = 'It compiles Python programs into intermediate bytecode, which is executed by the virtual machine. Jython compiles into Java byte code, which can then be executed by every [[Java Virtual Machine]] implementation. This also enables the use of Java class library functions from the Python program.'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func(q, sample)
            self.assertEqual(c, sample.replace('virtual machine', '[[virtual machine]]'))
            self.assertEqual(r, q)

        url = wiki_url({
            'prop': 'info',
            'redirects': '',
            'titles': 'Teleological argument'
        })
        body = json.dumps({
            'query': {
                'pages': [{
                    'pageid': 30731,
                    'ns': 0,
                    'title': 'Teleological argument',
                }],
            }
        })
        q = 'existence of God'
        sample = '[[Intelligent design]] is an [[Teleological argument|argument for the existence of God]],'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            responses.add(responses.GET, url, body=body, match_querystring=True)
            self.assertRaises(find_link.match.LinkReplace, func, q, sample)

        q = 'correlation does not imply causation'
        sample = 'Indeed, an important axiom that social scientists cite, but often forget, is that "[[correlation]] does not imply [[Causality|causation]]."'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func(q, sample)
            self.assertEqual(c, 'Indeed, an important axiom that social scientists cite, but often forget, is that "[[correlation does not imply causation]]."')
            self.assertEqual(r, q)

        sample = "A '''pedestal desk''' is usually a large free-standing [[desk]]"
        self.assertRaises(find_link.match.NoMatch, find_link.match.find_link_in_content, 'standing desk', sample)

        pseudocode1 = 'These languages are typically [[Dynamic typing|dynamically typed]], meaning that variable declarations and other [[Boilerplate_(text)#Boilerplate_code|boilerplate code]] can be omitted.'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func('boilerplate code', pseudocode1)
            self.assertEqual(c, pseudocode1.replace('Boilerplate_(text)#Boilerplate_code|', ''))
            self.assertEqual(r, 'boilerplate code')

        pseudocode2 = 'Large amounts of [[boilerplate (text)#Boilerplate code|boilerplate]] code.'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func('boilerplate code', pseudocode2)
            self.assertEqual(c, pseudocode2.replace('(text)#Boilerplate code|boilerplate]] code', 'code]]'))
            self.assertEqual(r, 'boilerplate code')

        sample = 'particularly to handle the peak volumes of work generated by Payment Protection Insurance complaints.'
        (c, r) = find_link.match.find_link_in_content('payment protection insurance', sample)
        self.assertIn('payment protection insurance', c)
        (c, r) = find_link.match.find_link_in_text('payment protection insurance', sample)
        self.assertIn('payment protection insurance', c)

        if False:
            sample = 'further investigations on [[Extrajudicial punishment|extrajudicial killings]] by police forces.'
            q = 'extrajudicial killing'
            (c, r) = find_link.match.find_link_in_content(q, sample)
            self.assertIn(q, c)
            (c, r) = find_link.match.find_link_in_text(q, sample)
            self.assertIn(q, c)

        sample = 'units formed with [[SI prefix|metric prefixes]], such as kiloseconds'
        find_link.match.find_link_in_content('metric prefix', sample)

        sample = u"==Geography==\nA gem of Bermuda's coastline, it is surrounded by [[St. George's Parish, Bermuda|St. George's Parish]] in the north, east, south (Tucker's Town), and [[Hamilton Parish, Bermuda|Hamilton Parish]] in the west. A chain of islands and rocks stretches across the main opening to the [[Atlantic Ocean]], in the east, notably [[Cooper's Island, Bermuda|Cooper's Island]] (which was made a landmass contiguous to St. David's Island and Longbird Island in the 1940s), and [[Nonsuch Island, Bermuda|Nonsuch Island]]. The only channel suitable for large vessels to enter the harbour from the open Atlantic is [[Castle Roads, Bermuda|Castle Roads]], which was historically guarded by a number of fortifications, on [[Castle Island, Bermuda|Castle Island]], Brangman's Island, and Goat Island. Forts were also placed nearby on other small islands, and on the Tucker's Town peninsula of the Main Island. In the west, [[The Causeway, Bermuda|The Causeway]] crosses from the main island to St. David's Island, and beyond this a stretch of water known as [[Ferry Reach, Bermuda|Ferry Reach]] connects the harbour with [[St. George's Harbor, Bermuda|St. George's Harbour]] to the north, where Bermuda's first permanent settlement, [[St. George's, Bermuda|St. George's Town]], was founded in 1612. An unincorporated settlement, [[Tucker's Town, Bermuda|Tucker's Town]], was established on the [[peninsula]] of the [[Main Island, Bermuda|Main Island]] at the south-west of the harbour. The settlement was cleared by compulsory purchase order in the 1920s in order to create a luxury enclave where homes could be purchased by wealthy foreigners, and the attendant Mid Ocean Golf Club. In [[Hamilton Parish, Bermuda|Hamilton Parish]], on the western shore of the harbour, lies [[Walsingham Bay, Bermuda|Walsingham Bay]], the site where, in 1609-10, the crew of the wrecked [[Sea Venture]] built the ''[[Patience]]'', one of two ships built, which carried most of the survivors of the wrecking to [[Jamestown, Virginia|Jamestown]], [[Virginia]], in 1610. The ''Patience'' returned to Bermuda with [[George Somers|Admiral Sir George Somers]], who died in Bermuda later that year."
        find_link.match.find_link_in_content('compulsory purchase order', sample)

        if False:
            yard = "primary [[Hump yard|hump classification yards]] are located in Allentown."
            for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
                (c, r) = func('classification yard', yard)
                self.assertEqual(c, yard.replace('[[Hump yard|hump classification yards]]', 'hump [[classification yard]]s'))
                self.assertEqual(r, 'classification yard')

        yard2 = 'A major [[hump yard|railway classification yard]] is north of Blenheim at [[Spring Creek, New Zealand|Spring Creek]].'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func('classification yard', yard2)
            self.assertEqual(c, yard2.replace('[[hump yard|railway classification yard]]', 'railway [[classification yard]]'))
            self.assertEqual(r, 'classification yard')

        yard3 = 'Five houses were destroyed and three others were damaged. A high school was also heavily damaged and railroad cars were thrown in a small freight classification yard. Four people were injured.'
        for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
            (c, r) = func('classification yard', yard3)
            self.assertEqual(c, yard3.replace('classification yard', '[[classification yard]]'))
            self.assertEqual(r, 'classification yard')
        # yard2 = 'For the section from [[Rotterdam]] to the large [[Kijfhoek (classification yard)|classification yard Kijfhoek]] existing track was reconstructed, but three quarters of the line is new, from Kijfhoek to [[Zevenaar]] near the German border.'
        # (c, r) = find_link.match.find_link_in_text('classification yard', yard2)

        if False:
            sample = 'GEHA also has a contract with the federal government to administer benefits for the [[Pre-existing Condition Insurance Plan]], which will be a transitional program until 2014.'
            q = 'pre-existing condition'
            for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
                self.assertRaises(find_link.match.LinkReplace, func, q, sample)

        station = 'Ticket barriers control access to all platforms, although the bridge entrance has no barriers.'
        (c, r) = find_link.match.find_link_in_content('ticket barriers', station, linkto='turnstile')
        self.assertEqual(c, station.replace('Ticket barriers', '[[Turnstile|Ticket barriers]]'))
        self.assertEqual(r, 'Turnstile|Ticket barriers')

        content = [
            'Able to find this test phrase in an article.',
            'Able to find this test phrase in an article.',
            'Able to find this test\n phrase in an article.',
            'Able to find this test \nphrase in an article.',
            'Able to find this test\nphrase in an article.',
            'Able to find this test-phrase in an article.',
            'Able to find this test PHRASE in an article.',
            'Able to find this TEST PHRASE in an article.',
            'Able to find this test\nPhrase in an article.',
            'Able to find this [[test]] phrase in an article.',
            'Able to find this TEST [[PHRASE]] in an article.',
            'Able to find this [[testing|test]] phrase in an article.',
            'Able to find this testphrase in an article.']
        for input_content in content:
            for func in find_link.match.find_link_in_content, find_link.match.find_link_in_text:
                (c, r) = func('test phrase', input_content)
                self.assertEqual(c, 'Able to find this [[test phrase]] in an article.')
                self.assertEqual(r, 'test phrase')

        q = 'recoil operation'
        article = 'pattern of long-recoil operation as the 25mm and 40mm guns.'
        search_for_link = find_link.match.mk_link_matcher(q)
        self.assertFalse(search_for_link(article))

        q = 'after-dinner speaker'
        linkto = 'public speaking'
        sample = 'in demand as an [[Public speaker|after-dinner speaker]].'
        (c, r) = find_link.match.find_link_in_chunk(q, sample, linkto=linkto)
        self.assertEqual(c, sample.replace('Public speaker', 'public speaking'))
...
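The tests above never touch the network: every call into find_link goes through requests, and the responses library intercepts those calls and serves the canned MediaWiki JSON registered with responses.add (match_querystring=True makes the stub match only that exact query string). Here is a minimal, self-contained sketch of that same pattern; the helpers demo_wiki_url and demo_page_length are hypothetical stand-ins for find_link's wiki_url and API helpers, not part of the suite above.

import json
import urllib.parse

import requests
import responses


def demo_wiki_url(params):
    # Same idea as wiki_url() above: merge defaults, then URL-encode.
    default = {'action': 'query', 'formatversion': 2, 'format': 'json'}
    return ('https://en.wikipedia.org/w/api.php?'
            + urllib.parse.urlencode({**default, **params}))


def demo_page_length(title):
    # Hypothetical helper standing in for find_link.api-style functions:
    # fetch page info and return the page length.
    url = demo_wiki_url({'titles': title, 'prop': 'info'})
    page = requests.get(url).json()['query']['pages'][0]
    return page['length']


@responses.activate
def test_demo_page_length():
    url = demo_wiki_url({'titles': 'Market town', 'prop': 'info'})
    body = json.dumps({'query': {'pages': [{'title': 'Market town', 'length': 10527}]}})
    # Register the stub; requests.get() inside demo_page_length() hits this
    # instead of the real Wikipedia API.
    responses.add(responses.GET, url, body=body, match_querystring=True)
    assert demo_page_length('Market town') == 10527

Because the stub is keyed to the full URL, a test fails loudly if the code under test builds a different query than the one the fixture expects, which is exactly how test_do_search above pins down seven distinct API calls.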
test_admin.py
Source: test_admin.py
...
    def test_can_create_release(self):
        # Ryan logs into the admin
        self.adminLogin()
        # He creates an unpublished release
        self.find_link('Releases').click()
        self.find_link('ADD RELEASE').click()
        self.find_name('title').send_keys('First release')
        self.find_name('date').send_keys(date_format(from_today(1)))
        self.find_name('cover_url').send_keys('http://localhost/cover.jpg')
        self.find_name('player_code').send_keys('<iframe></iframe>')
        self.find_name('description').send_keys('Release description')
        self.find_name('credits').send_keys('Release credits')
        self.find_name('_save').click()
        self.assertIn('First release', self.find_tag('body').text)
        # He verifies that it's not published
        self.get_url('/music')
        self.assertIn('Music', self.browser.title)
        self.assertNotIn('First release', self.find_tag('body').text)
        self.get_url('/music/first-release')
        self.assertNotIn('First release', self.browser.title)
        # He publishes the release
        self.get_url('/admin')
        self.find_link('Releases').click()
        self.find_link('First release').click()
        self.find_name('publish').click()
        self.find_name('_save').click()
        self.assertIn('First release', self.find_tag('body').text)
        # He verifies that it was published
        self.get_url('/music')
        self.find_link('First release').click()
        self.assertIn('First release', self.browser.title)
        # TODO: Test absence/presence of details?


class SongTestCase(AdminTestCase):

    def setUp(self):
        super().setUp()
        PublishedReleaseFactory.create(title='First release',
                                       slug='first-release')

    def test_can_create_song(self):
        # Ryan logs into the admin
        self.adminLogin()
        # He adds a published song
        self.find_link('Songs').click()
        self.find_link('ADD SONG').click()
        self.find_name('title').send_keys('First song')
        self.find_name('description').send_keys('Song description')
        self.find_name('player_code').send_keys('<iframe></iframe>')
        self.find_name('credits').send_keys('Song credits')
        self.find_name('lyrics').send_keys('Song lyrics')
        self.find_name('publish').click()
        self.find_name('_save').click()
        self.assertIn('First song', self.find_tag('body').text)
        # He adds an unpublished song
        self.find_link('ADD SONG').click()
        self.find_name('title').send_keys('Second song')
        self.find_name('_save').click()
        self.assertIn('Second song', self.find_tag('body').text)
        # He verifies that only the published song is on the site
        self.get_url('/songs')
        self.assertIn('Songs', self.browser.title)
        self.assertNotIn('Second song', self.find_tag('body').text)
        self.find_link('First song').click()
        self.assertIn('First song', self.browser.title)
        # He adds the songs to the release
        self.get_url('/admin')
        self.find_link('Songs').click()
        self.find_link('First song').click()
        self.find_select('release').select_by_visible_text('First release')
        self.find_name('track').send_keys('1')
        self.find_name('_save').click()
        self.find_link('Second song').click()
        self.find_select('release').select_by_visible_text('First release')
        self.find_name('track').send_keys('2')
        self.find_name('_save').click()
        # He verifies that only the published song is shown on the release
        self.get_url('/music/first-release')
        self.assertIn('First song', self.find_tag('body').text)
        self.assertNotIn('Second song', self.find_tag('body').text)


class VideoTestCase(AdminTestCase):
    # TODO: Duplicated in .test_models.VideoAutofillTestCase
    CASSETTE = 'hth/music/tests/fixtures/cassettes/vimeo.yaml'
    SOURCE_URL = 'https://vimeo.com/126794989'
    PREVIEW_URL = 'http://i.vimeocdn.com/video/517362144_640.jpg'
    EMBED_CODE = ('<iframe src="http://player.vimeo.com/video/126794989"'
                  ' seamless allowfullscreen></iframe>\n')

    def setUp(self):
        super().setUp()
        PublishedReleaseFactory.create(title='First release',
                                       slug='first-release')

    def test_can_create_video(self):
        # Ryan logs into the admin
        self.adminLogin()
        # He adds a published video
        self.find_link('Videos').click()
        self.find_link('ADD VIDEO').click()
        self.find_name('title').send_keys('First video')
        self.find_name('source_url').send_keys('http://localhost')
        self.find_name('embed_code').send_keys('<iframe></iframe>')
        self.find_name('preview_url').send_keys('http://localhost/jpg')
        self.find_name('description').send_keys('Video description')
        self.find_name('credits').send_keys('Video credits')
        self.find_name('publish').click()
        self.find_name('_save').click()
        self.assertIn('First video', self.find_tag('body').text)
        # He adds an unpublished video
        self.find_link('ADD VIDEO').click()
        self.find_name('title').send_keys('Second video')
        self.find_name('_save').click()
        self.assertIn('Second video', self.find_tag('body').text)
        # He verifies that only the published video is on the site
        self.get_url('/video')
        self.assertNotIn('Second video', self.find_tag('body').text)
        self.find_link('First video').click()
        self.assertIn('First video', self.browser.title)
        # He adds the videos to the release
        self.get_url('/admin')
        self.find_link('Videos').click()
        self.find_link('First video').click()
        self.find_select('release').select_by_visible_text('First release')
        self.find_name('_save').click()
        self.find_link('Second video').click()
        self.find_select('release').select_by_visible_text('First release')
        self.find_name('_save').click()
        # He verifies that only the published video is shown on the release
        self.get_url('/music/first-release')
        self.assertIn('First video', self.find_tag('body').text)
        self.assertNotIn('Second video', self.find_tag('body').text)
        self.find_link('First video').click()
        self.assertIn('First video', self.browser.title)

    @vcr.use_cassette(CASSETTE)
    def test_autofill_from_source(self):
        # Ryan logs into the admin
        self.adminLogin()
        # He adds a published video, without preview_url and embed_code
        self.find_link('Videos').click()
        self.find_link('ADD VIDEO').click()
        self.find_name('title').send_keys('First video')
        self.find_name('source_url').send_keys(self.SOURCE_URL)
        self.find_name('publish').click()
        self.find_name('_continue').click()
        # He verifies that the preview_url and embed_code have been filled
        self.assertEqual(self.PREVIEW_URL,
                         self.find_name('preview_url').get_attribute('value'))
        self.assertEqual(self.EMBED_CODE.strip(),
                         self.find_name('embed_code').text)
        # He verifies that the published video is on the site
        self.get_url('/video')
        self.find_link('First video').click()
        self.assertIn('First video', self.browser.title)


class PressTestCase(AdminTestCase):

    def setUp(self):
        super().setUp()
        PublishedReleaseFactory.create(title='First release',
                                       slug='first-release')

    def test_can_create_quote(self):
        # Ryan logs into the admin
        self.adminLogin()
        # He adds a published quote
        self.find_link('Press').click()
        self.find_link('ADD PRESS').click()
        self.find_name('title').send_keys('First source')
        self.find_name('source_url').send_keys('http://example.com')
        self.find_name('date').send_keys(date_format(from_today(-30)))
        self.find_name('body').send_keys('First quote')
        self.find_name('publish').click()
        self.find_name('_save').click()
        self.assertIn('First source', self.find_tag('body').text)
        # He adds an unpublished quote
        self.find_link('ADD PRESS').click()
        self.find_name('title').send_keys('Second source')
        self.find_name('source_url').send_keys('http://foo.com')
        self.find_name('date').send_keys(date_format(from_today(-30)))
        self.find_name('_save').click()
        self.assertIn('Second source', self.find_tag('body').text)
        # He verifies that only the published quote is on the site
        self.get_url('/press')
        self.assertIn('Press', self.browser.title)
        self.assertIn('First source', self.find_tag('body').text)
        self.assertIn('First quote', self.find_tag('body').text)
        self.assertNotIn('Second source', self.find_tag('body').text)
        self.assertNotIn('Second quote', self.find_tag('body').text)
        # He adds the quotes to the release
        self.get_url('/admin')
        self.find_link('Press').click()
        self.find_link('First source').click()
        self.find_select('release').select_by_visible_text('First release')
        self.find_name('_save').click()
        self.find_link('Second source').click()
        self.find_select('release').select_by_visible_text('First release')
        self.find_name('_save').click()
        # He verifies that only the published quote is shown on the release
        self.get_url('/music/first-release')
        self.assertIn('First source', self.find_tag('body').text)
        self.assertNotIn('Second source', self.find_tag('body').text)

    def test_can_create_post(self):
        # Ryan logs into the admin
        self.adminLogin()
        # He adds a published press post
        self.find_link('Press').click()
        self.find_link('ADD PRESS').click()
        self.find_name('title').send_keys('Post title')
        self.find_name('body').send_keys('Post body')
        self.find_name('date').send_keys(date_format(from_today(-30)))
        self.find_name('quote').click()
        self.find_name('publish').click()
        self.find_name('_save').click()
        self.assertIn('Post title', self.find_tag('body').text)
        # He verifies that the post is on the site
        self.get_url('/press')
        self.assertIn('Press', self.browser.title)
        self.assertIn('Post title', self.find_tag('body').text)
        self.assertIn('Post body', self.find_tag('body').text)
        # He adds the post to the release
        self.get_url('/admin')
        self.find_link('Press').click()
        self.find_link('Post title').click()
        self.find_select('release').select_by_visible_text('First release')
        self.find_name('_save').click()
        # He verifies that post is shown on the release
        self.get_url('/music/first-release')
...
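These tests lean on short helpers such as find_link, find_name, find_tag, find_select, get_url and adminLogin, whose definitions sit in the elided AdminTestCase base class. Below is a hedged sketch of what those helpers plausibly look like, assuming a Selenium WebDriver stored on self.browser and a Django LiveServerTestCase-style live_server_url; none of this is confirmed by the excerpt.

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select


class AdminTestCaseSketch:
    # Assumed mixin: self.browser (a selenium WebDriver) and
    # self.live_server_url are provided by the real base class.

    def get_url(self, path):
        # Navigate relative to the test server root.
        self.browser.get(self.live_server_url + path)

    def find_link(self, text):
        # Locate an anchor by its visible link text.
        return self.browser.find_element(By.LINK_TEXT, text)

    def find_name(self, name):
        # Locate a form field by its name attribute.
        return self.browser.find_element(By.NAME, name)

    def find_tag(self, tag):
        # Locate the first element with the given tag, e.g. 'body'.
        return self.browser.find_element(By.TAG_NAME, tag)

    def find_select(self, name):
        # Wrap a <select> so tests can call select_by_visible_text().
        return Select(self.find_name(name))

With helpers this thin, each test reads as a user story: click a link, fill named fields, save, then assert against the page body or title.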
scrape.py
Source: scrape.py
...
        # nwl_scraper = splits()
        # self.team1 = nwl_scraper.team1()
        # self.team2 = nwl_scraper.team2()

    ''' ----------------------------------- Scraping Functions --------------------------------------- '''
    def find_link(self, url, link_text, second_check=False):
        if second_check:
            if self.team1yr == '2022' and self.level.lower() == 'other':
                html = BeautifulSoup(requests.get(url).text, features='lxml')
                year, name, num_teams = self.sewp_info(html)
                num_teams = int(num_teams)
                s = "".join(c for c in html.find_all(text=Comment) if "table_container" in c)
                soup = BeautifulSoup(s, "html.parser")
                team_links = [('https://baseball-reference.com' + a['href']) for a in soup.select('[href*="/register/team.cgi?id="]')][:num_teams]
                return team_links
        else:
            # if self.team1yr != '2022' and self.team2yr != '2022':
            html = BeautifulSoup(requests.get(url).text, features='lxml')
            a_tags = html.find_all('a', href=True)
            return [link['href'] for link in a_tags if link.text == link_text]

    def parse_row(self, row):
        return [str(x.string) for x in row.find_all('td')]

    def sewp_info(self, html):
        spans = html.find_all('span')
        p_tags = html.find_all('p')
        return spans[8].text, spans[9].text, p_tags[1].text.split()[3]

    ''' ------------------------------- Baseball Specific Functions ---------------------------------- '''
    def find_baseball_data(self, url, teamyr=''):
        if self.level.lower() == 'mlb':
            if teamyr != '2022':
                hitting_data = pd.read_html(url)[0].dropna(how='all').fillna(0)
                pitching_data = pd.read_html(url)[1].dropna(how='all').fillna(0)
            elif teamyr == '2022':
                # have to change the index because of the schedule tables in the current season
                hitting_data = pd.read_html(url)[-2].dropna(how='all').fillna(0)
                pitching_data = pd.read_html(url)[-1].dropna(how='all').fillna(0)
            hitting_data = hitting_data[hitting_data['Name'] != 'Name']
            pitching_data = pitching_data[pitching_data['Name'] != 'Name']
            # regex=False so '*' and '#' are treated as literal characters
            hitting_data['Name'] = hitting_data['Name'].str.replace('*', '', regex=False)
            hitting_data['Name'] = hitting_data['Name'].str.replace('#', '', regex=False)
            pitching_data['Name'] = pitching_data['Name'].str.replace('*', '', regex=False)
            pitching_data['Name'] = pitching_data['Name'].str.replace('#', '', regex=False)
        elif self.level.lower() == 'other':
            # find pitching data in the comments
            hitting_data = pd.read_html(url)[0].dropna(how='all').fillna(0)
            hitting_data['Name'] = hitting_data['Name'].str.replace('*', '', regex=False).str.replace('#', '', regex=False)
            soup = BeautifulSoup(requests.get(url).text, 'lxml')
            pitching_data = pd.read_html([x for x in soup.find_all(string=lambda text: isinstance(text, Comment)) if 'id="div_team_pitching"' in x][0])[0]
            pitching_data['Name'] = pitching_data['Name'].str.replace('*', '', regex=False).str.replace('#', '', regex=False)
        # in the future we should include statcast data and probs if the years are 2015 or later
        return hitting_data, pitching_data, hitting_data.columns, pitching_data.columns

    # sport specific functions
    def baseball(self):
        default_url = 'https://baseball-reference.com'
        def_url_other = 'https://www.baseball-reference.com/register/league.cgi'
        if self.level.lower() == 'other':
            league_link = default_url + self.find_link(def_url_other, self.league)[0]
            if self.team1yr == '2022':
                yr_link1 = self.find_link(league_link, self.team1yr)[2]
                team_links = self.find_link(default_url + yr_link1, self.team1, second_check=True)
                the_link = []
                # try em all
                for link in team_links:
                    html = BeautifulSoup(requests.get(link).text, features='lxml')
                    yr, name, num_teams = self.sewp_info(html)
                    if name == self.team1:
                        the_link.append(link)
                        break
                hit1, pit1, hit_cols, pit_cols = self.find_baseball_data(the_link[0])
            if self.team2yr == '2022':
                yr_link1 = self.find_link(league_link, self.team1yr)[2]
                team_links = self.find_link(default_url + yr_link1, self.team1, second_check=True)
                the_link = []
                # try em all
                for link in team_links:
                    html = BeautifulSoup(requests.get(link).text, features='lxml')
                    yr, name, num_teams = self.sewp_info(html)
                    if name == self.team2:
                        the_link.append(link)
                        break
                hit2, pit2, hit_cols, pit_cols = self.find_baseball_data(the_link[0])
            if self.team1yr != '2022':
                yr_link1 = default_url + self.find_link(league_link, self.team1yr)[2]
                team1_link = default_url + self.find_link(yr_link1, self.team1)[0]
                hit1, pit1, hit_cols, pit_cols = self.find_baseball_data(team1_link)
            if self.team2yr != '2022':
                yr_link2 = default_url + self.find_link(league_link, self.team2yr)[2]
                team2_link = default_url + self.find_link(yr_link2, self.team2)[0]
                hit2, pit2, hit_cols, pit_cols = self.find_baseball_data(team2_link)
            if self.team1yr != '2022' and self.team2yr != '2022':
                yr_link1 = default_url + self.find_link(league_link, self.team1yr)[2]
                yr_link2 = default_url + self.find_link(league_link, self.team2yr)[2]
                team1_link = default_url + self.find_link(yr_link1, self.team1)[0]
                team2_link = default_url + self.find_link(yr_link2, self.team2)[0]
                hit1, pit1, hit_cols, pit_cols = self.find_baseball_data(team1_link)
                hit2, pit2, hit_cols, pit_cols = self.find_baseball_data(team2_link)
            return hit1, pit1, hit2, pit2, hit_cols, pit_cols

        # mlb branch
        elif self.team1yr != '2022' and self.team2yr != '2022':
            # find year links for each team
            yr_link1 = default_url + self.find_link('https://www.baseball-reference.com/leagues/', self.team1yr)[0]
            yr_link2 = default_url + self.find_link('https://www.baseball-reference.com/leagues/', self.team2yr)[0]
            # find team links for each team
            team1_link = default_url + self.find_link(yr_link1, self.team1)[0]
            team2_link = default_url + self.find_link(yr_link2, self.team2)[0]
            # get probability data from each team link
            hit1, pit1, hit_cols, pit_cols = self.find_baseball_data(team1_link)
            hit2, pit2, hit_cols1, pit_cols1 = self.find_baseball_data(team2_link)
            return hit1, pit1, hit2, pit2, hit_cols, pit_cols

        # 2022 mlb branch since the current season html is different
        elif self.level.lower() == 'mlb' and (self.team1yr == '2022' or self.team2yr == '2022'):
            def_url = 'https://www.baseball-reference.com/leagues/majors/2022.shtml'
            # team1
            if self.team1yr == '2022':
                link1 = default_url + self.find_link(def_url, self.team1)[0]
                hit1, pit1, hit_cols, pit_cols = self.find_baseball_data(link1, teamyr=self.team1yr)
            elif self.team1yr != '2022':
                # find year links for each team
                yr_link1 = default_url + self.find_link('https://www.baseball-reference.com/leagues/', self.team1yr)[0]
                # find team links for each team
                team1_link = default_url + self.find_link(yr_link1, self.team1)[0]
                # get probability data from each team link
                hit1, pit1, hit_cols, pit_cols = self.find_baseball_data(team1_link)
            if self.team2yr == '2022':
                link2 = default_url + self.find_link(def_url, self.team2)[0]
                hit2, pit2, hit_cols, pit_cols = self.find_baseball_data(link2, teamyr=self.team2yr)
            elif self.team2yr != '2022':
                # find year links for each team
                yr_link2 = default_url + self.find_link('https://www.baseball-reference.com/leagues/', self.team2yr)[0]
                # find team links for each team
                team2_link = default_url + self.find_link(yr_link2, self.team2)[0]
                # get probability data from each team link
                hit2, pit2, hit_cols1, pit_cols1 = self.find_baseball_data(team2_link)
...
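The class header, imports and constructor are elided above, so the following usage sketch is illustrative only: the class name Scraper and its no-argument constructor are assumptions, while the attribute names (team1, team1yr, team2, team2yr, level, league) come straight from the methods shown.

# Assumed imports, matching the names the methods above rely on:
#   import requests
#   import pandas as pd
#   from bs4 import BeautifulSoup, Comment

scraper = Scraper()  # hypothetical constructor; real signature not shown
scraper.level = 'mlb'
scraper.team1, scraper.team1yr = 'New York Yankees', '2019'
scraper.team2, scraper.team2yr = 'Boston Red Sox', '2019'

# baseball() walks baseball-reference.com from the league index to the
# year page to each team page, then returns hitting and pitching
# DataFrames for both teams plus their column labels.
hit1, pit1, hit2, pit2, hit_cols, pit_cols = scraper.baseball()
print(hit_cols)

Note the control flow: find_link() with second_check=True is only taken for 2022 'other'-level pages, where team tables live inside HTML comments and have to be re-parsed with BeautifulSoup before the links can be collected.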