Best Python code snippet using playwright-python
test_extract.py
Source:test_extract.py
...306 s = Series(["a3", "b3", "c2"], name="series_name", dtype=any_string_dtype)307 result = s.str.extract(r"(?P<letter>[a-z])", expand=True)308 expected = DataFrame({"letter": ["a", "b", "c"]}, dtype=any_string_dtype)309 tm.assert_frame_equal(result, expected)310def test_extractall(any_string_dtype):311 data = [312 "dave@google.com",313 "tdhock5@gmail.com",314 "maudelaperriere@gmail.com",315 "rob@gmail.com some text steve@gmail.com",316 "a@b.com some text c@d.com and e@f.com",317 np.nan,318 "",319 ]320 expected_tuples = [321 ("dave", "google", "com"),322 ("tdhock5", "gmail", "com"),323 ("maudelaperriere", "gmail", "com"),324 ("rob", "gmail", "com"),325 ("steve", "gmail", "com"),326 ("a", "b", "com"),327 ("c", "d", "com"),328 ("e", "f", "com"),329 ]330 pat = r"""331 (?P<user>[a-z0-9]+)332 @333 (?P<domain>[a-z]+)334 \.335 (?P<tld>[a-z]{2,4})336 """337 expected_columns = ["user", "domain", "tld"]338 s = Series(data, dtype=any_string_dtype)339 # extractall should return a DataFrame with one row for each match, indexed by the340 # subject from which the match came.341 expected_index = MultiIndex.from_tuples(342 [(0, 0), (1, 0), (2, 0), (3, 0), (3, 1), (4, 0), (4, 1), (4, 2)],343 names=(None, "match"),344 )345 expected = DataFrame(346 expected_tuples, expected_index, expected_columns, dtype=any_string_dtype347 )348 result = s.str.extractall(pat, flags=re.VERBOSE)349 tm.assert_frame_equal(result, expected)350 # The index of the input Series should be used to construct the index of the output351 # DataFrame:352 mi = MultiIndex.from_tuples(353 [354 ("single", "Dave"),355 ("single", "Toby"),356 ("single", "Maude"),357 ("multiple", "robAndSteve"),358 ("multiple", "abcdef"),359 ("none", "missing"),360 ("none", "empty"),361 ]362 )363 s = Series(data, index=mi, dtype=any_string_dtype)364 expected_index = MultiIndex.from_tuples(365 [366 ("single", "Dave", 0),367 ("single", "Toby", 0),368 ("single", "Maude", 0),369 ("multiple", "robAndSteve", 0),370 ("multiple", "robAndSteve", 
1),371 ("multiple", "abcdef", 0),372 ("multiple", "abcdef", 1),373 ("multiple", "abcdef", 2),374 ],375 names=(None, None, "match"),376 )377 expected = DataFrame(378 expected_tuples, expected_index, expected_columns, dtype=any_string_dtype379 )380 result = s.str.extractall(pat, flags=re.VERBOSE)381 tm.assert_frame_equal(result, expected)382 # MultiIndexed subject with names.383 s = Series(data, index=mi, dtype=any_string_dtype)384 s.index.names = ("matches", "description")385 expected_index.names = ("matches", "description", "match")386 expected = DataFrame(387 expected_tuples, expected_index, expected_columns, dtype=any_string_dtype388 )389 result = s.str.extractall(pat, flags=re.VERBOSE)390 tm.assert_frame_equal(result, expected)391@pytest.mark.parametrize(392 "pat,expected_names",393 [394 # optional groups.395 ("(?P<letter>[AB])?(?P<number>[123])", ["letter", "number"]),396 # only one of two groups has a name.397 ("([AB])?(?P<number>[123])", [0, "number"]),398 ],399)400def test_extractall_column_names(pat, expected_names, any_string_dtype):401 s = Series(["", "A1", "32"], dtype=any_string_dtype)402 result = s.str.extractall(pat)403 expected = DataFrame(404 [("A", "1"), (np.nan, "3"), (np.nan, "2")],405 index=MultiIndex.from_tuples([(1, 0), (2, 0), (2, 1)], names=(None, "match")),406 columns=expected_names,407 dtype=any_string_dtype,408 )409 tm.assert_frame_equal(result, expected)410def test_extractall_single_group(any_string_dtype):411 s = Series(["a3", "b3", "d4c2"], name="series_name", dtype=any_string_dtype)412 expected_index = MultiIndex.from_tuples(413 [(0, 0), (1, 0), (2, 0), (2, 1)], names=(None, "match")414 )415 # extractall(one named group) returns DataFrame with one named column.416 result = s.str.extractall(r"(?P<letter>[a-z])")417 expected = DataFrame(418 {"letter": ["a", "b", "d", "c"]}, index=expected_index, dtype=any_string_dtype419 )420 tm.assert_frame_equal(result, expected)421 # extractall(one un-named group) returns DataFrame with one un-named 
column.422 result = s.str.extractall(r"([a-z])")423 expected = DataFrame(424 ["a", "b", "d", "c"], index=expected_index, dtype=any_string_dtype425 )426 tm.assert_frame_equal(result, expected)427def test_extractall_single_group_with_quantifier(any_string_dtype):428 # GH#13382429 # extractall(one un-named group with quantifier) returns DataFrame with one un-named430 # column.431 s = Series(["ab3", "abc3", "d4cd2"], name="series_name", dtype=any_string_dtype)432 result = s.str.extractall(r"([a-z]+)")433 expected = DataFrame(434 ["ab", "abc", "d", "cd"],435 index=MultiIndex.from_tuples(436 [(0, 0), (1, 0), (2, 0), (2, 1)], names=(None, "match")437 ),438 dtype=any_string_dtype,439 )440 tm.assert_frame_equal(result, expected)441@pytest.mark.parametrize(442 "data, names",443 [444 ([], (None,)),445 ([], ("i1",)),446 ([], (None, "i2")),447 ([], ("i1", "i2")),448 (["a3", "b3", "d4c2"], (None,)),449 (["a3", "b3", "d4c2"], ("i1", "i2")),450 (["a3", "b3", "d4c2"], (None, "i2")),451 (["a3", "b3", "d4c2"], ("i1", "i2")),452 ],453)454def test_extractall_no_matches(data, names, any_string_dtype):455 # GH19075 extractall with no matches should return a valid MultiIndex456 n = len(data)457 if len(names) == 1:458 index = Index(range(n), name=names[0])459 else:460 tuples = (tuple([i] * (n - 1)) for i in range(n))461 index = MultiIndex.from_tuples(tuples, names=names)462 s = Series(data, name="series_name", index=index, dtype=any_string_dtype)463 expected_index = MultiIndex.from_tuples([], names=(names + ("match",)))464 # one un-named group.465 result = s.str.extractall("(z)")466 expected = DataFrame(columns=[0], index=expected_index, dtype=any_string_dtype)467 tm.assert_frame_equal(result, expected)468 # two un-named groups.469 result = s.str.extractall("(z)(z)")470 expected = DataFrame(columns=[0, 1], index=expected_index, dtype=any_string_dtype)471 tm.assert_frame_equal(result, expected)472 # one named group.473 result = s.str.extractall("(?P<first>z)")474 expected = DataFrame(475 
columns=["first"], index=expected_index, dtype=any_string_dtype476 )477 tm.assert_frame_equal(result, expected)478 # two named groups.479 result = s.str.extractall("(?P<first>z)(?P<second>z)")480 expected = DataFrame(481 columns=["first", "second"], index=expected_index, dtype=any_string_dtype482 )483 tm.assert_frame_equal(result, expected)484 # one named, one un-named.485 result = s.str.extractall("(z)(?P<second>z)")486 expected = DataFrame(487 columns=[0, "second"], index=expected_index, dtype=any_string_dtype488 )489 tm.assert_frame_equal(result, expected)490def test_extractall_stringindex(any_string_dtype):491 s = Series(["a1a2", "b1", "c1"], name="xxx", dtype=any_string_dtype)492 result = s.str.extractall(r"[ab](?P<digit>\d)")493 expected = DataFrame(494 {"digit": ["1", "2", "1"]},495 index=MultiIndex.from_tuples([(0, 0), (0, 1), (1, 0)], names=[None, "match"]),496 dtype=any_string_dtype,497 )498 tm.assert_frame_equal(result, expected)499 # index should return the same result as the default index without name thus500 # index.name doesn't affect to the result501 if any_string_dtype == "object":502 for idx in [503 Index(["a1a2", "b1", "c1"]),504 Index(["a1a2", "b1", "c1"], name="xxx"),505 ]:506 result = idx.str.extractall(r"[ab](?P<digit>\d)")507 tm.assert_frame_equal(result, expected)508 s = Series(509 ["a1a2", "b1", "c1"],510 name="s_name",511 index=Index(["XX", "yy", "zz"], name="idx_name"),512 dtype=any_string_dtype,513 )514 result = s.str.extractall(r"[ab](?P<digit>\d)")515 expected = DataFrame(516 {"digit": ["1", "2", "1"]},517 index=MultiIndex.from_tuples(518 [("XX", 0), ("XX", 1), ("yy", 0)], names=["idx_name", "match"]519 ),520 dtype=any_string_dtype,521 )522 tm.assert_frame_equal(result, expected)523def test_extractall_no_capture_groups_raises(any_string_dtype):524 # Does not make sense to use extractall with a regex that has no capture groups.525 # (it returns DataFrame with one column for each capture group)526 s = Series(["a3", "b3", "d4c2"], 
name="series_name", dtype=any_string_dtype)527 with pytest.raises(ValueError, match="no capture groups"):528 s.str.extractall(r"[a-z]")529def test_extract_index_one_two_groups():530 s = Series(["a3", "b3", "d4c2"], index=["A3", "B3", "D4"], name="series_name")531 r = s.index.str.extract(r"([A-Z])", expand=True)532 e = DataFrame(["A", "B", "D"])533 tm.assert_frame_equal(r, e)534 # Prior to v0.18.0, index.str.extract(regex with one group)535 # returned Index. With more than one group, extract raised an536 # error (GH9980). Now extract always returns DataFrame.537 r = s.index.str.extract(r"(?P<letter>[A-Z])(?P<digit>[0-9])", expand=True)538 e_list = [("A", "3"), ("B", "3"), ("D", "4")]539 e = DataFrame(e_list, columns=["letter", "digit"])540 tm.assert_frame_equal(r, e)541def test_extractall_same_as_extract(any_string_dtype):542 s = Series(["a3", "b3", "c2"], name="series_name", dtype=any_string_dtype)543 pattern_two_noname = r"([a-z])([0-9])"544 extract_two_noname = s.str.extract(pattern_two_noname, expand=True)545 has_multi_index = s.str.extractall(pattern_two_noname)546 no_multi_index = has_multi_index.xs(0, level="match")547 tm.assert_frame_equal(extract_two_noname, no_multi_index)548 pattern_two_named = r"(?P<letter>[a-z])(?P<digit>[0-9])"549 extract_two_named = s.str.extract(pattern_two_named, expand=True)550 has_multi_index = s.str.extractall(pattern_two_named)551 no_multi_index = has_multi_index.xs(0, level="match")552 tm.assert_frame_equal(extract_two_named, no_multi_index)553 pattern_one_named = r"(?P<group_name>[a-z])"554 extract_one_named = s.str.extract(pattern_one_named, expand=True)555 has_multi_index = s.str.extractall(pattern_one_named)556 no_multi_index = has_multi_index.xs(0, level="match")557 tm.assert_frame_equal(extract_one_named, no_multi_index)558 pattern_one_noname = r"([a-z])"559 extract_one_noname = s.str.extract(pattern_one_noname, expand=True)560 has_multi_index = s.str.extractall(pattern_one_noname)561 no_multi_index = 
has_multi_index.xs(0, level="match")562 tm.assert_frame_equal(extract_one_noname, no_multi_index)563def test_extractall_same_as_extract_subject_index(any_string_dtype):564 # same as above tests, but s has an MultiIndex.565 mi = MultiIndex.from_tuples(566 [("A", "first"), ("B", "second"), ("C", "third")],567 names=("capital", "ordinal"),568 )569 s = Series(["a3", "b3", "c2"], index=mi, name="series_name", dtype=any_string_dtype)570 pattern_two_noname = r"([a-z])([0-9])"571 extract_two_noname = s.str.extract(pattern_two_noname, expand=True)572 has_match_index = s.str.extractall(pattern_two_noname)573 no_match_index = has_match_index.xs(0, level="match")574 tm.assert_frame_equal(extract_two_noname, no_match_index)575 pattern_two_named = r"(?P<letter>[a-z])(?P<digit>[0-9])"576 extract_two_named = s.str.extract(pattern_two_named, expand=True)577 has_match_index = s.str.extractall(pattern_two_named)578 no_match_index = has_match_index.xs(0, level="match")579 tm.assert_frame_equal(extract_two_named, no_match_index)580 pattern_one_named = r"(?P<group_name>[a-z])"581 extract_one_named = s.str.extract(pattern_one_named, expand=True)582 has_match_index = s.str.extractall(pattern_one_named)583 no_match_index = has_match_index.xs(0, level="match")584 tm.assert_frame_equal(extract_one_named, no_match_index)585 pattern_one_noname = r"([a-z])"586 extract_one_noname = s.str.extract(pattern_one_noname, expand=True)587 has_match_index = s.str.extractall(pattern_one_noname)588 no_match_index = has_match_index.xs(0, level="match")...
Week1_Assignment Text Analysis.py
Source:Week1_Assignment Text Analysis.py
...11 doc.append(line)12df = pd.Series(doc)13df.head(10)14# In[2]:15re1 = df.str.extractall(r'(?:(?P<Month>\d{1,2})[/-](?P<Day>\d{1,2})[/-](?P<Year>(?:19|20)?\d{2}))')16# In[3]:17re1['Month'] = re1['Month'].apply(lambda x: '0'+x if len(x) < 2 else x)18re1['Day'] = re1['Day'].apply(lambda x: '0'+x if len(x) < 2 else x)19re1['Year'] = re1['Year'].apply(lambda x: '19'+x if len(x) < 4 else x)20re1 = re1[re1.Day.astype(int) < 32]21re1 = re1[re1.Month.astype(int) < 13]22df1 = pd.DataFrame((re1['Month']+'/'+re1['Day']+'/'+re1['Year']).astype('datetime64'), columns = ['Date'])23df1.reset_index(inplace = True)24df1.drop(['match'], axis = 1, inplace = True)25df1 = df1.rename(columns={'level_0': 'Old_Index', 'Date': 'Date'})26df1.head()27# In[4]:28re2 = df.str.extractall(r'(?:(?P<Day>\d{2} )(?P<Month>(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z.,/ ]*) (?P<Year>(?:19|20)?\d{2}))') #set29# In[5]:30months = ({'January': '01', 'February': '02', 'March': '03', 'April': '04', 'May': '05', 'June': '06', 31 'July': '07', 'August': '08', 'September': '09', 'October': '10', 'November': '11', 'December': '12', 32 'Decemeber': '12', 'Janaury': '01',33 'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'May': '05', 'Jun': '06', 34 'Jul': '07', 'Aug': '08', 'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12'})35re2['Month'] = re2['Month'].map(months)36re2['Day'] = re2['Day'].apply(lambda x: '0'+x if len(x) < 2 else x)37re2['Year'] = re2['Year'].apply(lambda x: '19'+x if len(x) < 4 else x)38re2 = re2[re2.Day.astype(int) < 32]39re2 = re2[re2.Month.astype(int) < 13]40df2 = pd.DataFrame((re2['Month']+'/'+re2['Day']+'/'+re2['Year']).astype('datetime64'), columns = ['Date'])41df2.reset_index(inplace = True)42df2.drop(['match'], axis = 1, inplace = True)43df2 = df2.rename(columns={'level_0': 'Old_Index', 'Date': 'Date'})44df2.head()45# In[6]:46re3 = df.str.extractall(r'(?:(?P<Month>(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z. 
]*)(?P<Day>\d{2}[/., ] )(?P<Year>(?:19|20)?\d{2}))') #set47# In[7]:48re3['Month'] = re3['Month'].str.replace(".", "").str.strip()49re3['Day'] = re3['Day'].str.replace(",", "")50re3['Month'] = re3['Month'].map(months)51re3['Day'] = re3['Day'].apply(lambda x: '0'+x if len(x) < 2 else x)52re3['Year'] = re3['Year'].apply(lambda x: '19'+x if len(x) < 4 else x)53re3 = re3[re3.Day.astype(int) < 32]54re3 = re3[re3.Month.astype(int) < 13]55df3 = pd.DataFrame((re3['Month']+'/'+re3['Day']+'/'+re3['Year']).astype('datetime64'), columns = ['Date'])56df3.reset_index(inplace = True)57df3.drop(['match'], axis = 1, inplace = True)58df3 = df3.rename(columns={'level_0': 'Old_Index', 'Date': 'Date'})59df3.head()60# In[8]:61re4 = df.str.extractall(r'(?:(?P<Month>(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z. ]*)(?P<Day>\d{2} )(?P<Year>(?:19|20)?\d{2}))') #set62# In[9]:63re4['Month'] = re4['Month'].str.replace(".", "").str.strip()64re4['Day'] = re4['Day'].str.replace(",", "")65re4['Month'] = re4['Month'].map(months)66re4['Day'] = re4['Day'].apply(lambda x: '0'+x if len(x) < 2 else x)67re4['Year'] = re4['Year'].apply(lambda x: '19'+x if len(x) < 4 else x)68re4 = re4[re4.Day.astype(int) < 32]69re4 = re4[re4.Month.astype(int) < 13]70df4 = pd.DataFrame((re4['Month']+'/'+re4['Day']+'/'+re4['Year']).astype('datetime64'), columns = ['Date'])71df4.reset_index(inplace = True)72df4.drop(['match'], axis = 1, inplace = True)73df4 = df4.rename(columns={'level_0': 'Old_Index', 'Date': 'Date'})74df4.head()75# In[10]:76re5 = df.str.extractall(r'(?:(?P<Month>(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z./, ]*)(?P<Day>)(?P<Year>(?:19|20)\d{2}))') #set77# In[11]:78re5['Month'] = re5['Month'].str.replace(".", "").str.strip()79re5['Month'] = re5['Month'].str.replace(",", "").str.strip()80re5['Month'] = re5['Month'].map(months)81re5['Day'] = re5['Day'].replace(np.nan, '01', regex = True)82re5['Year'] = re5['Year'].apply(lambda x: '19'+x if len(x) < 4 else x)83re5 = re5[re5.Day.astype(int) 
< 32]84re5 = re5[re5.Month.astype(int) < 13]85df5 = pd.DataFrame((re5['Month']+'/'+re5['Day']+'/'+re5['Year']).astype('datetime64'), columns = ['Date'])86df5.reset_index(inplace = True)87df5.drop(['match'], axis = 1, inplace = True)88df5 = df5.rename(columns={'level_0': 'Old_Index', 'Date': 'Date'})89df5.head()90# In[12]:91re6 = df.str.extractall(r'(?:(?P<Month>\d{1,2})[/](?P<Day>)(?P<Year>(?:19|20)?\d{4}))')92# In[13]:93re6['Month'] = re6['Month'].str.replace(".", "").str.strip()94re6['Month'] = re6['Month'].str.replace(",", "").str.strip()95re6['Month'] = re6['Month'].apply(lambda x: '0'+x if len(x) < 2 else x)96re6['Day'] = re6['Day'].replace(np.nan, '01', regex = True)97re6['Year'] = re6['Year'].apply(lambda x: '19'+x if len(x) < 4 else x)98re6 = re6[re6.Day.astype(int) < 32]99re6 = re6[re6.Month.astype(int) < 13]100df6 = pd.DataFrame((re6['Month']+'/'+re6['Day']+'/'+re6['Year']).astype('datetime64'), columns = ['Date'])101df6.reset_index(inplace = True)102df6.drop(['match'], axis = 1, inplace = True)103df6 = df6.rename(columns={'level_0': 'Old_Index', 'Date': 'Date'})104df6.head()105# In[14]:106re7 = df.str.extractall(r'(?:(?P<Month>)(?P<Day>)(?P<Year>(?:19|20)\d{2}))')107# In[15]:108re7['Month'] = re7['Month'].replace(np.nan, '01', regex = True)109re7['Day'] = re7['Day'].replace(np.nan, '01', regex = True)110re7['Year'] = re7['Year'].apply(lambda x: '19'+x if len(x) < 4 else x)111df7 = pd.DataFrame((re7['Month']+'/'+re7['Day']+'/'+re7['Year']).astype('datetime64'), columns = ['Date'])112df7.reset_index(inplace = True)113df7.drop(['match'], axis = 1, inplace = True)114df7 = df7.rename(columns={'level_0': 'Old_Index', 'Date': 'Date'})115df7.head()116# # Merge DataFrames117# In[16]:118final_df = pd.concat([df1, df2, df3, df4])119final_df = final_df.sort_values('Old_Index')120final_df.reset_index(drop = True, inplace = True)121final_df.head()122# In[17]:123merge_df = pd.merge(final_df, df5, on = 'Old_Index', how = 'outer', validate = 
'one_to_one')124merge_df.Date_x.fillna(merge_df.Date_y, inplace = True)125merge_df.drop(['Date_y'], axis = 1, inplace = True)126merge_df.rename(columns = {'Date_x': 'Date'}, inplace = True)127final_df = merge_df128final_df.head()129# In[18]:130merge_df = pd.merge(final_df, df6, on = 'Old_Index', how = 'outer', validate = 'one_to_one')131merge_df.Date_x.fillna(merge_df.Date_y, inplace = True)132merge_df.drop(['Date_y'], axis = 1, inplace = True)133merge_df.rename(columns = {'Date_x': 'Date'}, inplace = True)134final_df = merge_df135final_df.head()136# In[19]:137merge_df = pd.merge(final_df, df7, on = 'Old_Index', how = 'outer', validate = 'one_to_one')138merge_df.Date_x.fillna(merge_df.Date_y, inplace = True)139merge_df.drop(['Date_y'], axis = 1, inplace = True)140merge_df.rename(columns = {'Date_x': 'Date'}, inplace = True)141final_df = merge_df142final_df.head()143# In[20]:144final_df = final_df.sort_values('Date')145# In[21]:146S1 = pd.Series(list(final_df['Old_Index']))147# In[22]:148S1149# In[23]:150# flat_list = [item for sublist in re1 for item in sublist]151# df1 = pd.DataFrame(flat_list, columns = ['Dates'])152# merge_df.query('Date_x == "NaT"')153# In[24]:154# final_df['Date'] = pd.to_datetime(final_df['Date'])155final_df.sort_values('Date')156# final_df157# In[25]:158re4159# In[26]:160# Your code here161# Full date162global df163dates_extracted = df.str.extractall(r'(?P<origin>(?P<month>\d?\d)[/|-](?P<day>\d?\d)[/|-](?P<year>\d{4}))')164index_left = ~df.index.isin([x[0] for x in dates_extracted.index])165dates_extracted = dates_extracted.append(df[index_left].str.extractall(r'(?P<origin>(?P<month>\d?\d)[/|-](?P<day>([0-2]?[0-9])|([3][01]))[/|-](?P<year>\d{2}))'))166index_left = ~df.index.isin([x[0] for x in dates_extracted.index])167del dates_extracted[3]168del dates_extracted[4]169dates_extracted = dates_extracted.append(df[index_left].str.extractall(r'(?P<origin>(?P<day>\d?\d) ?(?P<month>[a-zA-Z]{3,})\.?,? 
(?P<year>\d{4}))'))170index_left = ~df.index.isin([x[0] for x in dates_extracted.index])171dates_extracted = dates_extracted.append(df[index_left].str.extractall(r'(?P<origin>(?P<month>[a-zA-Z]{3,})\.?-? ?(?P<day>\d\d?)(th|nd|st)?,?-? ?(?P<year>\d{4}))'))172del dates_extracted[3]173index_left = ~df.index.isin([x[0] for x in dates_extracted.index])174# Without day175dates_without_day = df[index_left].str.extractall('(?P<origin>(?P<month>[A-Z][a-z]{2,}),?\.? (?P<year>\d{4}))')176dates_without_day = dates_without_day.append(df[index_left].str.extractall(r'(?P<origin>(?P<month>\d\d?)/(?P<year>\d{4}))'))177dates_without_day['day'] = 1178dates_extracted = dates_extracted.append(dates_without_day)179index_left = ~df.index.isin([x[0] for x in dates_extracted.index])180# Only year181dates_only_year = df[index_left].str.extractall(r'(?P<origin>(?P<year>\d{4}))')182dates_only_year['day'] = 1183dates_only_year['month'] = 1184dates_extracted = dates_extracted.append(dates_only_year)185index_left = ~df.index.isin([x[0] for x in dates_extracted.index])186# Year187dates_extracted['year'] = dates_extracted['year'].apply(lambda x: '19' + x if len(x) == 2 else x)188dates_extracted['year'] = dates_extracted['year'].apply(lambda x: str(x))189# Month190dates_extracted['month'] = dates_extracted['month'].apply(lambda x: x[1:] if type(x) is str and x.startswith('0') else x)191month_dict = dict({'September': 9, 'Mar': 3, 'November': 11, 'Jul': 7, 'January': 1, 'December': 12,192 'Feb': 2, 'May': 5, 'Aug': 8, 'Jun': 6, 'Sep': 9, 'Oct': 10, 'June': 6, 'March': 3,193 'February': 2, 'Dec': 12, 'Apr': 4, 'Jan': 1, 'Janaury': 1,'August': 8, 'October': 10,194 'July': 7, 'Since': 1, 'Nov': 11, 'April': 4, 'Decemeber': 12, 'Age': 8})195dates_extracted.replace({"month": month_dict}, inplace=True)...
test_all.py
Source:test_all.py
...19 d = tempfile.mkdtemp(prefix="pyunpack_test_")20 return d21def test():22 with pytest.raises(ValueError):23 Archive("blabla").extractall(tempfile.gettempdir())24 with pytest.raises(PatoolError):25 Archive(__file__).extractall(tempfile.gettempdir())26def create_arc(format):27 d = tmpdir()28 x_txt = join(d, "x.txt")29 open(x_txt, "w").write("123")30 # x_zip = d / "x.zip"31 os.chdir(d)32 x_zip = make_archive(33 "x",34 format, # the archive format - or tar, bztar, gztar35 root_dir=None, # root for archive - current working dir if None36 base_dir=None,37 ) # start archiving from here - cwd if None too38 # EasyProcess(["zip", "--no-dir-entries", x_zip, "x.txt"], cwd=d).call()39 return x_zip40def test2():41 for f in formats:42 print(f)43 x_zip = create_arc(f)44 with pytest.raises(ValueError):45 Archive(x_zip).extractall("blabla")46 d = tmpdir()47 Archive(x_zip, backend="patool").extractall(d)48 ok_file(d, "x.txt")49 d = tmpdir()50 Archive(x_zip).extractall(d)51 ok_file(d, "x.txt")52 d = tmpdir()53 Archive(x_zip, backend="auto").extractall(d)54 ok_file(d, "x.txt")55 if f == "zip":56 d = tmpdir()57 Archive(x_zip, backend="zipfile").extractall(d)58 ok_file(d, "x.txt")59 d = tmpdir()60 cli.extractall(x_zip, d)61 ok_file(d, "x.txt")62def test_subdir():63 for f in formats:64 x_zip = create_arc(f)65 d = join(tmpdir(), "subdir")66 with pytest.raises(ValueError):67 Archive(x_zip).extractall(d, auto_create_dir=False)68 d = join(tmpdir(), "subdir")69 Archive(x_zip, backend="auto").extractall(d, auto_create_dir=True)...
How to select an input according to a parent sibling label
Headless doesn't work using Playwright and BeautifulSoup 4
Refreshing cookie using Playwright for Python
Is there any way to close popup while running headless chromium?
How to run Playwright test from Pycharm in headed mode
What's the best way to setup playwright in Apache Airflow in Docker?
Finding element with slash in the ID using Playwright
playwright headless chromium can't find selector, but finds it in UI mode
Running Playwright on Google colab gives error : asyncio.run() cannot be called from a running event loop
How to get a list of all links from a dynamic web page?
If you try to perform an action on a label that has an associated input element, Playwright will automatically retarget the action to the input.
If the DOM structure is always the same, you can try something like this:
page.locator('div:has( > :text-matches("Project")) + div input')
Otherwise, a layout selector is your best bet.
Check out the latest blogs from LambdaTest on this topic:
Ruby is a programming language that is well suited to web automation. Ruby makes an excellent choice because of its clean syntax, focus on built-in library integrations, and active community. Another benefit of Ruby is that it also allows other programming languages, such as Java and Python, to be used to automate applications written in any other framework. Therefore, you can use Selenium Ruby to automate any sort of application in your system and test the results in any type of testing environment.
Websites and web apps are growing in number day by day, and so are people's expectations of a pleasant web experience. Even though the World Wide Web (WWW) was invented only in 1989 (32 years back), this technology has revolutionized the world as we knew it back then. The best part is that it has made life easier for us. You no longer have to stand in long queues to pay your bills. You can get that done within a few minutes by visiting the service's website, web app, or mobile app.
To decide what automation technology to use, we brought together Joe Colantonio, Founder of TestGuild, Sneha. V, Director of Quality Engineering, Everfi, and Carlos Kidman, Director of Engineering, Stealth Startup. The panel discussion was hosted by Mudit Singh, Marketing head at LambdaTest. Mudit decided to take a step backwards and let the panel discussion happen.
In today’s data-driven world, the ability to access and analyze large amounts of data can give researchers, businesses & organizations a competitive edge. One of the most important & free sources of this data is the Internet, which can be accessed and mined through web scraping.
LambdaTest’s Playwright tutorial will give you a broader idea of the Playwright automation framework, its unique features, and its use cases, with examples to deepen your understanding of Playwright testing. This tutorial will give you A-to-Z guidance, from installing the Playwright framework to best practices and advanced concepts.
Get 100 automation test minutes FREE!!