Best Python code snippet using playwright-python
test_extract.py
Source:test_extract.py
# NOTE: this excerpt opens with the tail of a test whose ``def`` line is outside
# the visible range. The four statements are preserved verbatim as a comment so
# that the complete tests below remain importable.
#     s = Series(["a3", "b3", "c2"], name="series_name", dtype=any_string_dtype)
#     result = s.str.extract(r"(?P<letter>[a-z])", expand=True)
#     expected = DataFrame({"letter": ["a", "b", "c"]}, dtype=any_string_dtype)
#     tm.assert_frame_equal(result, expected)


def test_extractall(any_string_dtype):
    """Check ``extractall`` rows, columns and index for e-mail-like subjects.

    The same data is run three times: with the default index, with an unnamed
    two-level MultiIndex, and with a named two-level MultiIndex. In every case
    the expected result index is the subject index plus a trailing ``"match"``
    level.
    """
    data = [
        "dave@google.com",
        "tdhock5@gmail.com",
        "maudelaperriere@gmail.com",
        "rob@gmail.com some text steve@gmail.com",
        "a@b.com some text c@d.com and e@f.com",
        np.nan,
        "",
    ]
    expected_tuples = [
        ("dave", "google", "com"),
        ("tdhock5", "gmail", "com"),
        ("maudelaperriere", "gmail", "com"),
        ("rob", "gmail", "com"),
        ("steve", "gmail", "com"),
        ("a", "b", "com"),
        ("c", "d", "com"),
        ("e", "f", "com"),
    ]
    # Compiled with re.VERBOSE below, so the whitespace inside is insignificant.
    pat = r"""
    (?P<user>[a-z0-9]+)
    @
    (?P<domain>[a-z]+)
    \.
    (?P<tld>[a-z]{2,4})
    """
    expected_columns = ["user", "domain", "tld"]
    s = Series(data, dtype=any_string_dtype)
    # extractall should return a DataFrame with one row for each match, indexed by the
    # subject from which the match came.
    expected_index = MultiIndex.from_tuples(
        [(0, 0), (1, 0), (2, 0), (3, 0), (3, 1), (4, 0), (4, 1), (4, 2)],
        names=(None, "match"),
    )
    expected = DataFrame(
        expected_tuples, expected_index, expected_columns, dtype=any_string_dtype
    )
    result = s.str.extractall(pat, flags=re.VERBOSE)
    tm.assert_frame_equal(result, expected)

    # The index of the input Series should be used to construct the index of the output
    # DataFrame:
    mi = MultiIndex.from_tuples(
        [
            ("single", "Dave"),
            ("single", "Toby"),
            ("single", "Maude"),
            ("multiple", "robAndSteve"),
            ("multiple", "abcdef"),
            ("none", "missing"),
            ("none", "empty"),
        ]
    )
    s = Series(data, index=mi, dtype=any_string_dtype)
    expected_index = MultiIndex.from_tuples(
        [
            ("single", "Dave", 0),
            ("single", "Toby", 0),
            ("single", "Maude", 0),
            ("multiple", "robAndSteve", 0),
            ("multiple", "robAndSteve", 1),
            ("multiple", "abcdef", 0),
            ("multiple", "abcdef", 1),
            ("multiple", "abcdef", 2),
        ],
        names=(None, None, "match"),
    )
    expected = DataFrame(
        expected_tuples, expected_index, expected_columns, dtype=any_string_dtype
    )
    result = s.str.extractall(pat, flags=re.VERBOSE)
    tm.assert_frame_equal(result, expected)

    # MultiIndexed subject with names.
    s = Series(data, index=mi, dtype=any_string_dtype)
    s.index.names = ("matches", "description")
    expected_index.names = ("matches", "description", "match")
    expected = DataFrame(
        expected_tuples, expected_index, expected_columns, dtype=any_string_dtype
    )
    result = s.str.extractall(pat, flags=re.VERBOSE)
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "pat,expected_names",
    [
        # optional groups.
        ("(?P<letter>[AB])?(?P<number>[123])", ["letter", "number"]),
        # only one of two groups has a name.
        ("([AB])?(?P<number>[123])", [0, "number"]),
    ],
)
def test_extractall_column_names(pat, expected_names, any_string_dtype):
    """Check result column labels for named and partially named groups."""
    s = Series(["", "A1", "32"], dtype=any_string_dtype)

    result = s.str.extractall(pat)
    expected = DataFrame(
        [("A", "1"), (np.nan, "3"), (np.nan, "2")],
        index=MultiIndex.from_tuples([(1, 0), (2, 0), (2, 1)], names=(None, "match")),
        columns=expected_names,
        dtype=any_string_dtype,
    )
    tm.assert_frame_equal(result, expected)


def test_extractall_single_group(any_string_dtype):
    """Check ``extractall`` with exactly one (named or un-named) group."""
    s = Series(["a3", "b3", "d4c2"], name="series_name", dtype=any_string_dtype)
    expected_index = MultiIndex.from_tuples(
        [(0, 0), (1, 0), (2, 0), (2, 1)], names=(None, "match")
    )

    # extractall(one named group) returns DataFrame with one named column.
    result = s.str.extractall(r"(?P<letter>[a-z])")
    expected = DataFrame(
        {"letter": ["a", "b", "d", "c"]}, index=expected_index, dtype=any_string_dtype
    )
    tm.assert_frame_equal(result, expected)

    # extractall(one un-named group) returns DataFrame with one un-named column.
    result = s.str.extractall(r"([a-z])")
    expected = DataFrame(
        ["a", "b", "d", "c"], index=expected_index, dtype=any_string_dtype
    )
    tm.assert_frame_equal(result, expected)


def test_extractall_single_group_with_quantifier(any_string_dtype):
    # GH#13382
    # extractall(one un-named group with quantifier) returns DataFrame with one un-named
    # column.
    s = Series(["ab3", "abc3", "d4cd2"], name="series_name", dtype=any_string_dtype)
    result = s.str.extractall(r"([a-z]+)")
    expected = DataFrame(
        ["ab", "abc", "d", "cd"],
        index=MultiIndex.from_tuples(
            [(0, 0), (1, 0), (2, 0), (2, 1)], names=(None, "match")
        ),
        dtype=any_string_dtype,
    )
    tm.assert_frame_equal(result, expected)


@pytest.mark.parametrize(
    "data, names",
    [
        ([], (None,)),
        ([], ("i1",)),
        ([], (None, "i2")),
        ([], ("i1", "i2")),
        (["a3", "b3", "d4c2"], (None,)),
        # Previously a second copy of the ("i1", "i2") case, which left the
        # non-empty, named, single-level index untested.
        (["a3", "b3", "d4c2"], ("i1",)),
        (["a3", "b3", "d4c2"], (None, "i2")),
        (["a3", "b3", "d4c2"], ("i1", "i2")),
    ],
)
def test_extractall_no_matches(data, names, any_string_dtype):
    # GH19075 extractall with no matches should return a valid MultiIndex
    n = len(data)
    if len(names) == 1:
        index = Index(range(n), name=names[0])
    else:
        # One entry per index level. The width follows ``names`` rather than
        # the data length, so it stays valid for any number of rows.
        tuples = ((i,) * len(names) for i in range(n))
        index = MultiIndex.from_tuples(tuples, names=names)
    s = Series(data, name="series_name", index=index, dtype=any_string_dtype)
    expected_index = MultiIndex.from_tuples([], names=(names + ("match",)))

    # one un-named group.
    result = s.str.extractall("(z)")
    expected = DataFrame(columns=[0], index=expected_index, dtype=any_string_dtype)
    tm.assert_frame_equal(result, expected)

    # two un-named groups.
    result = s.str.extractall("(z)(z)")
    expected = DataFrame(columns=[0, 1], index=expected_index, dtype=any_string_dtype)
    tm.assert_frame_equal(result, expected)

    # one named group.
    result = s.str.extractall("(?P<first>z)")
    expected = DataFrame(
        columns=["first"], index=expected_index, dtype=any_string_dtype
    )
    tm.assert_frame_equal(result, expected)

    # two named groups.
    result = s.str.extractall("(?P<first>z)(?P<second>z)")
    expected = DataFrame(
        columns=["first", "second"], index=expected_index, dtype=any_string_dtype
    )
    tm.assert_frame_equal(result, expected)

    # one named, one un-named.
    result = s.str.extractall("(z)(?P<second>z)")
    expected = DataFrame(
        columns=[0, "second"], index=expected_index, dtype=any_string_dtype
    )
    tm.assert_frame_equal(result, expected)


def test_extractall_stringindex(any_string_dtype):
    """Check ``extractall`` on a Series, on an Index, and on a Series with a named index."""
    s = Series(["a1a2", "b1", "c1"], name="xxx", dtype=any_string_dtype)
    result = s.str.extractall(r"[ab](?P<digit>\d)")
    expected = DataFrame(
        {"digit": ["1", "2", "1"]},
        index=MultiIndex.from_tuples([(0, 0), (0, 1), (1, 0)], names=[None, "match"]),
        dtype=any_string_dtype,
    )
    tm.assert_frame_equal(result, expected)

    # index should return the same result as the default index without name thus
    # index.name doesn't affect to the result
    if any_string_dtype == "object":
        for idx in [
            Index(["a1a2", "b1", "c1"]),
            Index(["a1a2", "b1", "c1"], name="xxx"),
        ]:
            result = idx.str.extractall(r"[ab](?P<digit>\d)")
            tm.assert_frame_equal(result, expected)

    s = Series(
        ["a1a2", "b1", "c1"],
        name="s_name",
        index=Index(["XX", "yy", "zz"], name="idx_name"),
        dtype=any_string_dtype,
    )
    result = s.str.extractall(r"[ab](?P<digit>\d)")
    expected = DataFrame(
        {"digit": ["1", "2", "1"]},
        index=MultiIndex.from_tuples(
            [("XX", 0), ("XX", 1), ("yy", 0)], names=["idx_name", "match"]
        ),
        dtype=any_string_dtype,
    )
    tm.assert_frame_equal(result, expected)


def test_extractall_no_capture_groups_raises(any_string_dtype):
    # Does not make sense to use extractall with a regex that has no capture groups.
    # (it returns DataFrame with one column for each capture group)
    s = Series(["a3", "b3", "d4c2"], name="series_name", dtype=any_string_dtype)
    with pytest.raises(ValueError, match="no capture groups"):
        s.str.extractall(r"[a-z]")


def test_extract_index_one_two_groups():
    """Check ``Index.str.extract(..., expand=True)`` with one and with two groups."""
    s = Series(["a3", "b3", "d4c2"], index=["A3", "B3", "D4"], name="series_name")
    r = s.index.str.extract(r"([A-Z])", expand=True)
    e = DataFrame(["A", "B", "D"])
    tm.assert_frame_equal(r, e)

    # Prior to v0.18.0, index.str.extract(regex with one group)
    # returned Index. With more than one group, extract raised an
    # error (GH9980). Now extract always returns DataFrame.
    r = s.index.str.extract(r"(?P<letter>[A-Z])(?P<digit>[0-9])", expand=True)
    e_list = [("A", "3"), ("B", "3"), ("D", "4")]
    e = DataFrame(e_list, columns=["letter", "digit"])
    tm.assert_frame_equal(r, e)


def test_extractall_same_as_extract(any_string_dtype):
    """Compare ``extract`` with the ``match == 0`` cross-section of ``extractall``."""
    s = Series(["a3", "b3", "c2"], name="series_name", dtype=any_string_dtype)

    pattern_two_noname = r"([a-z])([0-9])"
    extract_two_noname = s.str.extract(pattern_two_noname, expand=True)
    has_multi_index = s.str.extractall(pattern_two_noname)
    no_multi_index = has_multi_index.xs(0, level="match")
    tm.assert_frame_equal(extract_two_noname, no_multi_index)

    pattern_two_named = r"(?P<letter>[a-z])(?P<digit>[0-9])"
    extract_two_named = s.str.extract(pattern_two_named, expand=True)
    has_multi_index = s.str.extractall(pattern_two_named)
    no_multi_index = has_multi_index.xs(0, level="match")
    tm.assert_frame_equal(extract_two_named, no_multi_index)

    pattern_one_named = r"(?P<group_name>[a-z])"
    extract_one_named = s.str.extract(pattern_one_named, expand=True)
    has_multi_index = s.str.extractall(pattern_one_named)
    no_multi_index = has_multi_index.xs(0, level="match")
    tm.assert_frame_equal(extract_one_named, no_multi_index)

    pattern_one_noname = r"([a-z])"
    extract_one_noname = s.str.extract(pattern_one_noname, expand=True)
    has_multi_index = s.str.extractall(pattern_one_noname)
    no_multi_index = has_multi_index.xs(0, level="match")
    tm.assert_frame_equal(extract_one_noname, no_multi_index)


def test_extractall_same_as_extract_subject_index(any_string_dtype):
    # same as above tests, but s has an MultiIndex.
    mi = MultiIndex.from_tuples(
        [("A", "first"), ("B", "second"), ("C", "third")],
        names=("capital", "ordinal"),
    )
    s = Series(["a3", "b3", "c2"], index=mi, name="series_name", dtype=any_string_dtype)

    pattern_two_noname = r"([a-z])([0-9])"
    extract_two_noname = s.str.extract(pattern_two_noname, expand=True)
    has_match_index = s.str.extractall(pattern_two_noname)
    no_match_index = has_match_index.xs(0, level="match")
    tm.assert_frame_equal(extract_two_noname, no_match_index)

    pattern_two_named = r"(?P<letter>[a-z])(?P<digit>[0-9])"
    extract_two_named = s.str.extract(pattern_two_named, expand=True)
    has_match_index = s.str.extractall(pattern_two_named)
    no_match_index = has_match_index.xs(0, level="match")
    tm.assert_frame_equal(extract_two_named, no_match_index)

    pattern_one_named = r"(?P<group_name>[a-z])"
    extract_one_named = s.str.extract(pattern_one_named, expand=True)
    has_match_index = s.str.extractall(pattern_one_named)
    no_match_index = has_match_index.xs(0, level="match")
    tm.assert_frame_equal(extract_one_named, no_match_index)

    pattern_one_noname = r"([a-z])"
    extract_one_noname = s.str.extract(pattern_one_noname, expand=True)
    has_match_index = s.str.extractall(pattern_one_noname)
    no_match_index = has_match_index.xs(0, level="match")
    # ... (excerpt is truncated here; the rest of this test is not visible)
Week1_Assignment Text Analysis.py
Source:Week1_Assignment Text Analysis.py
# NOTE: this notebook export is an excerpt. It opens with the last statement of
# a loop whose header is outside the visible range; that statement is preserved
# as a comment. ``doc``, ``pd`` and ``np`` are defined in the part of the file
# that is not shown.
#     doc.append(line)
df = pd.Series(doc)
df.head(10)

# In[2]:
# Numeric dates: M/D/Y or M-D-Y, year given as two digits with an optional
# 19/20 prefix.
re1 = df.str.extractall(r'(?:(?P<Month>\d{1,2})[/-](?P<Day>\d{1,2})[/-](?P<Year>(?:19|20)?\d{2}))')

# In[3]:
# Zero-pad single-character month/day and prefix '19' to years shorter than
# four characters, then drop rows whose day/month are out of range.
re1['Month'] = re1['Month'].apply(lambda x: '0'+x if len(x) < 2 else x)
re1['Day'] = re1['Day'].apply(lambda x: '0'+x if len(x) < 2 else x)
re1['Year'] = re1['Year'].apply(lambda x: '19'+x if len(x) < 4 else x)
re1 = re1[re1.Day.astype(int) < 32]
re1 = re1[re1.Month.astype(int) < 13]
# 'datetime64[ns]' instead of the unit-less 'datetime64': pandas >= 2.0 rejects
# casts to a unit-less datetime dtype.
df1 = pd.DataFrame((re1['Month']+'/'+re1['Day']+'/'+re1['Year']).astype('datetime64[ns]'), columns = ['Date'])
df1.reset_index(inplace = True)
df1.drop(['match'], axis = 1, inplace = True)
df1 = df1.rename(columns={'level_0': 'Old_Index', 'Date': 'Date'})
df1.head()

# In[4]:
# "DD Month YY(YY)" with the month given by name.
re2 = df.str.extractall(r'(?:(?P<Day>\d{2} )(?P<Month>(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z.,/ ]*) (?P<Year>(?:19|20)?\d{2}))') #set

# In[5]:
# Month name -> two-digit month number. Includes two misspellings
# ('Decemeber', 'Janaury'); 'May' appears twice because it is both the full
# and the abbreviated name (same value, so harmless).
months = ({'January': '01', 'February': '02', 'March': '03', 'April': '04', 'May': '05', 'June': '06',
           'July': '07', 'August': '08', 'September': '09', 'October': '10', 'November': '11', 'December': '12',
           'Decemeber': '12', 'Janaury': '01',
           'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'May': '05', 'Jun': '06',
           'Jul': '07', 'Aug': '08', 'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12'})
re2['Month'] = re2['Month'].map(months)
re2['Day'] = re2['Day'].apply(lambda x: '0'+x if len(x) < 2 else x)
re2['Year'] = re2['Year'].apply(lambda x: '19'+x if len(x) < 4 else x)
re2 = re2[re2.Day.astype(int) < 32]
re2 = re2[re2.Month.astype(int) < 13]
df2 = pd.DataFrame((re2['Month']+'/'+re2['Day']+'/'+re2['Year']).astype('datetime64[ns]'), columns = ['Date'])
df2.reset_index(inplace = True)
df2.drop(['match'], axis = 1, inplace = True)
df2 = df2.rename(columns={'level_0': 'Old_Index', 'Date': 'Date'})
df2.head()

# In[6]:
# "Month DD<sep> YY(YY)" where the day is followed by one of "/., " and a space.
re3 = df.str.extractall(r'(?:(?P<Month>(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z. ]*)(?P<Day>\d{2}[/., ] )(?P<Year>(?:19|20)?\d{2}))') #set

# In[7]:
# regex=False: "." and "," are meant literally; the default of ``regex``
# differs between pandas versions, so it is spelled out.
re3['Month'] = re3['Month'].str.replace(".", "", regex=False).str.strip()
re3['Day'] = re3['Day'].str.replace(",", "", regex=False)
re3['Month'] = re3['Month'].map(months)
re3['Day'] = re3['Day'].apply(lambda x: '0'+x if len(x) < 2 else x)
re3['Year'] = re3['Year'].apply(lambda x: '19'+x if len(x) < 4 else x)
re3 = re3[re3.Day.astype(int) < 32]
re3 = re3[re3.Month.astype(int) < 13]
df3 = pd.DataFrame((re3['Month']+'/'+re3['Day']+'/'+re3['Year']).astype('datetime64[ns]'), columns = ['Date'])
df3.reset_index(inplace = True)
df3.drop(['match'], axis = 1, inplace = True)
df3 = df3.rename(columns={'level_0': 'Old_Index', 'Date': 'Date'})
df3.head()

# In[8]:
# "Month DD YY(YY)" with a plain space after the day.
re4 = df.str.extractall(r'(?:(?P<Month>(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z. ]*)(?P<Day>\d{2} )(?P<Year>(?:19|20)?\d{2}))') #set

# In[9]:
re4['Month'] = re4['Month'].str.replace(".", "", regex=False).str.strip()
re4['Day'] = re4['Day'].str.replace(",", "", regex=False)
re4['Month'] = re4['Month'].map(months)
re4['Day'] = re4['Day'].apply(lambda x: '0'+x if len(x) < 2 else x)
re4['Year'] = re4['Year'].apply(lambda x: '19'+x if len(x) < 4 else x)
re4 = re4[re4.Day.astype(int) < 32]
re4 = re4[re4.Month.astype(int) < 13]
df4 = pd.DataFrame((re4['Month']+'/'+re4['Day']+'/'+re4['Year']).astype('datetime64[ns]'), columns = ['Date'])
df4.reset_index(inplace = True)
df4.drop(['match'], axis = 1, inplace = True)
df4 = df4.rename(columns={'level_0': 'Old_Index', 'Date': 'Date'})
df4.head()

# In[10]:
# "Month YYYY" without a day; the empty Day group is filled with '01' below.
re5 = df.str.extractall(r'(?:(?P<Month>(?:Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)[a-z./, ]*)(?P<Day>)(?P<Year>(?:19|20)\d{2}))') #set

# In[11]:
re5['Month'] = re5['Month'].str.replace(".", "", regex=False).str.strip()
re5['Month'] = re5['Month'].str.replace(",", "", regex=False).str.strip()
re5['Month'] = re5['Month'].map(months)
re5['Day'] = re5['Day'].replace(np.nan, '01', regex = True)
re5['Year'] = re5['Year'].apply(lambda x: '19'+x if len(x) < 4 else x)
re5 = re5[re5.Day.astype(int) < 32]
re5 = re5[re5.Month.astype(int) < 13]
df5 = pd.DataFrame((re5['Month']+'/'+re5['Day']+'/'+re5['Year']).astype('datetime64[ns]'), columns = ['Date'])
df5.reset_index(inplace = True)
df5.drop(['match'], axis = 1, inplace = True)
df5 = df5.rename(columns={'level_0': 'Old_Index', 'Date': 'Date'})
df5.head()

# In[12]:
# Numeric month followed by "/" and a year, no day.
re6 = df.str.extractall(r'(?:(?P<Month>\d{1,2})[/](?P<Day>)(?P<Year>(?:19|20)?\d{4}))')

# In[13]:
re6['Month'] = re6['Month'].str.replace(".", "", regex=False).str.strip()
re6['Month'] = re6['Month'].str.replace(",", "", regex=False).str.strip()
re6['Month'] = re6['Month'].apply(lambda x: '0'+x if len(x) < 2 else x)
re6['Day'] = re6['Day'].replace(np.nan, '01', regex = True)
re6['Year'] = re6['Year'].apply(lambda x: '19'+x if len(x) < 4 else x)
re6 = re6[re6.Day.astype(int) < 32]
re6 = re6[re6.Month.astype(int) < 13]
df6 = pd.DataFrame((re6['Month']+'/'+re6['Day']+'/'+re6['Year']).astype('datetime64[ns]'), columns = ['Date'])
df6.reset_index(inplace = True)
df6.drop(['match'], axis = 1, inplace = True)
df6 = df6.rename(columns={'level_0': 'Old_Index', 'Date': 'Date'})
df6.head()

# In[14]:
# Year only (19xx / 20xx); month and day are both filled with '01' below.
re7 = df.str.extractall(r'(?:(?P<Month>)(?P<Day>)(?P<Year>(?:19|20)\d{2}))')

# In[15]:
re7['Month'] = re7['Month'].replace(np.nan, '01', regex = True)
re7['Day'] = re7['Day'].replace(np.nan, '01', regex = True)
re7['Year'] = re7['Year'].apply(lambda x: '19'+x if len(x) < 4 else x)
df7 = pd.DataFrame((re7['Month']+'/'+re7['Day']+'/'+re7['Year']).astype('datetime64[ns]'), columns = ['Date'])
df7.reset_index(inplace = True)
df7.drop(['match'], axis = 1, inplace = True)
df7 = df7.rename(columns={'level_0': 'Old_Index', 'Date': 'Date'})
df7.head()

# # Merge DataFrames

# In[16]:
final_df = pd.concat([df1, df2, df3, df4])
final_df = final_df.sort_values('Old_Index')
final_df.reset_index(drop = True, inplace = True)
final_df.head()

# In[17]:
# Outer-merge the next, less specific pattern and use its date only where no
# date has been found so far.
merge_df = pd.merge(final_df, df5, on = 'Old_Index', how = 'outer', validate = 'one_to_one')
# Assign the result back instead of ``merge_df.Date_x.fillna(..., inplace=True)``:
# an in-place fill on an attribute-accessed column is a chained update that is
# not guaranteed to reach ``merge_df`` (it does not under Copy-on-Write).
merge_df['Date_x'] = merge_df['Date_x'].fillna(merge_df['Date_y'])
merge_df.drop(['Date_y'], axis = 1, inplace = True)
merge_df.rename(columns = {'Date_x': 'Date'}, inplace = True)
final_df = merge_df
final_df.head()

# In[18]:
merge_df = pd.merge(final_df, df6, on = 'Old_Index', how = 'outer', validate = 'one_to_one')
merge_df['Date_x'] = merge_df['Date_x'].fillna(merge_df['Date_y'])
merge_df.drop(['Date_y'], axis = 1, inplace = True)
merge_df.rename(columns = {'Date_x': 'Date'}, inplace = True)
final_df = merge_df
final_df.head()

# In[19]:
merge_df = pd.merge(final_df, df7, on = 'Old_Index', how = 'outer', validate = 'one_to_one')
merge_df['Date_x'] = merge_df['Date_x'].fillna(merge_df['Date_y'])
merge_df.drop(['Date_y'], axis = 1, inplace = True)
merge_df.rename(columns = {'Date_x': 'Date'}, inplace = True)
final_df = merge_df
final_df.head()

# In[20]:
final_df = final_df.sort_values('Date')

# In[21]:
S1 = pd.Series(list(final_df['Old_Index']))

# In[22]:
S1

# In[23]:
# flat_list = [item for sublist in re1 for item in sublist]
# df1 = pd.DataFrame(flat_list, columns = ['Dates'])
# merge_df.query('Date_x == "NaT"')

# In[24]:
# final_df['Date'] = pd.to_datetime(final_df['Date'])
final_df.sort_values('Date')
# final_df

# In[25]:
re4

# In[26]:
# Your code here
# Full date
# (The original ``global df`` statement was dropped: at module level it has no
# effect.)
# Each step below only looks at rows of ``df`` that no earlier pattern matched
# (``index_left``). ``DataFrame.append`` was removed in pandas 2.0, so the
# partial results are combined with ``pd.concat``.
dates_extracted = df.str.extractall(r'(?P<origin>(?P<month>\d?\d)[/|-](?P<day>\d?\d)[/|-](?P<year>\d{4}))')
index_left = ~df.index.isin([x[0] for x in dates_extracted.index])
dates_extracted = pd.concat([dates_extracted, df[index_left].str.extractall(r'(?P<origin>(?P<month>\d?\d)[/|-](?P<day>([0-2]?[0-9])|([3][01]))[/|-](?P<year>\d{2}))')])
index_left = ~df.index.isin([x[0] for x in dates_extracted.index])
# Columns 3 and 4 come from the two un-named groups inside ``day`` above.
del dates_extracted[3]
del dates_extracted[4]
dates_extracted = pd.concat([dates_extracted, df[index_left].str.extractall(r'(?P<origin>(?P<day>\d?\d) ?(?P<month>[a-zA-Z]{3,})\.?,? (?P<year>\d{4}))')])
index_left = ~df.index.isin([x[0] for x in dates_extracted.index])
dates_extracted = pd.concat([dates_extracted, df[index_left].str.extractall(r'(?P<origin>(?P<month>[a-zA-Z]{3,})\.?-? ?(?P<day>\d\d?)(th|nd|st)?,?-? ?(?P<year>\d{4}))')])
# Column 3 is the un-named ``(th|nd|st)`` group of the pattern above.
del dates_extracted[3]
index_left = ~df.index.isin([x[0] for x in dates_extracted.index])

# Without day
# Raw string added: the pattern contains ``\.`` and ``\d``, which are invalid
# escape sequences in a normal string literal. The pattern text is unchanged.
dates_without_day = df[index_left].str.extractall(r'(?P<origin>(?P<month>[A-Z][a-z]{2,}),?\.? (?P<year>\d{4}))')
dates_without_day = pd.concat([dates_without_day, df[index_left].str.extractall(r'(?P<origin>(?P<month>\d\d?)/(?P<year>\d{4}))')])
dates_without_day['day'] = 1
dates_extracted = pd.concat([dates_extracted, dates_without_day])
index_left = ~df.index.isin([x[0] for x in dates_extracted.index])

# Only year
dates_only_year = df[index_left].str.extractall(r'(?P<origin>(?P<year>\d{4}))')
dates_only_year['day'] = 1
dates_only_year['month'] = 1
dates_extracted = pd.concat([dates_extracted, dates_only_year])
index_left = ~df.index.isin([x[0] for x in dates_extracted.index])

# Year
dates_extracted['year'] = dates_extracted['year'].apply(lambda x: '19' + x if len(x) == 2 else x)
dates_extracted['year'] = dates_extracted['year'].apply(lambda x: str(x))

# Month
# Strip one leading '0' from string months; non-string values (the integer 1
# assigned above) pass through unchanged.
dates_extracted['month'] = dates_extracted['month'].apply(lambda x: x[1:] if type(x) is str and x.startswith('0') else x)
month_dict = dict({'September': 9, 'Mar': 3, 'November': 11, 'Jul': 7, 'January': 1, 'December': 12,
                   'Feb': 2, 'May': 5, 'Aug': 8, 'Jun': 6, 'Sep': 9, 'Oct': 10, 'June': 6, 'March': 3,
                   'February': 2, 'Dec': 12, 'Apr': 4, 'Jan': 1, 'Janaury': 1,'August': 8, 'October': 10,
                   'July': 7, 'Since': 1, 'Nov': 11, 'April': 4, 'Decemeber': 12, 'Age': 8})
dates_extracted.replace({"month": month_dict}, inplace=True)
# ... (excerpt is truncated here; the rest of the notebook is not visible)
test_all.py
Source:test_all.py
# NOTE: this excerpt opens with the last two statements of a helper whose
# ``def`` line is outside the visible range (the tests below call ``tmpdir()``,
# presumably this helper - confirm against the full file). They are preserved
# verbatim as a comment so the complete functions below remain importable.
#     d = tempfile.mkdtemp(prefix="pyunpack_test_")
#     return d
#
# NOTE(review): the excerpt was flattened onto one line, so the nesting of the
# statements inside the ``for`` loops below is reconstructed - confirm against
# the upstream file.


def test():
    """Check the errors raised for a missing archive and for a non-archive file."""
    with pytest.raises(ValueError):
        Archive("blabla").extractall(tempfile.gettempdir())
    with pytest.raises(PatoolError):
        Archive(__file__).extractall(tempfile.gettempdir())


def create_arc(format):
    """Create an archive named ``x`` holding a single ``x.txt`` and return its path.

    A fresh directory is obtained from ``tmpdir()``, ``x.txt`` (content
    ``"123"``) is written into it, and the process working directory is changed
    to that directory before ``make_archive`` runs with its ``root_dir`` and
    ``base_dir`` left as ``None``.

    ``format`` is passed straight to ``make_archive`` as the archive format.
    The return value is whatever ``make_archive`` returns.
    """
    d = tmpdir()
    x_txt = join(d, "x.txt")
    # Close the handle explicitly so the content is flushed to disk before the
    # file is archived (the original relied on garbage collection to close it).
    with open(x_txt, "w") as f:
        f.write("123")
    # x_zip = d / "x.zip"
    os.chdir(d)
    x_zip = make_archive(
        "x",
        format,  # the archive format - or tar, bztar, gztar
        root_dir=None,  # root for archive - current working dir if None
        base_dir=None,
    )  # start archiving from here - cwd if None too
    # EasyProcess(["zip", "--no-dir-entries", x_zip, "x.txt"], cwd=d).call()
    return x_zip


def test2():
    """Extract a freshly created archive of every format with each backend."""
    for f in formats:
        print(f)
        x_zip = create_arc(f)

        with pytest.raises(ValueError):
            Archive(x_zip).extractall("blabla")

        d = tmpdir()
        Archive(x_zip, backend="patool").extractall(d)
        ok_file(d, "x.txt")

        d = tmpdir()
        Archive(x_zip).extractall(d)
        ok_file(d, "x.txt")

        d = tmpdir()
        Archive(x_zip, backend="auto").extractall(d)
        ok_file(d, "x.txt")

        if f == "zip":
            d = tmpdir()
            Archive(x_zip, backend="zipfile").extractall(d)
            ok_file(d, "x.txt")

        d = tmpdir()
        cli.extractall(x_zip, d)
        ok_file(d, "x.txt")


def test_subdir():
    """Exercise ``auto_create_dir`` with a target directory named ``subdir``."""
    for f in formats:
        x_zip = create_arc(f)

        d = join(tmpdir(), "subdir")
        with pytest.raises(ValueError):
            Archive(x_zip).extractall(d, auto_create_dir=False)

        d = join(tmpdir(), "subdir")
        Archive(x_zip, backend="auto").extractall(d, auto_create_dir=True)
        # ... (excerpt is truncated here; the rest of this test is not visible)
LambdaTest’s Playwright tutorial gives you a broad overview of the Playwright automation framework, its unique features, and its use cases, with examples to deepen your understanding of Playwright testing. It offers A-to-Z guidance, from installing the Playwright framework to best practices and advanced concepts.
Get 100 minutes of automation testing FREE!