Best Python code snippet using pandera_python
test_hypotheses.py
Source:test_hypotheses.py
...91 {92 "height_in_feet": Column(93 Float,94 [95 Hypothesis.two_sample_ttest(96 sample1="M",97 sample2="F",98 groupby="sex",99 relationship="greater_than",100 alpha=0.5,101 ),102 ],103 ),104 "sex": Column(String),105 }106 )107 schema_pass_ttest_on_alpha_val_2 = DataFrameSchema(108 {109 "height_in_feet": Column(110 Float,111 [112 Hypothesis(113 test=stats.ttest_ind,114 samples=["M", "F"],115 groupby="sex",116 relationship="greater_than",117 relationship_kwargs={"alpha": 0.5},118 ),119 ],120 ),121 "sex": Column(String),122 }123 )124 schema_pass_ttest_on_alpha_val_3 = DataFrameSchema(125 {126 "height_in_feet": Column(127 Float,128 [129 Hypothesis.two_sample_ttest(130 sample1="M",131 sample2="F",132 groupby="sex",133 relationship="greater_than",134 alpha=0.5,135 ),136 ],137 ),138 "sex": Column(String),139 }140 )141 schema_pass_ttest_on_custom_relationship = DataFrameSchema(142 {143 "height_in_feet": Column(144 Float,145 [146 Hypothesis(147 test=stats.ttest_ind,148 samples=["M", "F"],149 groupby="sex",150 relationship=lambda stat, pvalue, alpha=0.01: (151 stat > 0 and pvalue / 2 < alpha152 ),153 relationship_kwargs={"alpha": 0.5},154 )155 ],156 ),157 "sex": Column(String),158 }159 )160 # Check the 3 happy paths are successful:161 schema_pass_ttest_on_alpha_val_1.validate(df)162 schema_pass_ttest_on_alpha_val_2.validate(df)163 schema_pass_ttest_on_alpha_val_3.validate(df)164 schema_pass_ttest_on_custom_relationship.validate(df)165 schema_fail_ttest_on_alpha_val_1 = DataFrameSchema(166 {167 "height_in_feet": Column(168 Float,169 [170 Hypothesis.two_sample_ttest(171 sample1="M",172 sample2="F",173 groupby="sex",174 relationship="greater_than",175 alpha=0.05,176 ),177 ],178 ),179 "sex": Column(String),180 }181 )182 schema_fail_ttest_on_alpha_val_2 = DataFrameSchema(183 {184 "height_in_feet": Column(185 Float,186 [187 Hypothesis(188 test=stats.ttest_ind,189 samples=["M", "F"],190 groupby="sex",191 relationship="greater_than",192 relationship_kwargs={"alpha": 0.05},193 
),194 ],195 ),196 "sex": Column(String),197 }198 )199 schema_fail_ttest_on_alpha_val_3 = DataFrameSchema(200 {201 "height_in_feet": Column(202 Float,203 [204 Hypothesis.two_sample_ttest(205 sample1="M",206 sample2="F",207 groupby="sex",208 relationship="greater_than",209 alpha=0.05,210 ),211 ],212 ),213 "sex": Column(String),214 }215 )216 with pytest.raises(errors.SchemaError):217 schema_fail_ttest_on_alpha_val_1.validate(df)218 with pytest.raises(errors.SchemaError):219 schema_fail_ttest_on_alpha_val_2.validate(df)220 with pytest.raises(errors.SchemaError):221 schema_fail_ttest_on_alpha_val_3.validate(df)222def test_two_sample_ttest_hypothesis_relationships():223 """Check allowable relationships in two-sample ttest."""224 for relationship in Hypothesis.RELATIONSHIPS:225 schema = DataFrameSchema(226 {227 "height_in_feet": Column(228 Float,229 [230 Hypothesis.two_sample_ttest(231 sample1="M",232 sample2="F",233 groupby="sex",234 relationship=relationship,235 alpha=0.5,236 ),237 ],238 ),239 "sex": Column(String),240 }241 )242 assert isinstance(schema, DataFrameSchema)243 for relationship in ["foo", "bar", 1, 2, 3, None]:244 with pytest.raises(errors.SchemaInitError):245 DataFrameSchema(246 {247 "height_in_feet": Column(248 Float,249 [250 Hypothesis.two_sample_ttest(251 sample1="M",252 sample2="F",253 groupby="sex",254 relationship=relationship,255 alpha=0.5,256 ),257 ],258 ),259 "sex": Column(String),260 }261 )262def test_one_sample_hypothesis():263 """Check one sample ttest."""264 schema = DataFrameSchema(...
hypothesis_testing.py
Source:hypothesis_testing.py
import sqlite3
import sys

# NOTE(review): the first line of this listing is cut off; ttest_ind is
# called below, so the scipy import is restored here.
from scipy.stats import ttest_ind

# The project-local module lives one directory up, so the search path must be
# extended before importing it.
sys.path.append('../')
from Spotify_Song_Data import getSongData

# Default location of the Billboard SQLite database.
DB_PATH = "/Users/mohammedakel/Desktop/CS1951A-Spring2022/rim-dj/data_deliverable/data/billboard.db"

# Chart dates as (month, day, year) tuples.
# Peaks: 4/10/2020, 7/24/2020, 1/11/2021, 9/13/2021, 1/15/2022
PEAK_DATES = (
    (4, 10, 2020),
    (7, 24, 2020),
    (1, 11, 2021),
    (9, 13, 2021),
    (1, 15, 2022),
)
# Troughs: 9/11/2020, 12/30/2020, 6/22/2021, 10/26/2021, 11/28/2021
TROUGH_DATES = (
    (9, 11, 2020),
    (12, 30, 2020),
    (6, 22, 2021),
    (10, 26, 2021),
    (11, 28, 2021),
)


def two_sample_ttest(peak_values, trough_values):
    """
    Run an independent two-sample t-test and print the result.

    Input:
        - peak_values: the attribute values of songs during a COVID peak time period
        - trough_values: the attribute values of songs from a COVID trough time period
    Output:
        - tstats: Test statistics (float)
        - p-value: P-value (float)
    """
    # Using scipy's ttest_ind
    # (https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.ttest_ind.html)
    # to get the t-statistic and the p-value.
    # Note: The code will disregard null (nan) values. We will assume equal variance.
    # Independent two-sample t-test, since we are testing the unknown population
    # means of two groups.
    tstats, pvalue = ttest_ind(peak_values, trough_values, nan_policy='omit')
    print("two_sample_ttest tstats: ", tstats)
    print("two_sample_ttest pvalue: ", pvalue)
    return tstats, pvalue


def get_songs_by_date(month, day, year, db=DB_PATH):
    """
    Look up the songs stored in the ``billboard`` table for one date.

    Input:
        - month, day, year: values matched against the table's month/day/year columns
        - db: path of the SQLite database file (defaults to DB_PATH)
    Output:
        - song_titles: list of titles, in row order
        - artist_names: list of artists, aligned index-for-index with song_titles
    """
    conn = sqlite3.connect(db)
    try:
        cur = conn.cursor()
        cur.execute(
            "SELECT title, artist FROM billboard WHERE month=? AND day=? AND year=?",
            (month, day, year),
        )
        rows = cur.fetchall()
    finally:
        # Always release the connection, even when the query raises.
        conn.close()

    song_titles = [row[0] for row in rows]
    artist_names = [row[1] for row in rows]
    return song_titles, artist_names


def _collect_attribute(dates, attribute):
    """Return ``attribute`` for every song found on the given (month, day, year) dates."""
    # Finish all database reads first, then do the per-song lookups.
    songs = []
    for month, day, year in dates:
        titles, artists = get_songs_by_date(month, day, year)
        songs.extend(zip(titles, artists))

    attribute_values = []
    for title, artist in songs:
        song = getSongData(title, artist, 1)
        # Skip songs for which the lookup returned nothing.
        if len(song) != 0:
            attribute_values.append(song[0][attribute])
    return attribute_values


def get_peak_attribute(attribute):
    """Return the values of ``attribute`` for songs on the COVID peak dates."""
    return _collect_attribute(PEAK_DATES, attribute)


def get_trough_attribute(attribute):
    """Return the values of ``attribute`` for songs on the COVID trough dates."""
    return _collect_attribute(TROUGH_DATES, attribute)


# The driver below is intentionally left disabled inside a string literal,
# exactly as in the original listing.
'''
if __name__ == "__main__":
    # get the peak values and trough values for testing per attribute
    # Note: we can test additional attributes by changing the input argument in get_[peak/trough]_attribute()
    print("---Testing Danceability Difference---")
    peak_danceability = get_peak_attribute("danceability")
    trough_danceability = get_trough_attribute("danceability")
    two_sample_ttest(peak_values=peak_danceability, trough_values=trough_danceability)
    print("---Testing Energy Difference---")
    peak_energy = get_peak_attribute("energy")
    trough_energy = get_trough_attribute("energy")
    two_sample_ttest(peak_values=peak_energy, trough_values=trough_energy)
    print("---Testing Valence Difference---")
    peak_valence = get_peak_attribute("valence")
    trough_valence = get_trough_attribute("valence")
    two_sample_ttest(peak_values=peak_valence, trough_values=trough_valence)
'''
# ... (the source listing is truncated here)
Learn to execute automation testing from scratch with the LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. The LambdaTest Learning Hub compiles step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing FREE!!