How to use _test_statistics method in pandera

Best Python code snippet using pandera_python

test_schema_statistics.py

Source: test_schema_statistics.py Github

copy

Full Screen

# ... (listing starts mid-function; the enclosing definition is not shown,
# so its visible tail is preserved here verbatim as a comment)
#     checks = schema_statistics.parse_check_statistics(check_stats)
#     if checks is None:
#         checks = []
#     assert set(checks) == set(expectation)


def _test_statistics(statistics, expectations):
    """Assert that inferred statistics match the expected statistics.

    Both arguments may be a single statistics dict or a list of dicts; a
    single dict is wrapped in a one-element list. For each pair, the
    ``"dtype"`` entries are compared with ``expectation_dtype.check(...)``
    and every other key is compared with ``==``.

    :param statistics: statistics dict, or list of dicts, under test.
    :param expectations: expected statistics dict, or list of dicts.
    :raises AssertionError: if the number of entries differs or any pair
        does not match.
    """
    if not isinstance(statistics, list):
        statistics = [statistics]
    if not isinstance(expectations, list):
        expectations = [expectations]

    # zip() stops at the shorter input, so without this check a missing or
    # extra statistics entry would go unnoticed.
    assert len(statistics) == len(expectations)

    for stats, expectation in zip(statistics, expectations):
        # Work on shallow copies so the caller's (parametrized) dicts are
        # not mutated by removing the "dtype" key.
        stats = dict(stats)
        expectation = dict(expectation)
        stat_dtype = stats.pop("dtype")
        expectation_dtype = expectation.pop("dtype")
        assert stats == expectation
        assert expectation_dtype.check(stat_dtype)


@pytest.mark.parametrize(
    "series, expectation",
    [
        *[
            [
                pd.Series(
                    [1, 2, 3], dtype=str(pandas_engine.Engine.dtype(data_type))
                ),
                {
                    "dtype": pandas_engine.Engine.dtype(data_type),
                    "nullable": False,
                    "checks": {
                        "greater_than_or_equal_to": 1,
                        "less_than_or_equal_to": 3,
                    },
                    "name": None,
                },
            ]
            for data_type in NUMERIC_TYPES
        ],
        [
            pd.Series(["a", "b", "c", "a"], dtype="category"),
            {
                "dtype": pandas_engine.Engine.dtype(pa.Category),
                "nullable": False,
                "checks": {"isin": ["a", "b", "c"]},
                "name": None,
            },
        ],
        [
            pd.Series(["a", "b", "c", "a"], dtype="string", name="str_series"),
            {
                "dtype": pandas_engine.Engine.dtype("string"),
                "nullable": False,
                "checks": None,
                "name": "str_series",
            },
        ],
        [
            pd.Series(pd.to_datetime(["20180101", "20180102", "20180103"])),
            {
                "dtype": pandas_engine.Engine.dtype(pa.DateTime),
                "nullable": False,
                "checks": {
                    "greater_than_or_equal_to": pd.Timestamp("20180101"),
                    "less_than_or_equal_to": pd.Timestamp("20180103"),
                },
                "name": None,
            },
        ],
    ],
)
def test_infer_series_schema_statistics(series, expectation) -> None:
    """Test series statistics are correctly inferred."""
    statistics = schema_statistics.infer_series_statistics(series)
    _test_statistics(statistics, expectation)


@pytest.mark.parametrize(
    "null_index, series, expectation",
    [
        *[
            [
                0,
                pd.Series([1, 2, 3], dtype=str(data_type)),
                {
                    # introducing nans to integer arrays upcasts to float
                    "dtype": DEFAULT_FLOAT,
                    "nullable": True,
                    "checks": {
                        "greater_than_or_equal_to": 2,
                        "less_than_or_equal_to": 3,
                    },
                    "name": None,
                },
            ]
            for data_type in INTEGER_TYPES
        ],
        [
            # introducing nans to bool arrays upcasts to float except
            # for pandas >= 1.3.0
            0,
            pd.Series([True, False, True, False]),
            {
                "dtype": (
                    pandas_engine.Engine.dtype(pa.BOOL)
                    if pa.PANDAS_1_3_0_PLUS
                    else DEFAULT_FLOAT
                ),
                "nullable": True,
                "checks": (
                    None
                    if pa.PANDAS_1_3_0_PLUS
                    else {
                        "greater_than_or_equal_to": 0,
                        "less_than_or_equal_to": 1,
                    }
                ),
                "name": None,
            },
        ],
        [
            0,
            pd.Series(["a", "b", "c", "a"], dtype="category"),
            {
                "dtype": pandas_engine.Engine.dtype(pa.Category),
                "nullable": True,
                "checks": {"isin": ["a", "b", "c"]},
                "name": None,
            },
        ],
        [
            0,
            pd.Series(["a", "b", "c", "a"], name="str_series"),
            {
                "dtype": pandas_engine.Engine.dtype(str),
                "nullable": True,
                "checks": None,
                "name": "str_series",
            },
        ],
        [
            2,
            pd.Series(pd.to_datetime(["20180101", "20180102", "20180103"])),
            {
                "dtype": pandas_engine.Engine.dtype(pa.DateTime),
                "nullable": True,
                "checks": {
                    "greater_than_or_equal_to": pd.Timestamp("20180101"),
                    "less_than_or_equal_to": pd.Timestamp("20180102"),
                },
                "name": None,
            },
        ],
    ],
)
def test_infer_nullable_series_schema_statistics(
    null_index, series, expectation
):
    """Test nullable series statistics are correctly inferred."""
    # Null out one position in place before inferring statistics.
    series.iloc[null_index] = None
    statistics = schema_statistics.infer_series_statistics(series)
    _test_statistics(statistics, expectation)


@pytest.mark.parametrize(
    "index, expectation",
    [
        [
            pd.RangeIndex(20),
            [
                {
                    "name": None,
                    "dtype": DEFAULT_INT,
                    "nullable": False,
                    "checks": {
                        "greater_than_or_equal_to": 0,
                        "less_than_or_equal_to": 19,
                    },
                }
            ],
        ],
        [
            pd.Index([1, 2, 3], name="int_index"),
            [
                {
                    "name": "int_index",
                    "dtype": DEFAULT_INT,
                    "nullable": False,
                    "checks": {
                        "greater_than_or_equal_to": 1,
                        "less_than_or_equal_to": 3,
                    },
                }
            ],
        ],
        [
            pd.Index(["foo", "bar", "baz"], name="str_index"),
            [
                {
                    "name": "str_index",
                    "dtype": pandas_engine.Engine.dtype("object"),
                    "nullable": False,
                    "checks": None,
                },
            ],
        ],
        [
            pd.MultiIndex.from_arrays(
                [[10, 11, 12], pd.Series(["a", "b", "c"], dtype="category")],
                names=["int_index", "str_index"],
            ),
            [
                {
                    "name": "int_index",
                    "dtype": DEFAULT_INT,
                    "nullable": False,
                    "checks": {
                        "greater_than_or_equal_to": 10,
                        "less_than_or_equal_to": 12,
                    },
                },
                {
                    "name": "str_index",
                    "dtype": pandas_engine.Engine.dtype(pa.Category),
                    "nullable": False,
                    "checks": {"isin": ["a", "b", "c"]},
                },
            ],
        ],
        # UserWarning cases
        [1, UserWarning],
        ["foo", UserWarning],
        [{"foo": "bar"}, UserWarning],
        [["foo", "bar"], UserWarning],
        [pd.Series(["foo", "bar"]), UserWarning],
        [pd.DataFrame({"column": ["foo", "bar"]}), UserWarning],
    ],
)
def test_infer_index_statistics(index, expectation):
    """Test that index statistics are correctly inferred."""
    if expectation is UserWarning:
        with pytest.warns(UserWarning, match="^index type .+ not recognized"):
            schema_statistics.infer_index_statistics(index)
    else:
        _test_statistics(
            schema_statistics.infer_index_statistics(index), expectation
        )


def test_get_dataframe_schema_statistics():
    """Test that dataframe schema statistics logic is correct."""
    schema = pa.DataFrameSchema(
        columns={
            "int": pa.Column(
                int,
                checks=[
                    pa.Check.greater_than_or_equal_to(0),
                    pa.Check.less_than_or_equal_to(100),
                ],
                nullable=True,
            ),
            "float": pa.Column(
                float,
                checks=[
                    pa.Check.greater_than_or_equal_to(50),
                    pa.Check.less_than_or_equal_to(100),
                ],
            ),
            "str": pa.Column(
                str,
                checks=[pa.Check.isin(["foo", "bar", "baz"])],
            ),
        },
        index=pa.Index(
            int,
            checks=pa.Check.greater_than_or_equal_to(0),
            nullable=False,
            name="int_index",
        ),
    )
    expectation = {
        "checks": None,
        "columns": {
            "int": {
                "dtype": DEFAULT_INT,
                "checks": {
                    "greater_than_or_equal_to": {"min_value": 0},
                    "less_than_or_equal_to": {"max_value": 100},
                },
                "nullable": True,
                "unique": False,
                "coerce": False,
                "required": True,
                "regex": False,
            },
            "float": {
                "dtype": DEFAULT_FLOAT,
                "checks": {
                    "greater_than_or_equal_to": {"min_value": 50},
                    "less_than_or_equal_to": {"max_value": 100},
                },
                "nullable": False,
                "unique": False,
                "coerce": False,
                "required": True,
                "regex": False,
            },
            "str": {
                "dtype": pandas_engine.Engine.dtype(str),
                "checks": {"isin": {"allowed_values": ["foo", "bar", "baz"]}},
                "nullable": False,
                "unique": False,
                "coerce": False,
                "required": True,
                "regex": False,
            },
        },
        "index": [
            {
                "dtype": DEFAULT_INT,
                "checks": {"greater_than_or_equal_to": {"min_value": 0}},
                "nullable": False,
                "coerce": False,
                "name": "int_index",
            }
        ],
        "coerce": False,
    }
    statistics = schema_statistics.get_dataframe_schema_statistics(schema)
    assert statistics == expectation


def test_get_series_schema_statistics():
    """Test that series schema statistics logic is correct."""
    schema = pa.SeriesSchema(
        int,
        nullable=False,
        checks=[
            pa.Check.greater_than_or_equal_to(0),
            pa.Check.less_than_or_equal_to(100),
        ],
    )
    statistics = schema_statistics.get_series_schema_statistics(schema)
    assert statistics == {
        "dtype": pandas_engine.Engine.dtype(int),
        "nullable": False,
        "checks": {
            "greater_than_or_equal_to": {"min_value": 0},
            "less_than_or_equal_to": {"max_value": 100},
        },
        "name": None,
        "coerce": False,
    }


@pytest.mark.parametrize(
    "index_schema_component, expectation",
    [
        [
            pa.Index(
                int,
                checks=[
                    pa.Check.greater_than_or_equal_to(10),
                    pa.Check.less_than_or_equal_to(20),
                ],
                nullable=False,
                name="int_index",
            ),
            [
                {
                    "dtype": pandas_engine.Engine.dtype(int),
                    "nullable": False,
                    "checks": {
                        "greater_than_or_equal_to": {"min_value": 10},
                        "less_than_or_equal_to": {"max_value": 20},
                    },
                    "name": "int_index",
                    "coerce": False,
                }
            ],
        ]
    ],
)
def test_get_index_schema_statistics(index_schema_component, expectation):
    """Test that index schema statistics logic is correct."""
    statistics = schema_statistics.get_index_schema_statistics(
        index_schema_component
    )
    _test_statistics(statistics, expectation)


# The listing is truncated inside the next parametrized test; the visible
# part is preserved verbatim as a comment because the definition is incomplete.
# @pytest.mark.parametrize(
#     "checks, expectation",
#     [
#         *[
#             [[check], {check.name: check.statistics}]
#             for check in [
#                 pa.Check.greater_than(1),
#                 pa.Check.less_than(1),
#                 pa.Check.in_range(1, 3),
#                 pa.Check.equal_to(1),
#                 pa.Check.not_equal_to(1),
#                 pa.Check.notin([1, 2, 3]),
#                 pa.Check.str_matches("foobar"),
#                 pa.Check.str_contains("foobar"),
# ...

Full Screen

Full Screen

test_histogram.py

Source: test_histogram.py Github

copy

Full Screen

# ... (listing starts mid-function; the enclosing definition is not shown,
# so its visible tail is preserved here verbatim as a comment)
#     data = [15, 15, 20, 20, 20, 35, 35, 40, 40, 50, 50]
#     histogram = traces.Histogram(data)
#     normalized = histogram.normalized()
#     assert sum(normalized.values()) == 1.0


def _test_statistics(normalized):
    """Compare ``traces.Histogram`` summary statistics against numpy/scipy.

    For each sample data set, builds a histogram and checks its total,
    mean, variance, standard deviation, max, min, median and a range of
    quantiles against the reference values computed directly from the data.

    :param normalized: when true, the histogram is replaced by
        ``histogram.normalized()`` and its total is expected to be 1;
        otherwise the total is expected to be ``len(data)``.
    """
    data_list = [
        [1, 2, 3, 5, 6, 7],
        [1, 2, 3, 5, 6],
        [1, 1],
        [1, 1, 1, 1, 1, 1, 1, 2],
        [i + 0.25 for i in [1, 1, 1, 1, 1, 1, 1, 2]],
    ]
    # Invariant across data sets, so built once outside the loop.
    q_list = [0.001, 0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99, 0.999]

    for data in data_list:
        histogram = traces.Histogram(data)
        if normalized:
            histogram = histogram.normalized()
            n = 1
        else:
            n = len(data)

        nose.tools.assert_almost_equal(histogram.total(), n)
        nose.tools.assert_almost_equal(histogram.mean(), numpy.mean(data))
        nose.tools.assert_almost_equal(histogram.variance(), numpy.var(data))
        nose.tools.assert_almost_equal(
            histogram.standard_deviation(),
            numpy.std(data),
        )
        nose.tools.assert_almost_equal(histogram.max(), numpy.max(data))
        nose.tools.assert_almost_equal(histogram.min(), numpy.min(data))
        nose.tools.assert_almost_equal(
            histogram.quantile(0.5),
            numpy.median(data),
        )

        # linear interpolation
        result = histogram.quantiles(q_list)
        reference = stats.mstats.mquantiles(
            data, prob=q_list, alphap=0.5, betap=0.5,
        )
        for i, j in zip(result, reference):
            nose.tools.assert_almost_equal(i, j)

        # make sure to throw an error for bad quantile values; unlike a
        # bare try/except-pass, this fails when no ValueError is raised
        nose.tools.assert_raises(ValueError, histogram.quantile, -1)


def test_statistics():
    """Check statistics of the raw (un-normalized) histogram."""
    _test_statistics(False)


def test_normalized_statistics():
    """Check statistics of the normalized histogram."""
    _test_statistics(True)


def test_quantile_interpolation():
    """Check quantiles computed as the inverse of the empirical CDF."""
    data = [1, 1, 1, 2, 3, 5, 6, 7]
    histogram = traces.Histogram(data)
    normalized = histogram.normalized()
    q_list = [0.001, 0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99, 0.999]
    # just do the inverse of the empirical cdf
    result = histogram.quantiles(q_list, alpha=0, smallest_count=1)
    answer = [1.0, 1.0, 1.0, 1.0, 2.5, 5.5, 7.0, 7.0, 7.0]
    for i, j in zip(result, answer):
        nose.tools.assert_almost_equal(i, j)
    # same thing with normalized
    result = normalized.quantiles(
        q_list, alpha=0, smallest_count=1.0 / len(data))
    for i, j in zip(result, answer):
        ...  # loop body is truncated in the source listing

Full Screen

Full Screen

Blogs

Check out the latest blogs from LambdaTest on this topic:

Test strategy and how to communicate it

I routinely come across test strategy documents when working with customers. They are lengthy—100 pages or more—and packed with monotonous text that is routinely reused from one project to another. Yawn once more—the test halt and resume circumstances, the defect management procedure, entrance and exit criteria, unnecessary generic risks, and, in fact, one often-used model replicates the requirements of textbook testing, from stress to systems integration.

How To Write End-To-End Tests Using Cypress App Actions

When I started writing tests with Cypress, I was always going to use the user interface to interact and change the application’s state when running tests.

Pair testing strategy in an Agile environment

Pair testing can help you complete your testing tasks faster and with higher quality. But who can do pair testing, and when should it be done? And what form of pair testing is best for your circumstance? Check out this blog for more information on how to conduct pair testing to optimize its benefits.

LIVE With Automation Testing For OTT Streaming Devices

People love to watch, read and interact with quality content — especially video content. Whether it is sports, news, TV shows, or videos captured on smartphones, people crave digital content. The emergence of OTT platforms has already shaped the way people consume content. Viewers can now enjoy their favorite shows whenever they want rather than at pre-set times. Thus, the OTT platform’s concept of viewing anything, anytime, anywhere has hit the right chord.

Different Ways To Style CSS Box Shadow Effects

Have you ever visited a website that only has plain text and images? Most probably, no. It’s because such websites do not exist now. But there was a time when websites only had plain text and images with almost no styling. For the longest time, websites did not focus on user experience. For instance, this is how eBay’s homepage looked in 1999.

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub, right from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g., Selenium, Cypress, TestNG, etc.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run pandera automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now !!

Get 100 automation test minutes FREE!!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful