How to use parse_check_statistics method in pandera

Best Python code snippet using pandera_python

test_schema_statistics.py

Source: test_schema_statistics.py Github

copy

Full Screen

# ... (excerpt begins here; earlier lines of test_schema_statistics.py,
# including its imports, are not shown). The fragment below is the tail of
# a decorator call -- presumably the @pytest.mark.parametrize for
# test_parse_check_statistics; confirm against the full file.
#         ],
#         [{}, None],
#     ],
# )
def test_parse_check_statistics(check_stats, expectation) -> None:
    """Test that Checks are correctly parsed from check statistics."""
    if expectation is None:
        expectation = []
    checks = schema_statistics.parse_check_statistics(check_stats)
    if checks is None:
        checks = []
    assert set(checks) == set(expectation)


def _test_statistics(statistics, expectations):
    """Assert that statistics match the expected statistics.

    Both arguments may be a single dict or a list of dicts; a single value
    is wrapped in a one-element list. For every pair, the ``"dtype"`` entry
    is popped from both dicts (mutating them), the remaining keys are
    compared for equality, and the dtypes are compared with
    ``expectation_dtype.check(stat_dtype)``.

    :param statistics: statistics dict, or list of statistics dicts.
    :param expectations: expected dict, or list of expected dicts.
    """
    if not isinstance(statistics, list):
        statistics = [statistics]
    if not isinstance(expectations, list):
        expectations = [expectations]

    # NOTE: zip stops at the shorter list, so a length mismatch between
    # statistics and expectations is not detected here.
    for stats, expectation in zip(statistics, expectations):
        stat_dtype = stats.pop("dtype")
        expectation_dtype = expectation.pop("dtype")

        assert stats == expectation
        assert expectation_dtype.check(stat_dtype)


@pytest.mark.parametrize(
    "series, expectation",
    [
        *[
            [
                pd.Series(
                    [1, 2, 3], dtype=str(pandas_engine.Engine.dtype(data_type))
                ),
                {
                    "dtype": pandas_engine.Engine.dtype(data_type),
                    "nullable": False,
                    "checks": {
                        "greater_than_or_equal_to": 1,
                        "less_than_or_equal_to": 3,
                    },
                    "name": None,
                },
            ]
            for data_type in NUMERIC_TYPES
        ],
        [
            pd.Series(["a", "b", "c", "a"], dtype="category"),
            {
                "dtype": pandas_engine.Engine.dtype(pa.Category),
                "nullable": False,
                "checks": {"isin": ["a", "b", "c"]},
                "name": None,
            },
        ],
        [
            pd.Series(["a", "b", "c", "a"], dtype="string", name="str_series"),
            {
                "dtype": pandas_engine.Engine.dtype("string"),
                "nullable": False,
                "checks": None,
                "name": "str_series",
            },
        ],
        [
            pd.Series(pd.to_datetime(["20180101", "20180102", "20180103"])),
            {
                "dtype": pandas_engine.Engine.dtype(pa.DateTime),
                "nullable": False,
                "checks": {
                    "greater_than_or_equal_to": pd.Timestamp("20180101"),
                    "less_than_or_equal_to": pd.Timestamp("20180103"),
                },
                "name": None,
            },
        ],
    ],
)
def test_infer_series_schema_statistics(series, expectation) -> None:
    """Test series statistics are correctly inferred."""
    statistics = schema_statistics.infer_series_statistics(series)
    _test_statistics(statistics, expectation)


@pytest.mark.parametrize(
    "null_index, series, expectation",
    [
        *[
            [
                0,
                pd.Series([1, 2, 3], dtype=str(data_type)),
                {
                    # introducing nans to integer arrays upcasts to float
                    "dtype": DEFAULT_FLOAT,
                    "nullable": True,
                    "checks": {
                        "greater_than_or_equal_to": 2,
                        "less_than_or_equal_to": 3,
                    },
                    "name": None,
                },
            ]
            for data_type in INTEGER_TYPES
        ],
        [
            # introducing nans to bool arrays upcasts to float except
            # for pandas >= 1.3.0
            0,
            pd.Series([True, False, True, False]),
            {
                "dtype": (
                    pandas_engine.Engine.dtype(pa.BOOL)
                    if pa.PANDAS_1_3_0_PLUS
                    else DEFAULT_FLOAT
                ),
                "nullable": True,
                "checks": (
                    None
                    if pa.PANDAS_1_3_0_PLUS
                    else {
                        "greater_than_or_equal_to": 0,
                        "less_than_or_equal_to": 1,
                    }
                ),
                "name": None,
            },
        ],
        [
            0,
            pd.Series(["a", "b", "c", "a"], dtype="category"),
            {
                "dtype": pandas_engine.Engine.dtype(pa.Category),
                "nullable": True,
                "checks": {"isin": ["a", "b", "c"]},
                "name": None,
            },
        ],
        [
            0,
            pd.Series(["a", "b", "c", "a"], name="str_series"),
            {
                "dtype": pandas_engine.Engine.dtype(str),
                "nullable": True,
                "checks": None,
                "name": "str_series",
            },
        ],
        [
            2,
            pd.Series(pd.to_datetime(["20180101", "20180102", "20180103"])),
            {
                "dtype": pandas_engine.Engine.dtype(pa.DateTime),
                "nullable": True,
                "checks": {
                    "greater_than_or_equal_to": pd.Timestamp("20180101"),
                    "less_than_or_equal_to": pd.Timestamp("20180102"),
                },
                "name": None,
            },
        ],
    ],
)
def test_infer_nullable_series_schema_statistics(
    null_index, series, expectation
):
    """Test nullable series statistics are correctly inferred."""
    series.iloc[null_index] = None
    statistics = schema_statistics.infer_series_statistics(series)
    _test_statistics(statistics, expectation)


@pytest.mark.parametrize(
    "index, expectation",
    [
        [
            pd.RangeIndex(20),
            [
                {
                    "name": None,
                    "dtype": DEFAULT_INT,
                    "nullable": False,
                    "checks": {
                        "greater_than_or_equal_to": 0,
                        "less_than_or_equal_to": 19,
                    },
                }
            ],
        ],
        [
            pd.Index([1, 2, 3], name="int_index"),
            [
                {
                    "name": "int_index",
                    "dtype": DEFAULT_INT,
                    "nullable": False,
                    "checks": {
                        "greater_than_or_equal_to": 1,
                        "less_than_or_equal_to": 3,
                    },
                }
            ],
        ],
        [
            pd.Index(["foo", "bar", "baz"], name="str_index"),
            [
                {
                    "name": "str_index",
                    "dtype": pandas_engine.Engine.dtype("object"),
                    "nullable": False,
                    "checks": None,
                },
            ],
        ],
        [
            pd.MultiIndex.from_arrays(
                [[10, 11, 12], pd.Series(["a", "b", "c"], dtype="category")],
                names=["int_index", "str_index"],
            ),
            [
                {
                    "name": "int_index",
                    "dtype": DEFAULT_INT,
                    "nullable": False,
                    "checks": {
                        "greater_than_or_equal_to": 10,
                        "less_than_or_equal_to": 12,
                    },
                },
                {
                    "name": "str_index",
                    "dtype": pandas_engine.Engine.dtype(pa.Category),
                    "nullable": False,
                    "checks": {"isin": ["a", "b", "c"]},
                },
            ],
        ],
        # UserWarning cases
        [1, UserWarning],
        ["foo", UserWarning],
        [{"foo": "bar"}, UserWarning],
        [["foo", "bar"], UserWarning],
        [pd.Series(["foo", "bar"]), UserWarning],
        [pd.DataFrame({"column": ["foo", "bar"]}), UserWarning],
    ],
)
def test_infer_index_statistics(index, expectation):
    """Test that index statistics are correctly inferred."""
    if expectation is UserWarning:
        with pytest.warns(UserWarning, match="^index type .+ not recognized"):
            schema_statistics.infer_index_statistics(index)
    else:
        _test_statistics(
            schema_statistics.infer_index_statistics(index), expectation
        )


def test_get_dataframe_schema_statistics():
    """Test that dataframe schema statistics logic is correct."""
    schema = pa.DataFrameSchema(
        columns={
            "int": pa.Column(
                int,
                checks=[
                    pa.Check.greater_than_or_equal_to(0),
                    pa.Check.less_than_or_equal_to(100),
                ],
                nullable=True,
            ),
            "float": pa.Column(
                float,
                checks=[
                    pa.Check.greater_than_or_equal_to(50),
                    pa.Check.less_than_or_equal_to(100),
                ],
            ),
            "str": pa.Column(
                str,
                checks=[pa.Check.isin(["foo", "bar", "baz"])],
            ),
        },
        index=pa.Index(
            int,
            checks=pa.Check.greater_than_or_equal_to(0),
            nullable=False,
            name="int_index",
        ),
    )
    expectation = {
        "checks": None,
        "columns": {
            "int": {
                "dtype": DEFAULT_INT,
                "checks": {
                    "greater_than_or_equal_to": {"min_value": 0},
                    "less_than_or_equal_to": {"max_value": 100},
                },
                "nullable": True,
                "unique": False,
                "coerce": False,
                "required": True,
                "regex": False,
            },
            "float": {
                "dtype": DEFAULT_FLOAT,
                "checks": {
                    "greater_than_or_equal_to": {"min_value": 50},
                    "less_than_or_equal_to": {"max_value": 100},
                },
                "nullable": False,
                "unique": False,
                "coerce": False,
                "required": True,
                "regex": False,
            },
            "str": {
                "dtype": pandas_engine.Engine.dtype(str),
                "checks": {"isin": {"allowed_values": ["foo", "bar", "baz"]}},
                "nullable": False,
                "unique": False,
                "coerce": False,
                "required": True,
                "regex": False,
            },
        },
        "index": [
            {
                "dtype": DEFAULT_INT,
                "checks": {"greater_than_or_equal_to": {"min_value": 0}},
                "nullable": False,
                "coerce": False,
                "name": "int_index",
            }
        ],
        "coerce": False,
    }
    statistics = schema_statistics.get_dataframe_schema_statistics(schema)
    assert statistics == expectation


def test_get_series_schema_statistics():
    """Test that series schema statistics logic is correct."""
    schema = pa.SeriesSchema(
        int,
        nullable=False,
        checks=[
            pa.Check.greater_than_or_equal_to(0),
            pa.Check.less_than_or_equal_to(100),
        ],
    )
    statistics = schema_statistics.get_series_schema_statistics(schema)
    assert statistics == {
        "dtype": pandas_engine.Engine.dtype(int),
        "nullable": False,
        "checks": {
            "greater_than_or_equal_to": {"min_value": 0},
            "less_than_or_equal_to": {"max_value": 100},
        },
        "name": None,
        "coerce": False,
    }


@pytest.mark.parametrize(
    "index_schema_component, expectation",
    [
        [
            pa.Index(
                int,
                checks=[
                    pa.Check.greater_than_or_equal_to(10),
                    pa.Check.less_than_or_equal_to(20),
                ],
                nullable=False,
                name="int_index",
            ),
            [
                {
                    "dtype": pandas_engine.Engine.dtype(int),
                    "nullable": False,
                    "checks": {
                        "greater_than_or_equal_to": {"min_value": 10},
                        "less_than_or_equal_to": {"max_value": 20},
                    },
                    "name": "int_index",
                    "coerce": False,
                }
            ],
        ]
    ],
)
def test_get_index_schema_statistics(index_schema_component, expectation):
    """Test that index schema statistics logic is correct."""
    statistics = schema_statistics.get_index_schema_statistics(
        index_schema_component
    )
    _test_statistics(statistics, expectation)


@pytest.mark.parametrize(
    "checks, expectation",
    [
        *[
            [[check], {check.name: check.statistics}]
            for check in [
                pa.Check.greater_than(1),
                pa.Check.less_than(1),
                pa.Check.in_range(1, 3),
                pa.Check.equal_to(1),
                pa.Check.not_equal_to(1),
                pa.Check.notin([1, 2, 3]),
                pa.Check.str_matches("foobar"),
                pa.Check.str_contains("foobar"),
                pa.Check.str_startswith("foobar"),
                pa.Check.str_endswith("foobar"),
                pa.Check.str_length(5, 10),
            ]
        ],
        # multiple checks at once
        [
            [
                pa.Check.greater_than_or_equal_to(10),
                pa.Check.less_than_or_equal_to(50),
                pa.Check.isin([10, 20, 30, 40, 50]),
            ],
            {
                "greater_than_or_equal_to": {"min_value": 10},
                "less_than_or_equal_to": {"max_value": 50},
                "isin": {"allowed_values": [10, 20, 30, 40, 50]},
            },
        ],
        # incompatible checks
        *[
            [
                [
                    pa.Check.greater_than_or_equal_to(min_value),
                    pa.Check.less_than_or_equal_to(max_value),
                ],
                ValueError,
            ]
            for min_value, max_value in [
                (5, 1),
                (10, 1),
                (100, 10),
                (1000, 100),
            ]
        ],
    ],
)
def test_parse_checks_and_statistics_roundtrip(checks, expectation):
    """
    Test that parse checks correctly obtain statistics from checks and
    vice-versa.
    """
    if expectation is ValueError:
        with pytest.raises(ValueError):
            schema_statistics.parse_checks(checks)
        return
    assert schema_statistics.parse_checks(checks) == expectation

    check_statistics = {check.name: check.statistics for check in checks}
    check_list = schema_statistics.parse_check_statistics(check_statistics)
    assert set(check_list) == set(checks)


# pylint: disable=unused-argument
def test_parse_checks_and_statistics_no_param(extra_registered_checks):
    """
    Ensure that an edge case where a check does not have parameters is
    appropriately handled.
    """
    checks = [pa.Check.no_param_check()]
    expectation = {"no_param_check": {}}
    assert schema_statistics.parse_checks(checks) == expectation

    check_statistics = {check.name: check.statistics for check in checks}
    check_list = schema_statistics.parse_check_statistics(check_statistics)
    assert set(check_list) == set(checks)


# ... (excerpt ends here; the remainder of the file is not shown)

Full Screen

Full Screen

schema_inference.py

Source: schema_inference.py Github

copy

Full Screen

# ... (excerpt begins here; earlier lines of schema_inference.py, including
# its imports, are not shown)
def _create_index(index_statistics):
    """Build an index schema component from a list of index statistics.

    :param index_statistics: iterable of dicts, each providing the
        ``"dtype"``, ``"checks"``, ``"nullable"`` and ``"name"`` keys.
    :returns: a single ``Index`` when exactly one entry is given, otherwise
        a ``MultiIndex`` wrapping all created ``Index`` components.
    """
    index = [
        Index(
            properties["dtype"],
            checks=parse_check_statistics(properties["checks"]),
            nullable=properties["nullable"],
            name=properties["name"],
        )
        for properties in index_statistics
    ]
    if len(index) == 1:
        index = index[0]  # type: ignore
    else:
        index = MultiIndex(index)  # type: ignore

    return index


def infer_dataframe_schema(df: pd.DataFrame) -> DataFrameSchema:
    """Infer a DataFrameSchema from a pandas DataFrame.

    :param df: DataFrame object to infer.
    :returns: DataFrameSchema
    """
    df_statistics = infer_dataframe_statistics(df)
    schema = DataFrameSchema(
        columns={
            colname: Column(
                properties["dtype"],
                checks=parse_check_statistics(properties["checks"]),
                nullable=properties["nullable"],
            )
            for colname, properties in df_statistics["columns"].items()
        },
        index=_create_index(df_statistics["index"]),
        coerce=True,
    )
    # Flag the schema as produced by inference rather than written by hand.
    schema._is_inferred = True
    return schema


def infer_series_schema(series) -> SeriesSchema:
    """Infer a SeriesSchema from a pandas Series.

    :param series: Series object to infer.
    :returns: SeriesSchema
    """
    series_statistics = infer_series_statistics(series)
    schema = SeriesSchema(
        dtype=series_statistics["dtype"],
        checks=parse_check_statistics(series_statistics["checks"]),
        nullable=series_statistics["nullable"],
        name=series_statistics["name"],
        coerce=True,
    )
    schema._is_inferred = True
    # ... (excerpt ends here; the remainder of this function and of the
    # file is not shown)

Full Screen

Full Screen

Blogs

Check out the latest blogs from LambdaTest on this topic:

Test strategy and how to communicate it

I routinely come across test strategy documents when working with customers. They are lengthy—100 pages or more—and packed with monotonous text that is routinely reused from one project to another. Yawn once more: the test halt and resume conditions, the defect management procedure, entrance and exit criteria, unnecessary generic risks, and, in fact, one often-used model that replicates the requirements of textbook testing, from stress to systems integration.

How To Write End-To-End Tests Using Cypress App Actions

When I started writing tests with Cypress, I was always going to use the user interface to interact with the application and change its state when running tests.

Pair testing strategy in an Agile environment

Pair testing can help you complete your testing tasks faster and with higher quality. But who can do pair testing, and when should it be done? And what form of pair testing is best for your circumstance? Check out this blog for more information on how to conduct pair testing to optimize its benefits.

LIVE With Automation Testing For OTT Streaming Devices

People love to watch, read and interact with quality content — especially video content. Whether it is sports, news, TV shows, or videos captured on smartphones, people crave digital content. The emergence of OTT platforms has already shaped the way people consume content. Viewers can now enjoy their favorite shows whenever they want rather than at pre-set times. Thus, the OTT platform’s concept of viewing anything, anytime, anywhere has hit the right chord.

Different Ways To Style CSS Box Shadow Effects

Have you ever visited a website that only has plain text and images? Most probably, no. It’s because such websites do not exist now. But there was a time when websites only had plain text and images with almost no styling. For the longest time, websites did not focus on user experience. For instance, this is how eBay’s homepage looked in 1999.

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub, right from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g., Selenium, Cypress, and TestNG.

LambdaTest Learning Hubs:

YouTube

You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.

Run pandera automation tests on LambdaTest cloud grid

Perform automation testing on 3000+ real desktop and mobile devices online.

Try LambdaTest Now!

Get 100 minutes of automation testing FREE!

Next-Gen App & Browser Testing Cloud

Was this article helpful?

Helpful

Not Helpful