Join the 1-day Testing & QA Summit featuring 15+ expert speakers. Register for FREE! Join TestMu Conference

How to use the _test_statistics method in pandera

Best Python code snippet using pandera_python

test_schema_statistics.py

Source: test_schema_statistics.py

...137    checks = schema_statistics.parse_check_statistics(check_stats)138    if checks is None:139        checks = []140    assert set(checks) == set(expectation)141def _test_statistics(statistics, expectations):142    if not isinstance(statistics, list):143        statistics = [statistics]144    if not isinstance(expectations, list):145        expectations = [expectations]146    for stats, expectation in zip(statistics, expectations):147        stat_dtype = stats.pop("dtype")148        expectation_dtype = expectation.pop("dtype")149        assert stats == expectation150        assert expectation_dtype.check(stat_dtype)151@pytest.mark.parametrize(152    "series, expectation",153    [154        *[155            [156                pd.Series(157                    [1, 2, 3], dtype=str(pandas_engine.Engine.dtype(data_type))158                ),159                {160                    "dtype": pandas_engine.Engine.dtype(data_type),161                    "nullable": False,162                    "checks": {163                        "greater_than_or_equal_to": 1,164                        "less_than_or_equal_to": 3,165                    },166                    "name": None,167                },168            ]169            for data_type in NUMERIC_TYPES170        ],171        [172            pd.Series(["a", "b", "c", "a"], dtype="category"),173            {174                "dtype": pandas_engine.Engine.dtype(pa.Category),175                "nullable": False,176                "checks": {"isin": ["a", "b", "c"]},177                "name": None,178            },179        ],180        [181            pd.Series(["a", "b", "c", "a"], dtype="string", name="str_series"),182            {183                "dtype": pandas_engine.Engine.dtype("string"),184                "nullable": False,185                "checks": None,186                "name": "str_series",187            },188        ],189        [190            pd.Series(pd.to_datetime(["20180101", "20180102", 
"20180103"])),191            {192                "dtype": pandas_engine.Engine.dtype(pa.DateTime),193                "nullable": False,194                "checks": {195                    "greater_than_or_equal_to": pd.Timestamp("20180101"),196                    "less_than_or_equal_to": pd.Timestamp("20180103"),197                },198                "name": None,199            },200        ],201    ],202)203def test_infer_series_schema_statistics(series, expectation) -> None:204    """Test series statistics are correctly inferred."""205    statistics = schema_statistics.infer_series_statistics(series)206    _test_statistics(statistics, expectation)207@pytest.mark.parametrize(208    "null_index, series, expectation",209    [210        *[211            [212                0,213                pd.Series([1, 2, 3], dtype=str(data_type)),214                {215                    # introducing nans to integer arrays upcasts to float216                    "dtype": DEFAULT_FLOAT,217                    "nullable": True,218                    "checks": {219                        "greater_than_or_equal_to": 2,220                        "less_than_or_equal_to": 3,221                    },222                    "name": None,223                },224            ]225            for data_type in INTEGER_TYPES226        ],227        [228            # introducing nans to bool arrays upcasts to float except229            # for pandas >= 1.3.0230            0,231            pd.Series([True, False, True, False]),232            {233                "dtype": (234                    pandas_engine.Engine.dtype(pa.BOOL)235                    if pa.PANDAS_1_3_0_PLUS236                    else DEFAULT_FLOAT237                ),238                "nullable": True,239                "checks": (240                    None241                    if pa.PANDAS_1_3_0_PLUS242                    else {243                        "greater_than_or_equal_to": 0,244                        
"less_than_or_equal_to": 1,245                    }246                ),247                "name": None,248            },249        ],250        [251            0,252            pd.Series(["a", "b", "c", "a"], dtype="category"),253            {254                "dtype": pandas_engine.Engine.dtype(pa.Category),255                "nullable": True,256                "checks": {"isin": ["a", "b", "c"]},257                "name": None,258            },259        ],260        [261            0,262            pd.Series(["a", "b", "c", "a"], name="str_series"),263            {264                "dtype": pandas_engine.Engine.dtype(str),265                "nullable": True,266                "checks": None,267                "name": "str_series",268            },269        ],270        [271            2,272            pd.Series(pd.to_datetime(["20180101", "20180102", "20180103"])),273            {274                "dtype": pandas_engine.Engine.dtype(pa.DateTime),275                "nullable": True,276                "checks": {277                    "greater_than_or_equal_to": pd.Timestamp("20180101"),278                    "less_than_or_equal_to": pd.Timestamp("20180102"),279                },280                "name": None,281            },282        ],283    ],284)285def test_infer_nullable_series_schema_statistics(286    null_index, series, expectation287):288    """Test nullable series statistics are correctly inferred."""289    series.iloc[null_index] = None290    statistics = schema_statistics.infer_series_statistics(series)291    _test_statistics(statistics, expectation)292@pytest.mark.parametrize(293    "index, expectation",294    [295        [296            pd.RangeIndex(20),297            [298                {299                    "name": None,300                    "dtype": DEFAULT_INT,301                    "nullable": False,302                    "checks": {303                        "greater_than_or_equal_to": 0,304                        
"less_than_or_equal_to": 19,305                    },306                }307            ],308        ],309        [310            pd.Index([1, 2, 3], name="int_index"),311            [312                {313                    "name": "int_index",314                    "dtype": DEFAULT_INT,315                    "nullable": False,316                    "checks": {317                        "greater_than_or_equal_to": 1,318                        "less_than_or_equal_to": 3,319                    },320                }321            ],322        ],323        [324            pd.Index(["foo", "bar", "baz"], name="str_index"),325            [326                {327                    "name": "str_index",328                    "dtype": pandas_engine.Engine.dtype("object"),329                    "nullable": False,330                    "checks": None,331                },332            ],333        ],334        [335            pd.MultiIndex.from_arrays(336                [[10, 11, 12], pd.Series(["a", "b", "c"], dtype="category")],337                names=["int_index", "str_index"],338            ),339            [340                {341                    "name": "int_index",342                    "dtype": DEFAULT_INT,343                    "nullable": False,344                    "checks": {345                        "greater_than_or_equal_to": 10,346                        "less_than_or_equal_to": 12,347                    },348                },349                {350                    "name": "str_index",351                    "dtype": pandas_engine.Engine.dtype(pa.Category),352                    "nullable": False,353                    "checks": {"isin": ["a", "b", "c"]},354                },355            ],356        ],357        # UserWarning cases358        [1, UserWarning],359        ["foo", UserWarning],360        [{"foo": "bar"}, UserWarning],361        [["foo", "bar"], UserWarning],362        [pd.Series(["foo", "bar"]), UserWarning],363        
[pd.DataFrame({"column": ["foo", "bar"]}), UserWarning],364    ],365)366def test_infer_index_statistics(index, expectation):367    """Test that index statistics are correctly inferred."""368    if expectation is UserWarning:369        with pytest.warns(UserWarning, match="^index type .+ not recognized"):370            schema_statistics.infer_index_statistics(index)371    else:372        _test_statistics(373            schema_statistics.infer_index_statistics(index), expectation374        )375def test_get_dataframe_schema_statistics():376    """Test that dataframe schema statistics logic is correct."""377    schema = pa.DataFrameSchema(378        columns={379            "int": pa.Column(380                int,381                checks=[382                    pa.Check.greater_than_or_equal_to(0),383                    pa.Check.less_than_or_equal_to(100),384                ],385                nullable=True,386            ),387            "float": pa.Column(388                float,389                checks=[390                    pa.Check.greater_than_or_equal_to(50),391                    pa.Check.less_than_or_equal_to(100),392                ],393            ),394            "str": pa.Column(395                str,396                checks=[pa.Check.isin(["foo", "bar", "baz"])],397            ),398        },399        index=pa.Index(400            int,401            checks=pa.Check.greater_than_or_equal_to(0),402            nullable=False,403            name="int_index",404        ),405    )406    expectation = {407        "checks": None,408        "columns": {409            "int": {410                "dtype": DEFAULT_INT,411                "checks": {412                    "greater_than_or_equal_to": {"min_value": 0},413                    "less_than_or_equal_to": {"max_value": 100},414                },415                "nullable": True,416                "unique": False,417                "coerce": False,418                "required": True,419                
"regex": False,420            },421            "float": {422                "dtype": DEFAULT_FLOAT,423                "checks": {424                    "greater_than_or_equal_to": {"min_value": 50},425                    "less_than_or_equal_to": {"max_value": 100},426                },427                "nullable": False,428                "unique": False,429                "coerce": False,430                "required": True,431                "regex": False,432            },433            "str": {434                "dtype": pandas_engine.Engine.dtype(str),435                "checks": {"isin": {"allowed_values": ["foo", "bar", "baz"]}},436                "nullable": False,437                "unique": False,438                "coerce": False,439                "required": True,440                "regex": False,441            },442        },443        "index": [444            {445                "dtype": DEFAULT_INT,446                "checks": {"greater_than_or_equal_to": {"min_value": 0}},447                "nullable": False,448                "coerce": False,449                "name": "int_index",450            }451        ],452        "coerce": False,453    }454    statistics = schema_statistics.get_dataframe_schema_statistics(schema)455    assert statistics == expectation456def test_get_series_schema_statistics():457    """Test that series schema statistics logic is correct."""458    schema = pa.SeriesSchema(459        int,460        nullable=False,461        checks=[462            pa.Check.greater_than_or_equal_to(0),463            pa.Check.less_than_or_equal_to(100),464        ],465    )466    statistics = schema_statistics.get_series_schema_statistics(schema)467    assert statistics == {468        "dtype": pandas_engine.Engine.dtype(int),469        "nullable": False,470        "checks": {471            "greater_than_or_equal_to": {"min_value": 0},472            "less_than_or_equal_to": {"max_value": 100},473        },474        "name": None,475        
"coerce": False,476    }477@pytest.mark.parametrize(478    "index_schema_component, expectation",479    [480        [481            pa.Index(482                int,483                checks=[484                    pa.Check.greater_than_or_equal_to(10),485                    pa.Check.less_than_or_equal_to(20),486                ],487                nullable=False,488                name="int_index",489            ),490            [491                {492                    "dtype": pandas_engine.Engine.dtype(int),493                    "nullable": False,494                    "checks": {495                        "greater_than_or_equal_to": {"min_value": 10},496                        "less_than_or_equal_to": {"max_value": 20},497                    },498                    "name": "int_index",499                    "coerce": False,500                }501            ],502        ]503    ],504)505def test_get_index_schema_statistics(index_schema_component, expectation):506    """Test that index schema statistics logic is correct."""507    statistics = schema_statistics.get_index_schema_statistics(508        index_schema_component509    )510    _test_statistics(statistics, expectation)511@pytest.mark.parametrize(512    "checks, expectation",513    [514        *[515            [[check], {check.name: check.statistics}]516            for check in [517                pa.Check.greater_than(1),518                pa.Check.less_than(1),519                pa.Check.in_range(1, 3),520                pa.Check.equal_to(1),521                pa.Check.not_equal_to(1),522                pa.Check.notin([1, 2, 3]),523                pa.Check.str_matches("foobar"),524                pa.Check.str_contains("foobar"),...

test_histogram.py

Source: test_histogram.py

...16    data = [15, 15, 20, 20, 20, 35, 35, 40, 40, 50, 50]17    histogram = traces.Histogram(data)18    normalized = histogram.normalized()19    assert sum(normalized.values()) == 1.020def _test_statistics(normalized):21    data_list = [22        [1, 2, 3, 5, 6, 7],23        [1, 2, 3, 5, 6],24        [1, 1],25        [1, 1, 1, 1, 1, 1, 1, 2],26        [i + 0.25 for i in [1, 1, 1, 1, 1, 1, 1, 2]],27    ]28    for data in data_list:29        histogram = traces.Histogram(data)30        if normalized:31            histogram = histogram.normalized()32            n = 133        else:34            n = len(data)35        nose.tools.assert_almost_equal(histogram.total(), n)36        nose.tools.assert_almost_equal(histogram.mean(), numpy.mean(data))37        nose.tools.assert_almost_equal(histogram.variance(), numpy.var(data))38        nose.tools.assert_almost_equal(39            histogram.standard_deviation(),40            numpy.std(data),41        )42        nose.tools.assert_almost_equal(histogram.max(), numpy.max(data))43        nose.tools.assert_almost_equal(histogram.min(), numpy.min(data))44        nose.tools.assert_almost_equal(45            histogram.quantile(0.5),46            numpy.median(data),47        )48        q_list = [0.001, 0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99, 0.999]49        # linear interpolation50        result = histogram.quantiles(q_list)51        reference = stats.mstats.mquantiles(52            data, prob=q_list, alphap=0.5, betap=0.5,53        )54        for i, j in zip(result, reference):55            nose.tools.assert_almost_equal(i, j)56        # make sure ot throw an error for bad quantile values57        try:58            histogram.quantile(-1)59        except ValueError:60            pass61def test_statistics():62    return _test_statistics(True)63def test_normalized_statistics():64    return _test_statistics(False)65def test_quantile_interpolation():66    data = [1, 1, 1, 2, 3, 5, 6, 7]67    histogram = traces.Histogram(data)68    
normalized = histogram.normalized()69    q_list = [0.001, 0.01, 0.05, 0.25, 0.5, 0.75, 0.95, 0.99, 0.999]70    # just do the inverse of the emperical cdf71    result = histogram.quantiles(q_list, alpha=0, smallest_count=1)72    answer = [1.0, 1.0, 1.0, 1.0, 2.5, 5.5, 7.0, 7.0, 7.0]73    for i, j in zip(result, answer):74        nose.tools.assert_almost_equal(i, j)75    # same thing with normalized76    result = normalized.quantiles(77        q_list, alpha=0, smallest_count=1.0 / len(data))78    for i, j in zip(result, answer):...

Automation Testing Tutorials

Learn to execute automation testing from scratch with LambdaTest Learning Hub. The hub takes you from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile a list of step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.