Best Python code snippet using pandera_python
test_json_table_schema_ext_dtype.py
Source:test_json_table_schema_ext_dtype.py
"""Tests for ExtensionDtype Table Schema integration."""
from collections import OrderedDict
import datetime as dt
import decimal
import json

import pytest

from pandas import (
    DataFrame,
    array,
)
from pandas.core.arrays.integer import Int64Dtype
from pandas.core.arrays.string_ import StringDtype
from pandas.core.series import Series
from pandas.tests.extension.date import (
    DateArray,
    DateDtype,
)
from pandas.tests.extension.decimal.array import (
    DecimalArray,
    DecimalDtype,
)

from pandas.io.json._table_schema import (
    as_json_table_type,
    build_table_schema,
)


class TestBuildSchema:
    """Checks on ``build_table_schema`` for a frame of extension arrays."""

    def setup_method(self, method):
        """Build one single-row array per dtype and a frame holding all four."""
        self.da = DateArray([dt.date(2021, 10, 10)])
        self.dc = DecimalArray([decimal.Decimal(10)])
        self.sa = array(["pandas"], dtype="string")
        self.ia = array([10], dtype="Int64")
        self.df = DataFrame(
            {
                "A": self.da,
                "B": self.dc,
                "C": self.sa,
                "D": self.ia,
            }
        )

    def test_build_table_schema(self):
        """Schema lists every column with its ``type`` and ``extDtype``."""
        result = build_table_schema(self.df, version=False)
        expected = {
            "fields": [
                {"name": "index", "type": "integer"},
                {"name": "A", "type": "any", "extDtype": "DateDtype"},
                {"name": "B", "type": "any", "extDtype": "decimal"},
                {"name": "C", "type": "any", "extDtype": "string"},
                {"name": "D", "type": "integer", "extDtype": "Int64"},
            ],
            "primaryKey": ["index"],
        }
        assert result == expected

        # Without ``version=False`` the schema also carries the version key.
        result = build_table_schema(self.df)
        assert "pandas_version" in result


class TestTableSchemaType:
    """Checks on ``as_json_table_type`` for extension dtypes."""

    @pytest.mark.parametrize(
        "date_data",
        [
            DateArray([dt.date(2021, 10, 10)]),
            DateArray(dt.date(2021, 10, 10)),
            Series(DateArray(dt.date(2021, 10, 10))),
        ],
    )
    def test_as_json_table_type_ext_date_array_dtype(self, date_data):
        """The dtype of date-array data maps to ``"any"``."""
        assert as_json_table_type(date_data.dtype) == "any"

    def test_as_json_table_type_ext_date_dtype(self):
        """A ``DateDtype`` instance maps to ``"any"``."""
        assert as_json_table_type(DateDtype()) == "any"

    @pytest.mark.parametrize(
        "decimal_data",
        [
            DecimalArray([decimal.Decimal(10)]),
            Series(DecimalArray([decimal.Decimal(10)])),
        ],
    )
    def test_as_json_table_type_ext_decimal_array_dtype(self, decimal_data):
        """The dtype of decimal-array data maps to ``"any"``."""
        assert as_json_table_type(decimal_data.dtype) == "any"

    def test_as_json_table_type_ext_decimal_dtype(self):
        """A ``DecimalDtype`` instance maps to ``"any"``."""
        assert as_json_table_type(DecimalDtype()) == "any"

    @pytest.mark.parametrize(
        "string_data",
        [
            array(["pandas"], dtype="string"),
            Series(array(["pandas"], dtype="string")),
        ],
    )
    def test_as_json_table_type_ext_string_array_dtype(self, string_data):
        """The dtype of ``"string"`` data maps to ``"any"``."""
        assert as_json_table_type(string_data.dtype) == "any"

    def test_as_json_table_type_ext_string_dtype(self):
        """A ``StringDtype`` instance maps to ``"any"``."""
        assert as_json_table_type(StringDtype()) == "any"

    @pytest.mark.parametrize(
        "integer_data",
        [
            array([10], dtype="Int64"),
            Series(array([10], dtype="Int64")),
        ],
    )
    def test_as_json_table_type_ext_integer_array_dtype(self, integer_data):
        """The dtype of ``"Int64"`` data maps to ``"integer"``."""
        assert as_json_table_type(integer_data.dtype) == "integer"

    def test_as_json_table_type_ext_integer_dtype(self):
        """An ``Int64Dtype`` instance maps to ``"integer"``."""
        assert as_json_table_type(Int64Dtype()) == "integer"


class TestTableOrient:
    """Checks on ``to_json(orient="table")`` output for extension arrays."""

    def setup_method(self, method):
        """Build one single-row array per dtype and a frame holding all four."""
        self.da = DateArray([dt.date(2021, 10, 10)])
        self.dc = DecimalArray([decimal.Decimal(10)])
        self.sa = array(["pandas"], dtype="string")
        self.ia = array([10], dtype="Int64")
        self.df = DataFrame(
            {
                "A": self.da,
                "B": self.dc,
                "C": self.sa,
                "D": self.ia,
            }
        )

    def test_build_date_series(self):
        """Date series serializes with ``extDtype`` ``"DateDtype"``."""
        s = Series(self.da, name="a")
        s.index.name = "id"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)

        # The version key is checked for presence, then dropped so the
        # comparison below does not depend on its value.
        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [
            {"name": "id", "type": "integer"},
            {"name": "a", "type": "any", "extDtype": "DateDtype"},
        ]
        schema = {"fields": fields, "primaryKey": ["id"]}
        expected = OrderedDict(
            [
                ("schema", schema),
                ("data", [OrderedDict([("id", 0), ("a", "2021-10-10T00:00:00.000Z")])]),
            ]
        )
        assert result == expected

    def test_build_decimal_series(self):
        """Decimal series serializes with ``extDtype`` ``"decimal"``."""
        s = Series(self.dc, name="a")
        s.index.name = "id"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [
            {"name": "id", "type": "integer"},
            {"name": "a", "type": "any", "extDtype": "decimal"},
        ]
        schema = {"fields": fields, "primaryKey": ["id"]}
        expected = OrderedDict(
            [
                ("schema", schema),
                ("data", [OrderedDict([("id", 0), ("a", 10.0)])]),
            ]
        )
        assert result == expected

    def test_build_string_series(self):
        """String series serializes with ``extDtype`` ``"string"``."""
        s = Series(self.sa, name="a")
        s.index.name = "id"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [
            {"name": "id", "type": "integer"},
            {"name": "a", "type": "any", "extDtype": "string"},
        ]
        schema = {"fields": fields, "primaryKey": ["id"]}
        expected = OrderedDict(
            [
                ("schema", schema),
                ("data", [OrderedDict([("id", 0), ("a", "pandas")])]),
            ]
        )
        assert result == expected

    def test_build_int64_series(self):
        """Int64 series serializes with ``extDtype`` ``"Int64"``."""
        s = Series(self.ia, name="a")
        s.index.name = "id"
        result = s.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [
            {"name": "id", "type": "integer"},
            {"name": "a", "type": "integer", "extDtype": "Int64"},
        ]
        schema = {"fields": fields, "primaryKey": ["id"]}
        expected = OrderedDict(
            [
                ("schema", schema),
                ("data", [OrderedDict([("id", 0), ("a", 10)])]),
            ]
        )
        assert result == expected

    def test_to_json(self):
        """Whole-frame ``to_json(orient="table")`` round-trips through JSON."""
        df = self.df.copy()
        df.index.name = "idx"
        result = df.to_json(orient="table", date_format="iso")
        result = json.loads(result, object_pairs_hook=OrderedDict)

        assert "pandas_version" in result["schema"]
        result["schema"].pop("pandas_version")

        fields = [
            OrderedDict({"name": "idx", "type": "integer"}),
            OrderedDict({"name": "A", "type": "any", "extDtype": "DateDtype"}),
            OrderedDict({"name": "B", "type": "any", "extDtype": "decimal"}),
            OrderedDict({"name": "C", "type": "any", "extDtype": "string"}),
            OrderedDict({"name": "D", "type": "integer", "extDtype": "Int64"}),
        ]
        schema = OrderedDict({"fields": fields, "primaryKey": ["idx"]})
        data = [
            OrderedDict(
                [
                    ("idx", 0),
                    ("A", "2021-10-10T00:00:00.000Z"),
                    ("B", 10.0),
                    ("C", "pandas"),
                    ("D", 10),
                ]
            )
        ]
        expected = OrderedDict([("schema", schema), ("data", data)])
        # NOTE(review): the snippet is truncated here in the source listing
        # ("..."); the remainder of this test is not visible.
_compat.py
Source:_compat.py
import string

import numpy as np
import pandas as pd
import pandas.testing as tm
from packaging.version import parse as parse_version

PANDAS_VERSION = parse_version(pd.__version__)

# Despite the "GT" in the names, each flag is True when the installed pandas
# version is greater than *or equal to* the version encoded in the name.
PANDAS_GT_104 = PANDAS_VERSION >= parse_version("1.0.4")
PANDAS_GT_110 = PANDAS_VERSION >= parse_version("1.1.0")
PANDAS_GT_120 = PANDAS_VERSION >= parse_version("1.2.0")
PANDAS_GT_121 = PANDAS_VERSION >= parse_version("1.2.1")
PANDAS_GT_130 = PANDAS_VERSION >= parse_version("1.3.0")
PANDAS_GT_131 = PANDAS_VERSION >= parse_version("1.3.1")
PANDAS_GT_133 = PANDAS_VERSION >= parse_version("1.3.3")
PANDAS_GT_140 = PANDAS_VERSION >= parse_version("1.4.0")
# FIXME: Using `.release` below as versions like `1.5.0.dev0+268.gbe8d1ec880`
# are less than `1.5.0` with `packaging.version`. Update to use `parse_version("1.5.0")`
# below once `pandas=1.5.0` is released
PANDAS_GT_150 = PANDAS_VERSION.release >= (1, 5, 0)


def assert_categorical_equal(left, right, *args, **kwargs):
    """Assert that two extension arrays are equal and both categorical.

    Extra positional and keyword arguments are forwarded unchanged to
    ``pandas.testing.assert_extension_array_equal``.

    Raises
    ------
    AssertionError
        If the arrays differ, or either dtype is not categorical.
    """
    tm.assert_extension_array_equal(left, right, *args, **kwargs)
    # isinstance() replaces pd.api.types.is_categorical_dtype, which is
    # deprecated in recent pandas releases.
    assert isinstance(
        left.dtype, pd.CategoricalDtype
    ), f"{left} is not categorical dtype"
    assert isinstance(
        right.dtype, pd.CategoricalDtype
    ), f"{right} is not categorical dtype"


def assert_numpy_array_equal(left, right):
    """Assert two arrays have the same missing-value mask and valid values.

    The ``pd.isna`` masks are compared first, then the non-missing entries,
    so missing values in matching positions compare as equal.
    """
    left_na = pd.isna(left)
    right_na = pd.isna(right)
    np.testing.assert_array_equal(left_na, right_na)

    left_valid = left[~left_na]
    right_valid = right[~right_na]
    np.testing.assert_array_equal(left_valid, right_valid)


def makeDataFrame():
    """Return a 30x4 frame of random normals, indexed by ASCII letters."""
    data = np.random.randn(30, 4)
    index = list(string.ascii_letters)[:30]
    return pd.DataFrame(data, index=index, columns=list("ABCD"))


def makeTimeDataFrame():
    """Return ``makeDataFrame()`` re-indexed with ``makeDateIndex()``."""
    data = makeDataFrame()
    data.index = makeDateIndex()
    return data


def makeTimeSeries():
    """Return column ``"A"`` of ``makeTimeDataFrame()``."""
    return makeTimeDataFrame()["A"]


def makeDateIndex(k=30, freq="B"):
    """Return a ``k``-period date range starting at ``"2000"``."""
    return pd.date_range("2000", periods=k, freq=freq)


def makeTimedeltaIndex(k=30, freq="D"):
    """Return a ``k``-period timedelta range starting at ``"1 day"``."""
    return pd.timedelta_range("1 day", periods=k, freq=freq)


def makeMissingDataframe():
    """Return ``makeDataFrame()`` with every value greater than 1 set to NaN."""
    df = makeDataFrame()
    data = df.values
    data = np.where(data > 1, np.nan, data)
    return pd.DataFrame(data, index=df.index, columns=df.columns)


def makeMixedDataFrame():
    """Build a five-row frame with float, string and datetime columns."""
    df = pd.DataFrame(
        {
            "A": [0.0, 1, 2, 3, 4],
            "B": [0.0, 1, 0, 1, 0],
            "C": [f"foo{i}" for i in range(5)],
            "D": pd.date_range("2009-01-01", periods=5),
        }
    )
    # NOTE(review): the snippet is truncated here in the source listing
    # ("..."); the remainder of this function is not visible.
_utils.py
Source:_utils.py
import datetime as dt
from distutils.version import LooseVersion

import pandas as pd
from pandas.core.common import PandasError
from pandas import to_datetime

import requests
from requests_file import FileAdapter
import requests_ftp

requests_ftp.monkeypatch_session()

if pd.compat.PY3:
    from urllib.error import HTTPError  # noqa
else:
    from urllib2 import HTTPError  # noqa

PANDAS_VERSION = LooseVersion(pd.__version__)

# ``is_number`` is imported from a different module depending on the
# installed pandas version.
if PANDAS_VERSION >= LooseVersion('0.19.0'):
    PANDAS_0190 = True
    from pandas.api.types import is_number  # noqa
else:
    PANDAS_0190 = False
    from pandas.core.common import is_number  # noqa

# Each flag is True when the installed pandas is at least that version.
PANDAS_0170 = PANDAS_VERSION >= LooseVersion('0.17.0')
PANDAS_0160 = PANDAS_VERSION >= LooseVersion('0.16.0')
PANDAS_0140 = PANDAS_VERSION >= LooseVersion('0.14.0')


class SymbolWarning(UserWarning):
    """Warning category; a plain ``UserWarning`` subclass."""
    pass


class RemoteDataError(PandasError, IOError):
    """Exception that derives from both ``PandasError`` and ``IOError``."""
    pass


def _sanitize_dates(start, end):
    """
    Return (datetime_start, datetime_end) tuple
    if start is None - default is 2010/01/01
    if end is None - default is today

    A numeric ``start`` or ``end`` is regarded as a year and becomes
    January 1st of that year. Both values are then passed through
    ``pandas.to_datetime`` before the defaults are applied.
    """
    if is_number(start):
        # regard int as year
        start = dt.datetime(start, 1, 1)
    start = to_datetime(start)

    if is_number(end):
        end = dt.datetime(end, 1, 1)
    end = to_datetime(end)

    if start is None:
        start = dt.datetime(2010, 1, 1)
    if end is None:
        end = dt.datetime.today()
    return start, end


def _init_session(session, retry_count=3):
    """Create a ``requests.Session`` with a ``file://`` adapter if none is given."""
    # NOTE(review): indentation was lost in the source listing; the mount()
    # call is presumed to belong inside the ``if`` -- confirm upstream.
    if session is None:
        session = requests.Session()
        session.mount('file://', FileAdapter())
        # do not set requests max_retries here to support arbitrary pause
    # NOTE(review): the snippet is truncated here in the source listing
    # ("..."); the remainder of this function is not visible.
Learn to execute automation testing from scratch with LambdaTest Learning Hub, from setting up the prerequisites and running your first automation test to following best practices and diving deeper into advanced test scenarios. LambdaTest Learning Hubs compile step-by-step guides to help you become proficient with different test automation frameworks, e.g. Selenium, Cypress, and TestNG.
You can also refer to the video tutorials on the LambdaTest YouTube channel for step-by-step demonstrations from industry experts.
Get 100 automation test minutes FREE!!