Python test suite excerpt (pytest-based) — pandas HDFStore tests
test_store.py
Source: test_store.py (pandas, pandas/tests/io/pytables/)
import datetime
from datetime import timedelta
from distutils.version import LooseVersion
import hashlib
from io import BytesIO
import os
from pathlib import Path
import re
import time
from warnings import catch_warnings, simplefilter

import numpy as np
import pytest

from pandas.compat import is_platform_little_endian, is_platform_windows
import pandas.util._test_decorators as td

import pandas as pd
from pandas import (
    Categorical,
    CategoricalIndex,
    DataFrame,
    DatetimeIndex,
    Index,
    Int64Index,
    MultiIndex,
    RangeIndex,
    Series,
    Timestamp,
    bdate_range,
    concat,
    date_range,
    isna,
    timedelta_range,
)
import pandas._testing as tm
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    create_tempfile,
    ensure_clean_path,
    ensure_clean_store,
    safe_close,
    safe_remove,
    tables,
)
from pandas.io.pytables import (
    ClosedFileError,
    HDFStore,
    PossibleDataLossError,
    Term,
    read_hdf,
)
from pandas.io import pytables as pytables  # noqa: E402 isort:skip
from pandas.io.pytables import TableIterator  # noqa: E402 isort:skip

_default_compressor = "blosc"
ignore_natural_naming_warning = pytest.mark.filterwarnings(
    "ignore:object name:tables.exceptions.NaturalNameWarning"
)


@pytest.mark.single
class TestHDFStore:
    # Tests for pandas' HDFStore / PyTables round-trip behavior.
    # `setup_path` is a pytest fixture (file name) supplied by the suite's conftest.

    def test_format_type(self, setup_path):
        df = pd.DataFrame({"A": [1, 2]})
        with ensure_clean_path(setup_path) as path:
            with HDFStore(path) as store:
                store.put("a", df, format="fixed")
                store.put("b", df, format="table")
                assert store.get_storer("a").format_type == "fixed"
                assert store.get_storer("b").format_type == "table"

    def test_format_kwarg_in_constructor(self, setup_path):
        # GH 13291
        msg = "format is not a defined argument for HDFStore"
        with ensure_clean_path(setup_path) as path:
            with pytest.raises(ValueError, match=msg):
                HDFStore(path, format="table")

    def test_context(self, setup_path):
        path = create_tempfile(setup_path)
        try:
            with HDFStore(path) as tbl:
                raise ValueError("blah")
        except ValueError:
            pass
        finally:
            safe_remove(path)
        try:
            with HDFStore(path) as tbl:
                tbl["a"] = tm.makeDataFrame()
            with HDFStore(path) as tbl:
                assert len(tbl) == 1
                assert type(tbl["a"]) == DataFrame
        finally:
            safe_remove(path)

    def test_conv_read_write(self, setup_path):
        path = create_tempfile(setup_path)
        try:

            def roundtrip(key, obj, **kwargs):
                # write via to_hdf, read back via read_hdf
                obj.to_hdf(path, key, **kwargs)
                return read_hdf(path, key)

            o = tm.makeTimeSeries()
            tm.assert_series_equal(o, roundtrip("series", o))
            o = tm.makeStringSeries()
            tm.assert_series_equal(o, roundtrip("string_series", o))
            o = tm.makeDataFrame()
            tm.assert_frame_equal(o, roundtrip("frame", o))
            # table
            df = DataFrame(dict(A=range(5), B=range(5)))
            df.to_hdf(path, "table", append=True)
            result = read_hdf(path, "table", where=["index>2"])
            tm.assert_frame_equal(df[df.index > 2], result)
        finally:
            safe_remove(path)

    def test_long_strings(self, setup_path):
        # GH6166
        df = DataFrame(
            {"a": tm.rands_array(100, size=10)}, index=tm.rands_array(100, size=10)
        )
        with ensure_clean_store(setup_path) as store:
            store.append("df", df, data_columns=["a"])
            result = store.select("df")
            tm.assert_frame_equal(df, result)

    def test_api(self, setup_path):
        # GH4584
        # API issue when to_hdf doesn't accept append AND format args
        with ensure_clean_path(setup_path) as path:
            df = tm.makeDataFrame()
            df.iloc[:10].to_hdf(path, "df", append=True, format="table")
            df.iloc[10:].to_hdf(path, "df", append=True, format="table")
            tm.assert_frame_equal(read_hdf(path, "df"), df)
            # append to False
            df.iloc[:10].to_hdf(path, "df", append=False, format="table")
            df.iloc[10:].to_hdf(path, "df", append=True, format="table")
            tm.assert_frame_equal(read_hdf(path, "df"), df)
        with ensure_clean_path(setup_path) as path:
            df = tm.makeDataFrame()
            df.iloc[:10].to_hdf(path, "df", append=True)
            df.iloc[10:].to_hdf(path, "df", append=True, format="table")
            tm.assert_frame_equal(read_hdf(path, "df"), df)
            # append to False
            df.iloc[:10].to_hdf(path, "df", append=False, format="table")
            df.iloc[10:].to_hdf(path, "df", append=True)
            tm.assert_frame_equal(read_hdf(path, "df"), df)
        with ensure_clean_path(setup_path) as path:
            df = tm.makeDataFrame()
            df.to_hdf(path, "df", append=False, format="fixed")
            tm.assert_frame_equal(read_hdf(path, "df"), df)
            df.to_hdf(path, "df", append=False, format="f")
            tm.assert_frame_equal(read_hdf(path, "df"), df)
            df.to_hdf(path, "df", append=False)
            tm.assert_frame_equal(read_hdf(path, "df"), df)
            df.to_hdf(path, "df")
            tm.assert_frame_equal(read_hdf(path, "df"), df)
        with ensure_clean_store(setup_path) as store:
            path = store._path
            df = tm.makeDataFrame()
            _maybe_remove(store, "df")
            store.append("df", df.iloc[:10], append=True, format="table")
            store.append("df", df.iloc[10:], append=True, format="table")
            tm.assert_frame_equal(store.select("df"), df)
            # append to False
            _maybe_remove(store, "df")
            store.append("df", df.iloc[:10], append=False, format="table")
            store.append("df", df.iloc[10:], append=True, format="table")
            tm.assert_frame_equal(store.select("df"), df)
            # formats
            _maybe_remove(store, "df")
            store.append("df", df.iloc[:10], append=False, format="table")
            store.append("df", df.iloc[10:], append=True, format="table")
            tm.assert_frame_equal(store.select("df"), df)
            _maybe_remove(store, "df")
            store.append("df", df.iloc[:10], append=False, format="table")
            store.append("df", df.iloc[10:], append=True, format=None)
            tm.assert_frame_equal(store.select("df"), df)
        with ensure_clean_path(setup_path) as path:
            # Invalid.
            df = tm.makeDataFrame()
            msg = "Can only append to Tables"
            with pytest.raises(ValueError, match=msg):
                df.to_hdf(path, "df", append=True, format="f")
            with pytest.raises(ValueError, match=msg):
                df.to_hdf(path, "df", append=True, format="fixed")
            msg = r"invalid HDFStore format specified \[foo\]"
            with pytest.raises(TypeError, match=msg):
                df.to_hdf(path, "df", append=True, format="foo")
            with pytest.raises(TypeError, match=msg):
                df.to_hdf(path, "df", append=False, format="foo")
        # File path doesn't exist
        path = ""
        msg = f"File {path} does not exist"
        with pytest.raises(FileNotFoundError, match=msg):
            read_hdf(path, "df")

    def test_api_default_format(self, setup_path):
        # default_format option
        with ensure_clean_store(setup_path) as store:
            df = tm.makeDataFrame()
            pd.set_option("io.hdf.default_format", "fixed")
            _maybe_remove(store, "df")
            store.put("df", df)
            assert not store.get_storer("df").is_table
            msg = "Can only append to Tables"
            with pytest.raises(ValueError, match=msg):
                store.append("df2", df)
            pd.set_option("io.hdf.default_format", "table")
            _maybe_remove(store, "df")
            store.put("df", df)
            assert store.get_storer("df").is_table
            _maybe_remove(store, "df2")
            store.append("df2", df)
            assert store.get_storer("df").is_table
            pd.set_option("io.hdf.default_format", None)
        with ensure_clean_path(setup_path) as path:
            df = tm.makeDataFrame()
            pd.set_option("io.hdf.default_format", "fixed")
            df.to_hdf(path, "df")
            with HDFStore(path) as store:
                assert not store.get_storer("df").is_table
            with pytest.raises(ValueError, match=msg):
                df.to_hdf(path, "df2", append=True)
            pd.set_option("io.hdf.default_format", "table")
            df.to_hdf(path, "df3")
            with HDFStore(path) as store:
                assert store.get_storer("df3").is_table
            df.to_hdf(path, "df4", append=True)
            with HDFStore(path) as store:
                assert store.get_storer("df4").is_table
            pd.set_option("io.hdf.default_format", None)

    def test_keys(self, setup_path):
        with ensure_clean_store(setup_path) as store:
            store["a"] = tm.makeTimeSeries()
            store["b"] = tm.makeStringSeries()
            store["c"] = tm.makeDataFrame()
            assert len(store) == 3
            expected = {"/a", "/b", "/c"}
            assert set(store.keys()) == expected
            assert set(store) == expected

    def test_no_track_times(self, setup_path):
        # GH 32682
        # enables to set track_times (see `pytables` `create_table` documentation)
        def checksum(filename, hash_factory=hashlib.md5, chunk_num_blocks=128):
            h = hash_factory()
            with open(filename, "rb") as f:
                for chunk in iter(lambda: f.read(chunk_num_blocks * h.block_size), b""):
                    h.update(chunk)
            return h.digest()

        def create_h5_and_return_checksum(track_times):
            with ensure_clean_path(setup_path) as path:
                df = pd.DataFrame({"a": [1]})
                with pd.HDFStore(path, mode="w") as hdf:
                    hdf.put(
                        "table",
                        df,
                        format="table",
                        data_columns=True,
                        index=None,
                        track_times=track_times,
                    )
                return checksum(path)

        checksum_0_tt_false = create_h5_and_return_checksum(track_times=False)
        checksum_0_tt_true = create_h5_and_return_checksum(track_times=True)
        # sleep is necessary to create h5 with different creation time
        time.sleep(1)
        checksum_1_tt_false = create_h5_and_return_checksum(track_times=False)
        checksum_1_tt_true = create_h5_and_return_checksum(track_times=True)
        # checksums are the same if track_time = False
        assert checksum_0_tt_false == checksum_1_tt_false
        # checksums are NOT same if track_time = True
        assert checksum_0_tt_true != checksum_1_tt_true

    def test_non_pandas_keys(self, setup_path):
        class Table1(tables.IsDescription):
            value1 = tables.Float32Col()

        class Table2(tables.IsDescription):
            value2 = tables.Float32Col()

        class Table3(tables.IsDescription):
            value3 = tables.Float32Col()

        with ensure_clean_path(setup_path) as path:
            with tables.open_file(path, mode="w") as h5file:
                group = h5file.create_group("/", "group")
                h5file.create_table(group, "table1", Table1, "Table 1")
                h5file.create_table(group, "table2", Table2, "Table 2")
                h5file.create_table(group, "table3", Table3, "Table 3")
            with HDFStore(path) as store:
                assert len(store.keys(include="native")) == 3
                expected = {"/group/table1", "/group/table2", "/group/table3"}
                assert set(store.keys(include="native")) == expected
                assert set(store.keys(include="pandas")) == set()
                for name in expected:
                    df = store.get(name)
                    assert len(df.columns) == 1
len(store.keys(include="native")) == 3278 expected = {"/group/table1", "/group/table2", "/group/table3"}279 assert set(store.keys(include="native")) == expected280 assert set(store.keys(include="pandas")) == set()281 for name in expected:282 df = store.get(name)283 assert len(df.columns) == 1284 def test_keys_illegal_include_keyword_value(self, setup_path):285 with ensure_clean_store(setup_path) as store:286 with pytest.raises(287 ValueError,288 match="`include` should be either 'pandas' or 'native' "289 "but is 'illegal'",290 ):291 store.keys(include="illegal")292 def test_keys_ignore_hdf_softlink(self, setup_path):293 # GH 20523294 # Puts a softlink into HDF file and rereads295 with ensure_clean_store(setup_path) as store:296 df = DataFrame(dict(A=range(5), B=range(5)))297 store.put("df", df)298 assert store.keys() == ["/df"]299 store._handle.create_soft_link(store._handle.root, "symlink", "df")300 # Should ignore the softlink301 assert store.keys() == ["/df"]302 def test_iter_empty(self, setup_path):303 with ensure_clean_store(setup_path) as store:304 # GH 12221305 assert list(store) == []306 def test_repr(self, setup_path):307 with ensure_clean_store(setup_path) as store:308 repr(store)309 store.info()310 store["a"] = tm.makeTimeSeries()311 store["b"] = tm.makeStringSeries()312 store["c"] = tm.makeDataFrame()313 df = tm.makeDataFrame()314 df["obj1"] = "foo"315 df["obj2"] = "bar"316 df["bool1"] = df["A"] > 0317 df["bool2"] = df["B"] > 0318 df["bool3"] = True319 df["int1"] = 1320 df["int2"] = 2321 df["timestamp1"] = Timestamp("20010102")322 df["timestamp2"] = Timestamp("20010103")323 df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)324 df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)325 df.loc[df.index[3:6], ["obj1"]] = np.nan326 df = df._consolidate()._convert(datetime=True)327 with catch_warnings(record=True):328 simplefilter("ignore", pd.errors.PerformanceWarning)329 store["df"] = df330 # make a random group in hdf space331 
store._handle.create_group(store._handle.root, "bah")332 assert store.filename in repr(store)333 assert store.filename in str(store)334 store.info()335 # storers336 with ensure_clean_store(setup_path) as store:337 df = tm.makeDataFrame()338 store.append("df", df)339 s = store.get_storer("df")340 repr(s)341 str(s)342 @ignore_natural_naming_warning343 def test_contains(self, setup_path):344 with ensure_clean_store(setup_path) as store:345 store["a"] = tm.makeTimeSeries()346 store["b"] = tm.makeDataFrame()347 store["foo/bar"] = tm.makeDataFrame()348 assert "a" in store349 assert "b" in store350 assert "c" not in store351 assert "foo/bar" in store352 assert "/foo/bar" in store353 assert "/foo/b" not in store354 assert "bar" not in store355 # gh-2694: tables.NaturalNameWarning356 with catch_warnings(record=True):357 store["node())"] = tm.makeDataFrame()358 assert "node())" in store359 def test_versioning(self, setup_path):360 with ensure_clean_store(setup_path) as store:361 store["a"] = tm.makeTimeSeries()362 store["b"] = tm.makeDataFrame()363 df = tm.makeTimeDataFrame()364 _maybe_remove(store, "df1")365 store.append("df1", df[:10])366 store.append("df1", df[10:])367 assert store.root.a._v_attrs.pandas_version == "0.15.2"368 assert store.root.b._v_attrs.pandas_version == "0.15.2"369 assert store.root.df1._v_attrs.pandas_version == "0.15.2"370 # write a file and wipe its versioning371 _maybe_remove(store, "df2")372 store.append("df2", df)373 # this is an error because its table_type is appendable, but no374 # version info375 store.get_node("df2")._v_attrs.pandas_version = None376 msg = "'NoneType' object has no attribute 'startswith'"377 with pytest.raises(Exception, match=msg):378 store.select("df2")379 def test_mode(self, setup_path):380 df = tm.makeTimeDataFrame()381 def check(mode):382 with ensure_clean_path(setup_path) as path:383 # constructor384 if mode in ["r", "r+"]:385 with pytest.raises(IOError):386 HDFStore(path, mode=mode)387 else:388 store = HDFStore(path, 
mode=mode)389 assert store._handle.mode == mode390 store.close()391 with ensure_clean_path(setup_path) as path:392 # context393 if mode in ["r", "r+"]:394 with pytest.raises(IOError):395 with HDFStore(path, mode=mode) as store: # noqa396 pass397 else:398 with HDFStore(path, mode=mode) as store:399 assert store._handle.mode == mode400 with ensure_clean_path(setup_path) as path:401 # conv write402 if mode in ["r", "r+"]:403 with pytest.raises(IOError):404 df.to_hdf(path, "df", mode=mode)405 df.to_hdf(path, "df", mode="w")406 else:407 df.to_hdf(path, "df", mode=mode)408 # conv read409 if mode in ["w"]:410 msg = (411 "mode w is not allowed while performing a read. "412 r"Allowed modes are r, r\+ and a."413 )414 with pytest.raises(ValueError, match=msg):415 read_hdf(path, "df", mode=mode)416 else:417 result = read_hdf(path, "df", mode=mode)418 tm.assert_frame_equal(result, df)419 def check_default_mode():420 # read_hdf uses default mode421 with ensure_clean_path(setup_path) as path:422 df.to_hdf(path, "df", mode="w")423 result = read_hdf(path, "df")424 tm.assert_frame_equal(result, df)425 check("r")426 check("r+")427 check("a")428 check("w")429 check_default_mode()430 def test_reopen_handle(self, setup_path):431 with ensure_clean_path(setup_path) as path:432 store = HDFStore(path, mode="a")433 store["a"] = tm.makeTimeSeries()434 # invalid mode change435 with pytest.raises(PossibleDataLossError):436 store.open("w")437 store.close()438 assert not store.is_open439 # truncation ok here440 store.open("w")441 assert store.is_open442 assert len(store) == 0443 store.close()444 assert not store.is_open445 store = HDFStore(path, mode="a")446 store["a"] = tm.makeTimeSeries()447 # reopen as read448 store.open("r")449 assert store.is_open450 assert len(store) == 1451 assert store._mode == "r"452 store.close()453 assert not store.is_open454 # reopen as append455 store.open("a")456 assert store.is_open457 assert len(store) == 1458 assert store._mode == "a"459 store.close()460 assert 
not store.is_open461 # reopen as append (again)462 store.open("a")463 assert store.is_open464 assert len(store) == 1465 assert store._mode == "a"466 store.close()467 assert not store.is_open468 def test_open_args(self, setup_path):469 with ensure_clean_path(setup_path) as path:470 df = tm.makeDataFrame()471 # create an in memory store472 store = HDFStore(473 path, mode="a", driver="H5FD_CORE", driver_core_backing_store=0474 )475 store["df"] = df476 store.append("df2", df)477 tm.assert_frame_equal(store["df"], df)478 tm.assert_frame_equal(store["df2"], df)479 store.close()480 # the file should not have actually been written481 assert not os.path.exists(path)482 def test_flush(self, setup_path):483 with ensure_clean_store(setup_path) as store:484 store["a"] = tm.makeTimeSeries()485 store.flush()486 store.flush(fsync=True)487 def test_get(self, setup_path):488 with ensure_clean_store(setup_path) as store:489 store["a"] = tm.makeTimeSeries()490 left = store.get("a")491 right = store["a"]492 tm.assert_series_equal(left, right)493 left = store.get("/a")494 right = store["/a"]495 tm.assert_series_equal(left, right)496 with pytest.raises(KeyError, match="'No object named b in the file'"):497 store.get("b")498 @pytest.mark.parametrize(499 "where, expected",500 [501 (502 "/",503 {504 "": ({"first_group", "second_group"}, set()),505 "/first_group": (set(), {"df1", "df2"}),506 "/second_group": ({"third_group"}, {"df3", "s1"}),507 "/second_group/third_group": (set(), {"df4"}),508 },509 ),510 (511 "/second_group",512 {513 "/second_group": ({"third_group"}, {"df3", "s1"}),514 "/second_group/third_group": (set(), {"df4"}),515 },516 ),517 ],518 )519 def test_walk(self, where, expected, setup_path):520 # GH10143521 objs = {522 "df1": pd.DataFrame([1, 2, 3]),523 "df2": pd.DataFrame([4, 5, 6]),524 "df3": pd.DataFrame([6, 7, 8]),525 "df4": pd.DataFrame([9, 10, 11]),526 "s1": pd.Series([10, 9, 8]),527 # Next 3 items aren't pandas objects and should be ignored528 "a1": np.array([[1, 2, 
3], [4, 5, 6]]),529 "tb1": np.array([(1, 2, 3), (4, 5, 6)], dtype="i,i,i"),530 "tb2": np.array([(7, 8, 9), (10, 11, 12)], dtype="i,i,i"),531 }532 with ensure_clean_store("walk_groups.hdf", mode="w") as store:533 store.put("/first_group/df1", objs["df1"])534 store.put("/first_group/df2", objs["df2"])535 store.put("/second_group/df3", objs["df3"])536 store.put("/second_group/s1", objs["s1"])537 store.put("/second_group/third_group/df4", objs["df4"])538 # Create non-pandas objects539 store._handle.create_array("/first_group", "a1", objs["a1"])540 store._handle.create_table("/first_group", "tb1", obj=objs["tb1"])541 store._handle.create_table("/second_group", "tb2", obj=objs["tb2"])542 assert len(list(store.walk(where=where))) == len(expected)543 for path, groups, leaves in store.walk(where=where):544 assert path in expected545 expected_groups, expected_frames = expected[path]546 assert expected_groups == set(groups)547 assert expected_frames == set(leaves)548 for leaf in leaves:549 frame_path = "/".join([path, leaf])550 obj = store.get(frame_path)551 if "df" in leaf:552 tm.assert_frame_equal(obj, objs[leaf])553 else:554 tm.assert_series_equal(obj, objs[leaf])555 def test_getattr(self, setup_path):556 with ensure_clean_store(setup_path) as store:557 s = tm.makeTimeSeries()558 store["a"] = s559 # test attribute access560 result = store.a561 tm.assert_series_equal(result, s)562 result = getattr(store, "a")563 tm.assert_series_equal(result, s)564 df = tm.makeTimeDataFrame()565 store["df"] = df566 result = store.df567 tm.assert_frame_equal(result, df)568 # errors569 for x in ["d", "mode", "path", "handle", "complib"]:570 with pytest.raises(AttributeError):571 getattr(store, x)572 # not stores573 for x in ["mode", "path", "handle", "complib"]:574 getattr(store, f"_{x}")575 def test_put(self, setup_path):576 with ensure_clean_store(setup_path) as store:577 ts = tm.makeTimeSeries()578 df = tm.makeTimeDataFrame()579 store["a"] = ts580 store["b"] = df[:10]581 
store["foo/bar/bah"] = df[:10]582 store["foo"] = df[:10]583 store["/foo"] = df[:10]584 store.put("c", df[:10], format="table")585 # not OK, not a table586 with pytest.raises(ValueError):587 store.put("b", df[10:], append=True)588 # node does not currently exist, test _is_table_type returns False589 # in this case590 _maybe_remove(store, "f")591 with pytest.raises(ValueError):592 store.put("f", df[10:], append=True)593 # can't put to a table (use append instead)594 with pytest.raises(ValueError):595 store.put("c", df[10:], append=True)596 # overwrite table597 store.put("c", df[:10], format="table", append=False)598 tm.assert_frame_equal(df[:10], store["c"])599 def test_put_string_index(self, setup_path):600 with ensure_clean_store(setup_path) as store:601 index = Index([f"I am a very long string index: {i}" for i in range(20)])602 s = Series(np.arange(20), index=index)603 df = DataFrame({"A": s, "B": s})604 store["a"] = s605 tm.assert_series_equal(store["a"], s)606 store["b"] = df607 tm.assert_frame_equal(store["b"], df)608 # mixed length609 index = Index(610 ["abcdefghijklmnopqrstuvwxyz1234567890"]611 + [f"I am a very long string index: {i}" for i in range(20)]612 )613 s = Series(np.arange(21), index=index)614 df = DataFrame({"A": s, "B": s})615 store["a"] = s616 tm.assert_series_equal(store["a"], s)617 store["b"] = df618 tm.assert_frame_equal(store["b"], df)619 def test_put_compression(self, setup_path):620 with ensure_clean_store(setup_path) as store:621 df = tm.makeTimeDataFrame()622 store.put("c", df, format="table", complib="zlib")623 tm.assert_frame_equal(store["c"], df)624 # can't compress if format='fixed'625 with pytest.raises(ValueError):626 store.put("b", df, format="fixed", complib="zlib")627 @td.skip_if_windows_python_3628 def test_put_compression_blosc(self, setup_path):629 df = tm.makeTimeDataFrame()630 with ensure_clean_store(setup_path) as store:631 # can't compress if format='fixed'632 with pytest.raises(ValueError):633 store.put("b", df, 
format="fixed", complib="blosc")634 store.put("c", df, format="table", complib="blosc")635 tm.assert_frame_equal(store["c"], df)636 def test_complibs_default_settings(self, setup_path):637 # GH15943638 df = tm.makeDataFrame()639 # Set complevel and check if complib is automatically set to640 # default value641 with ensure_clean_path(setup_path) as tmpfile:642 df.to_hdf(tmpfile, "df", complevel=9)643 result = pd.read_hdf(tmpfile, "df")644 tm.assert_frame_equal(result, df)645 with tables.open_file(tmpfile, mode="r") as h5file:646 for node in h5file.walk_nodes(where="/df", classname="Leaf"):647 assert node.filters.complevel == 9648 assert node.filters.complib == "zlib"649 # Set complib and check to see if compression is disabled650 with ensure_clean_path(setup_path) as tmpfile:651 df.to_hdf(tmpfile, "df", complib="zlib")652 result = pd.read_hdf(tmpfile, "df")653 tm.assert_frame_equal(result, df)654 with tables.open_file(tmpfile, mode="r") as h5file:655 for node in h5file.walk_nodes(where="/df", classname="Leaf"):656 assert node.filters.complevel == 0657 assert node.filters.complib is None658 # Check if not setting complib or complevel results in no compression659 with ensure_clean_path(setup_path) as tmpfile:660 df.to_hdf(tmpfile, "df")661 result = pd.read_hdf(tmpfile, "df")662 tm.assert_frame_equal(result, df)663 with tables.open_file(tmpfile, mode="r") as h5file:664 for node in h5file.walk_nodes(where="/df", classname="Leaf"):665 assert node.filters.complevel == 0666 assert node.filters.complib is None667 # Check if file-defaults can be overridden on a per table basis668 with ensure_clean_path(setup_path) as tmpfile:669 store = pd.HDFStore(tmpfile)670 store.append("dfc", df, complevel=9, complib="blosc")671 store.append("df", df)672 store.close()673 with tables.open_file(tmpfile, mode="r") as h5file:674 for node in h5file.walk_nodes(where="/df", classname="Leaf"):675 assert node.filters.complevel == 0676 assert node.filters.complib is None677 for node in 
h5file.walk_nodes(where="/dfc", classname="Leaf"):678 assert node.filters.complevel == 9679 assert node.filters.complib == "blosc"680 def test_complibs(self, setup_path):681 # GH14478682 df = tm.makeDataFrame()683 # Building list of all complibs and complevels tuples684 all_complibs = tables.filters.all_complibs685 # Remove lzo if its not available on this platform686 if not tables.which_lib_version("lzo"):687 all_complibs.remove("lzo")688 # Remove bzip2 if its not available on this platform689 if not tables.which_lib_version("bzip2"):690 all_complibs.remove("bzip2")691 all_levels = range(0, 10)692 all_tests = [(lib, lvl) for lib in all_complibs for lvl in all_levels]693 for (lib, lvl) in all_tests:694 with ensure_clean_path(setup_path) as tmpfile:695 gname = "foo"696 # Write and read file to see if data is consistent697 df.to_hdf(tmpfile, gname, complib=lib, complevel=lvl)698 result = pd.read_hdf(tmpfile, gname)699 tm.assert_frame_equal(result, df)700 # Open file and check metadata701 # for correct amount of compression702 h5table = tables.open_file(tmpfile, mode="r")703 for node in h5table.walk_nodes(where="/" + gname, classname="Leaf"):704 assert node.filters.complevel == lvl705 if lvl == 0:706 assert node.filters.complib is None707 else:708 assert node.filters.complib == lib709 h5table.close()710 def test_put_integer(self, setup_path):711 # non-date, non-string index712 df = DataFrame(np.random.randn(50, 100))713 self._check_roundtrip(df, tm.assert_frame_equal, setup_path)714 @td.xfail_non_writeable715 def test_put_mixed_type(self, setup_path):716 df = tm.makeTimeDataFrame()717 df["obj1"] = "foo"718 df["obj2"] = "bar"719 df["bool1"] = df["A"] > 0720 df["bool2"] = df["B"] > 0721 df["bool3"] = True722 df["int1"] = 1723 df["int2"] = 2724 df["timestamp1"] = Timestamp("20010102")725 df["timestamp2"] = Timestamp("20010103")726 df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)727 df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)728 df.loc[df.index[3:6], 
["obj1"]] = np.nan729 df = df._consolidate()._convert(datetime=True)730 with ensure_clean_store(setup_path) as store:731 _maybe_remove(store, "df")732 # PerformanceWarning733 with catch_warnings(record=True):734 simplefilter("ignore", pd.errors.PerformanceWarning)735 store.put("df", df)736 expected = store.get("df")737 tm.assert_frame_equal(expected, df)738 @pytest.mark.filterwarnings(739 "ignore:object name:tables.exceptions.NaturalNameWarning"740 )741 def test_append(self, setup_path):742 with ensure_clean_store(setup_path) as store:743 # this is allowed by almost always don't want to do it744 # tables.NaturalNameWarning):745 with catch_warnings(record=True):746 df = tm.makeTimeDataFrame()747 _maybe_remove(store, "df1")748 store.append("df1", df[:10])749 store.append("df1", df[10:])750 tm.assert_frame_equal(store["df1"], df)751 _maybe_remove(store, "df2")752 store.put("df2", df[:10], format="table")753 store.append("df2", df[10:])754 tm.assert_frame_equal(store["df2"], df)755 _maybe_remove(store, "df3")756 store.append("/df3", df[:10])757 store.append("/df3", df[10:])758 tm.assert_frame_equal(store["df3"], df)759 # this is allowed by almost always don't want to do it760 # tables.NaturalNameWarning761 _maybe_remove(store, "/df3 foo")762 store.append("/df3 foo", df[:10])763 store.append("/df3 foo", df[10:])764 tm.assert_frame_equal(store["df3 foo"], df)765 # dtype issues - mizxed type in a single object column766 df = DataFrame(data=[[1, 2], [0, 1], [1, 2], [0, 0]])767 df["mixed_column"] = "testing"768 df.loc[2, "mixed_column"] = np.nan769 _maybe_remove(store, "df")770 store.append("df", df)771 tm.assert_frame_equal(store["df"], df)772 # uints - test storage of uints773 uint_data = DataFrame(774 {775 "u08": Series(776 np.random.randint(0, high=255, size=5), dtype=np.uint8777 ),778 "u16": Series(779 np.random.randint(0, high=65535, size=5), dtype=np.uint16780 ),781 "u32": Series(782 np.random.randint(0, high=2 ** 30, size=5), dtype=np.uint32783 ),784 "u64": 
Series(785 [2 ** 58, 2 ** 59, 2 ** 60, 2 ** 61, 2 ** 62],786 dtype=np.uint64,787 ),788 },789 index=np.arange(5),790 )791 _maybe_remove(store, "uints")792 store.append("uints", uint_data)793 tm.assert_frame_equal(store["uints"], uint_data)794 # uints - test storage of uints in indexable columns795 _maybe_remove(store, "uints")796 # 64-bit indices not yet supported797 store.append("uints", uint_data, data_columns=["u08", "u16", "u32"])798 tm.assert_frame_equal(store["uints"], uint_data)799 def test_append_series(self, setup_path):800 with ensure_clean_store(setup_path) as store:801 # basic802 ss = tm.makeStringSeries()803 ts = tm.makeTimeSeries()804 ns = Series(np.arange(100))805 store.append("ss", ss)806 result = store["ss"]807 tm.assert_series_equal(result, ss)808 assert result.name is None809 store.append("ts", ts)810 result = store["ts"]811 tm.assert_series_equal(result, ts)812 assert result.name is None813 ns.name = "foo"814 store.append("ns", ns)815 result = store["ns"]816 tm.assert_series_equal(result, ns)817 assert result.name == ns.name818 # select on the values819 expected = ns[ns > 60]820 result = store.select("ns", "foo>60")821 tm.assert_series_equal(result, expected)822 # select on the index and values823 expected = ns[(ns > 70) & (ns.index < 90)]824 result = store.select("ns", "foo>70 and index<90")825 tm.assert_series_equal(result, expected)826 # multi-index827 mi = DataFrame(np.random.randn(5, 1), columns=["A"])828 mi["B"] = np.arange(len(mi))829 mi["C"] = "foo"830 mi.loc[3:5, "C"] = "bar"831 mi.set_index(["C", "B"], inplace=True)832 s = mi.stack()833 s.index = s.index.droplevel(2)834 store.append("mi", s)835 tm.assert_series_equal(store["mi"], s)836 def test_store_index_types(self, setup_path):837 # GH5386838 # test storing various index types839 with ensure_clean_store(setup_path) as store:840 def check(format, index):841 df = DataFrame(np.random.randn(10, 2), columns=list("AB"))842 df.index = index(len(df))843 _maybe_remove(store, "df")844 
store.put("df", df, format=format)845 tm.assert_frame_equal(df, store["df"])846 for index in [847 tm.makeFloatIndex,848 tm.makeStringIndex,849 tm.makeIntIndex,850 tm.makeDateIndex,851 ]:852 check("table", index)853 check("fixed", index)854 # period index currently broken for table855 # seee GH7796 FIXME856 check("fixed", tm.makePeriodIndex)857 # check('table',tm.makePeriodIndex)858 # unicode859 index = tm.makeUnicodeIndex860 check("table", index)861 check("fixed", index)862 @pytest.mark.skipif(863 not is_platform_little_endian(), reason="reason platform is not little endian"864 )865 def test_encoding(self, setup_path):866 with ensure_clean_store(setup_path) as store:867 df = DataFrame(dict(A="foo", B="bar"), index=range(5))868 df.loc[2, "A"] = np.nan869 df.loc[3, "B"] = np.nan870 _maybe_remove(store, "df")871 store.append("df", df, encoding="ascii")872 tm.assert_frame_equal(store["df"], df)873 expected = df.reindex(columns=["A"])874 result = store.select("df", Term("columns=A", encoding="ascii"))875 tm.assert_frame_equal(result, expected)876 @pytest.mark.parametrize(877 "val",878 [879 [b"E\xc9, 17", b"", b"a", b"b", b"c"],880 [b"E\xc9, 17", b"a", b"b", b"c"],881 [b"EE, 17", b"", b"a", b"b", b"c"],882 [b"E\xc9, 17", b"\xf8\xfc", b"a", b"b", b"c"],883 [b"", b"a", b"b", b"c"],884 [b"\xf8\xfc", b"a", b"b", b"c"],885 [b"A\xf8\xfc", b"", b"a", b"b", b"c"],886 [np.nan, b"", b"b", b"c"],887 [b"A\xf8\xfc", np.nan, b"", b"b", b"c"],888 ],889 )890 @pytest.mark.parametrize("dtype", ["category", object])891 def test_latin_encoding(self, setup_path, dtype, val):892 enc = "latin-1"893 nan_rep = ""894 key = "data"895 val = [x.decode(enc) if isinstance(x, bytes) else x for x in val]896 ser = pd.Series(val, dtype=dtype)897 with ensure_clean_path(setup_path) as store:898 ser.to_hdf(store, key, format="table", encoding=enc, nan_rep=nan_rep)899 retr = read_hdf(store, key)900 s_nan = ser.replace(nan_rep, np.nan)901 tm.assert_series_equal(s_nan, retr)902 def test_append_some_nans(self, 
setup_path):903 with ensure_clean_store(setup_path) as store:904 df = DataFrame(905 {906 "A": Series(np.random.randn(20)).astype("int32"),907 "A1": np.random.randn(20),908 "A2": np.random.randn(20),909 "B": "foo",910 "C": "bar",911 "D": Timestamp("20010101"),912 "E": datetime.datetime(2001, 1, 2, 0, 0),913 },914 index=np.arange(20),915 )916 # some nans917 _maybe_remove(store, "df1")918 df.loc[0:15, ["A1", "B", "D", "E"]] = np.nan919 store.append("df1", df[:10])920 store.append("df1", df[10:])921 tm.assert_frame_equal(store["df1"], df)922 # first column923 df1 = df.copy()924 df1.loc[:, "A1"] = np.nan925 _maybe_remove(store, "df1")926 store.append("df1", df1[:10])927 store.append("df1", df1[10:])928 tm.assert_frame_equal(store["df1"], df1)929 # 2nd column930 df2 = df.copy()931 df2.loc[:, "A2"] = np.nan932 _maybe_remove(store, "df2")933 store.append("df2", df2[:10])934 store.append("df2", df2[10:])935 tm.assert_frame_equal(store["df2"], df2)936 # datetimes937 df3 = df.copy()938 df3.loc[:, "E"] = np.nan939 _maybe_remove(store, "df3")940 store.append("df3", df3[:10])941 store.append("df3", df3[10:])942 tm.assert_frame_equal(store["df3"], df3)943 def test_append_all_nans(self, setup_path):944 with ensure_clean_store(setup_path) as store:945 df = DataFrame(946 {"A1": np.random.randn(20), "A2": np.random.randn(20)},947 index=np.arange(20),948 )949 df.loc[0:15, :] = np.nan950 # nan some entire rows (dropna=True)951 _maybe_remove(store, "df")952 store.append("df", df[:10], dropna=True)953 store.append("df", df[10:], dropna=True)954 tm.assert_frame_equal(store["df"], df[-4:])955 # nan some entire rows (dropna=False)956 _maybe_remove(store, "df2")957 store.append("df2", df[:10], dropna=False)958 store.append("df2", df[10:], dropna=False)959 tm.assert_frame_equal(store["df2"], df)960 # tests the option io.hdf.dropna_table961 pd.set_option("io.hdf.dropna_table", False)962 _maybe_remove(store, "df3")963 store.append("df3", df[:10])964 store.append("df3", df[10:])965 
tm.assert_frame_equal(store["df3"], df)966 pd.set_option("io.hdf.dropna_table", True)967 _maybe_remove(store, "df4")968 store.append("df4", df[:10])969 store.append("df4", df[10:])970 tm.assert_frame_equal(store["df4"], df[-4:])971 # nan some entire rows (string are still written!)972 df = DataFrame(973 {974 "A1": np.random.randn(20),975 "A2": np.random.randn(20),976 "B": "foo",977 "C": "bar",978 },979 index=np.arange(20),980 )981 df.loc[0:15, :] = np.nan982 _maybe_remove(store, "df")983 store.append("df", df[:10], dropna=True)984 store.append("df", df[10:], dropna=True)985 tm.assert_frame_equal(store["df"], df)986 _maybe_remove(store, "df2")987 store.append("df2", df[:10], dropna=False)988 store.append("df2", df[10:], dropna=False)989 tm.assert_frame_equal(store["df2"], df)990 # nan some entire rows (but since we have dates they are still991 # written!)992 df = DataFrame(993 {994 "A1": np.random.randn(20),995 "A2": np.random.randn(20),996 "B": "foo",997 "C": "bar",998 "D": Timestamp("20010101"),999 "E": datetime.datetime(2001, 1, 2, 0, 0),1000 },1001 index=np.arange(20),1002 )1003 df.loc[0:15, :] = np.nan1004 _maybe_remove(store, "df")1005 store.append("df", df[:10], dropna=True)1006 store.append("df", df[10:], dropna=True)1007 tm.assert_frame_equal(store["df"], df)1008 _maybe_remove(store, "df2")1009 store.append("df2", df[:10], dropna=False)1010 store.append("df2", df[10:], dropna=False)1011 tm.assert_frame_equal(store["df2"], df)1012 # Test to make sure defaults are to not drop.1013 # Corresponding to Issue 93821014 df_with_missing = DataFrame(1015 {"col1": [0, np.nan, 2], "col2": [1, np.nan, np.nan]}1016 )1017 with ensure_clean_path(setup_path) as path:1018 df_with_missing.to_hdf(path, "df_with_missing", format="table")1019 reloaded = read_hdf(path, "df_with_missing")1020 tm.assert_frame_equal(df_with_missing, reloaded)1021 def test_read_missing_key_close_store(self, setup_path):1022 # GH 257661023 with ensure_clean_path(setup_path) as path:1024 df = 
pd.DataFrame({"a": range(2), "b": range(2)})1025 df.to_hdf(path, "k1")1026 with pytest.raises(KeyError, match="'No object named k2 in the file'"):1027 pd.read_hdf(path, "k2")1028 # smoke test to test that file is properly closed after1029 # read with KeyError before another write1030 df.to_hdf(path, "k2")1031 def test_read_missing_key_opened_store(self, setup_path):1032 # GH 286991033 with ensure_clean_path(setup_path) as path:1034 df = pd.DataFrame({"a": range(2), "b": range(2)})1035 df.to_hdf(path, "k1")1036 with pd.HDFStore(path, "r") as store:1037 with pytest.raises(KeyError, match="'No object named k2 in the file'"):1038 pd.read_hdf(store, "k2")1039 # Test that the file is still open after a KeyError and that we can1040 # still read from it.1041 pd.read_hdf(store, "k1")1042 def test_append_frame_column_oriented(self, setup_path):1043 with ensure_clean_store(setup_path) as store:1044 # column oriented1045 df = tm.makeTimeDataFrame()1046 df.index = df.index._with_freq(None) # freq doesnt round-trip1047 _maybe_remove(store, "df1")1048 store.append("df1", df.iloc[:, :2], axes=["columns"])1049 store.append("df1", df.iloc[:, 2:])1050 tm.assert_frame_equal(store["df1"], df)1051 result = store.select("df1", "columns=A")1052 expected = df.reindex(columns=["A"])1053 tm.assert_frame_equal(expected, result)1054 # selection on the non-indexable1055 result = store.select("df1", ("columns=A", "index=df.index[0:4]"))1056 expected = df.reindex(columns=["A"], index=df.index[0:4])1057 tm.assert_frame_equal(expected, result)1058 # this isn't supported1059 with pytest.raises(TypeError):1060 store.select("df1", "columns=A and index>df.index[4]")1061 def test_append_with_different_block_ordering(self, setup_path):1062 # GH 4096; using same frames, but different block orderings1063 with ensure_clean_store(setup_path) as store:1064 for i in range(10):1065 df = DataFrame(np.random.randn(10, 2), columns=list("AB"))1066 df["index"] = range(10)1067 df["index"] += i * 101068 df["int64"] = 
Series([1] * len(df), dtype="int64")1069 df["int16"] = Series([1] * len(df), dtype="int16")1070 if i % 2 == 0:1071 del df["int64"]1072 df["int64"] = Series([1] * len(df), dtype="int64")1073 if i % 3 == 0:1074 a = df.pop("A")1075 df["A"] = a1076 df.set_index("index", inplace=True)1077 store.append("df", df)1078 # test a different ordering but with more fields (like invalid1079 # combinate)1080 with ensure_clean_store(setup_path) as store:1081 df = DataFrame(np.random.randn(10, 2), columns=list("AB"), dtype="float64")1082 df["int64"] = Series([1] * len(df), dtype="int64")1083 df["int16"] = Series([1] * len(df), dtype="int16")1084 store.append("df", df)1085 # store additional fields in different blocks1086 df["int16_2"] = Series([1] * len(df), dtype="int16")1087 with pytest.raises(ValueError):1088 store.append("df", df)1089 # store multiple additional fields in different blocks1090 df["float_3"] = Series([1.0] * len(df), dtype="float64")1091 with pytest.raises(ValueError):1092 store.append("df", df)1093 def test_append_with_strings(self, setup_path):1094 with ensure_clean_store(setup_path) as store:1095 with catch_warnings(record=True):1096 def check_col(key, name, size):1097 assert (1098 getattr(store.get_storer(key).table.description, name).itemsize1099 == size1100 )1101 # avoid truncation on elements1102 df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]])1103 store.append("df_big", df)1104 tm.assert_frame_equal(store.select("df_big"), df)1105 check_col("df_big", "values_block_1", 15)1106 # appending smaller string ok1107 df2 = DataFrame([[124, "asdqy"], [346, "dggnhefbdfb"]])1108 store.append("df_big", df2)1109 expected = concat([df, df2])1110 tm.assert_frame_equal(store.select("df_big"), expected)1111 check_col("df_big", "values_block_1", 15)1112 # avoid truncation on elements1113 df = DataFrame([[123, "asdqwerty"], [345, "dggnhebbsdfbdfb"]])1114 store.append("df_big2", df, min_itemsize={"values": 50})1115 tm.assert_frame_equal(store.select("df_big2"), 
df)1116 check_col("df_big2", "values_block_1", 50)1117 # bigger string on next append1118 store.append("df_new", df)1119 df_new = DataFrame(1120 [[124, "abcdefqhij"], [346, "abcdefghijklmnopqrtsuvwxyz"]]1121 )1122 with pytest.raises(ValueError):1123 store.append("df_new", df_new)1124 # min_itemsize on Series index (GH 11412)1125 df = tm.makeMixedDataFrame().set_index("C")1126 store.append("ss", df["B"], min_itemsize={"index": 4})1127 tm.assert_series_equal(store.select("ss"), df["B"])1128 # same as above, with data_columns=True1129 store.append(1130 "ss2", df["B"], data_columns=True, min_itemsize={"index": 4}1131 )1132 tm.assert_series_equal(store.select("ss2"), df["B"])1133 # min_itemsize in index without appending (GH 10381)1134 store.put("ss3", df, format="table", min_itemsize={"index": 6})1135 # just make sure there is a longer string:1136 df2 = df.copy().reset_index().assign(C="longer").set_index("C")1137 store.append("ss3", df2)1138 tm.assert_frame_equal(store.select("ss3"), pd.concat([df, df2]))1139 # same as above, with a Series1140 store.put("ss4", df["B"], format="table", min_itemsize={"index": 6})1141 store.append("ss4", df2["B"])1142 tm.assert_series_equal(1143 store.select("ss4"), pd.concat([df["B"], df2["B"]])1144 )1145 # with nans1146 _maybe_remove(store, "df")1147 df = tm.makeTimeDataFrame()1148 df["string"] = "foo"1149 df.loc[df.index[1:4], "string"] = np.nan1150 df["string2"] = "bar"1151 df.loc[df.index[4:8], "string2"] = np.nan1152 df["string3"] = "bah"1153 df.loc[df.index[1:], "string3"] = np.nan1154 store.append("df", df)1155 result = store.select("df")1156 tm.assert_frame_equal(result, df)1157 with ensure_clean_store(setup_path) as store:1158 def check_col(key, name, size):1159 assert getattr(1160 store.get_storer(key).table.description, name1161 ).itemsize, size1162 df = DataFrame(dict(A="foo", B="bar"), index=range(10))1163 # a min_itemsize that creates a data_column1164 _maybe_remove(store, "df")1165 store.append("df", df, 
min_itemsize={"A": 200})1166 check_col("df", "A", 200)1167 assert store.get_storer("df").data_columns == ["A"]1168 # a min_itemsize that creates a data_column21169 _maybe_remove(store, "df")1170 store.append("df", df, data_columns=["B"], min_itemsize={"A": 200})1171 check_col("df", "A", 200)1172 assert store.get_storer("df").data_columns == ["B", "A"]1173 # a min_itemsize that creates a data_column21174 _maybe_remove(store, "df")1175 store.append("df", df, data_columns=["B"], min_itemsize={"values": 200})1176 check_col("df", "B", 200)1177 check_col("df", "values_block_0", 200)1178 assert store.get_storer("df").data_columns == ["B"]1179 # infer the .typ on subsequent appends1180 _maybe_remove(store, "df")1181 store.append("df", df[:5], min_itemsize=200)1182 store.append("df", df[5:], min_itemsize=200)1183 tm.assert_frame_equal(store["df"], df)1184 # invalid min_itemsize keys1185 df = DataFrame(["foo", "foo", "foo", "barh", "barh", "barh"], columns=["A"])1186 _maybe_remove(store, "df")1187 with pytest.raises(ValueError):1188 store.append("df", df, min_itemsize={"foo": 20, "foobar": 20})1189 def test_append_with_empty_string(self, setup_path):1190 with ensure_clean_store(setup_path) as store:1191 # with all empty strings (GH 12242)1192 df = DataFrame({"x": ["a", "b", "c", "d", "e", "f", ""]})1193 store.append("df", df[:-1], min_itemsize={"x": 1})1194 store.append("df", df[-1:], min_itemsize={"x": 1})1195 tm.assert_frame_equal(store.select("df"), df)1196 def test_to_hdf_with_min_itemsize(self, setup_path):1197 with ensure_clean_path(setup_path) as path:1198 # min_itemsize in index with to_hdf (GH 10381)1199 df = tm.makeMixedDataFrame().set_index("C")1200 df.to_hdf(path, "ss3", format="table", min_itemsize={"index": 6})1201 # just make sure there is a longer string:1202 df2 = df.copy().reset_index().assign(C="longer").set_index("C")1203 df2.to_hdf(path, "ss3", append=True, format="table")1204 tm.assert_frame_equal(pd.read_hdf(path, "ss3"), pd.concat([df, df2]))1205 # 
same as above, with a Series1206 df["B"].to_hdf(path, "ss4", format="table", min_itemsize={"index": 6})1207 df2["B"].to_hdf(path, "ss4", append=True, format="table")1208 tm.assert_series_equal(1209 pd.read_hdf(path, "ss4"), pd.concat([df["B"], df2["B"]])1210 )1211 @pytest.mark.parametrize(1212 "format", [pytest.param("fixed", marks=td.xfail_non_writeable), "table"]1213 )1214 def test_to_hdf_errors(self, format, setup_path):1215 data = ["\ud800foo"]1216 ser = pd.Series(data, index=pd.Index(data))1217 with ensure_clean_path(setup_path) as path:1218 # GH 208351219 ser.to_hdf(path, "table", format=format, errors="surrogatepass")1220 result = pd.read_hdf(path, "table", errors="surrogatepass")1221 tm.assert_series_equal(result, ser)1222 def test_append_with_data_columns(self, setup_path):1223 with ensure_clean_store(setup_path) as store:1224 df = tm.makeTimeDataFrame()1225 df.iloc[0, df.columns.get_loc("B")] = 1.01226 _maybe_remove(store, "df")1227 store.append("df", df[:2], data_columns=["B"])1228 store.append("df", df[2:])1229 tm.assert_frame_equal(store["df"], df)1230 # check that we have indices created1231 assert store._handle.root.df.table.cols.index.is_indexed is True1232 assert store._handle.root.df.table.cols.B.is_indexed is True1233 # data column searching1234 result = store.select("df", "B>0")1235 expected = df[df.B > 0]1236 tm.assert_frame_equal(result, expected)1237 # data column searching (with an indexable and a data_columns)1238 result = store.select("df", "B>0 and index>df.index[3]")1239 df_new = df.reindex(index=df.index[4:])1240 expected = df_new[df_new.B > 0]1241 tm.assert_frame_equal(result, expected)1242 # data column selection with a string data_column1243 df_new = df.copy()1244 df_new["string"] = "foo"1245 df_new.loc[df_new.index[1:4], "string"] = np.nan1246 df_new.loc[df_new.index[5:6], "string"] = "bar"1247 _maybe_remove(store, "df")1248 store.append("df", df_new, data_columns=["string"])1249 result = store.select("df", "string='foo'")1250 
expected = df_new[df_new.string == "foo"]1251 tm.assert_frame_equal(result, expected)1252 # using min_itemsize and a data column1253 def check_col(key, name, size):1254 assert (1255 getattr(store.get_storer(key).table.description, name).itemsize1256 == size1257 )1258 with ensure_clean_store(setup_path) as store:1259 _maybe_remove(store, "df")1260 store.append(1261 "df", df_new, data_columns=["string"], min_itemsize={"string": 30}1262 )1263 check_col("df", "string", 30)1264 _maybe_remove(store, "df")1265 store.append("df", df_new, data_columns=["string"], min_itemsize=30)1266 check_col("df", "string", 30)1267 _maybe_remove(store, "df")1268 store.append(1269 "df", df_new, data_columns=["string"], min_itemsize={"values": 30}1270 )1271 check_col("df", "string", 30)1272 with ensure_clean_store(setup_path) as store:1273 df_new["string2"] = "foobarbah"1274 df_new["string_block1"] = "foobarbah1"1275 df_new["string_block2"] = "foobarbah2"1276 _maybe_remove(store, "df")1277 store.append(1278 "df",1279 df_new,1280 data_columns=["string", "string2"],1281 min_itemsize={"string": 30, "string2": 40, "values": 50},1282 )1283 check_col("df", "string", 30)1284 check_col("df", "string2", 40)1285 check_col("df", "values_block_1", 50)1286 with ensure_clean_store(setup_path) as store:1287 # multiple data columns1288 df_new = df.copy()1289 df_new.iloc[0, df_new.columns.get_loc("A")] = 1.01290 df_new.iloc[0, df_new.columns.get_loc("B")] = -1.01291 df_new["string"] = "foo"1292 sl = df_new.columns.get_loc("string")1293 df_new.iloc[1:4, sl] = np.nan1294 df_new.iloc[5:6, sl] = "bar"1295 df_new["string2"] = "foo"1296 sl = df_new.columns.get_loc("string2")1297 df_new.iloc[2:5, sl] = np.nan1298 df_new.iloc[7:8, sl] = "bar"1299 _maybe_remove(store, "df")1300 store.append("df", df_new, data_columns=["A", "B", "string", "string2"])1301 result = store.select(1302 "df", "string='foo' and string2='foo' and A>0 and B<0"1303 )1304 expected = df_new[1305 (df_new.string == "foo")1306 & (df_new.string2 == 
"foo")1307 & (df_new.A > 0)1308 & (df_new.B < 0)1309 ]1310 tm.assert_frame_equal(1311 result, expected, check_index_type=False, check_freq=False1312 )1313 # yield an empty frame1314 result = store.select("df", "string='foo' and string2='cool'")1315 expected = df_new[(df_new.string == "foo") & (df_new.string2 == "cool")]1316 tm.assert_frame_equal(1317 result, expected, check_index_type=False, check_freq=False1318 )1319 with ensure_clean_store(setup_path) as store:1320 # doc example1321 df_dc = df.copy()1322 df_dc["string"] = "foo"1323 df_dc.loc[df_dc.index[4:6], "string"] = np.nan1324 df_dc.loc[df_dc.index[7:9], "string"] = "bar"1325 df_dc["string2"] = "cool"1326 df_dc["datetime"] = Timestamp("20010102")1327 df_dc = df_dc._convert(datetime=True)1328 df_dc.loc[df_dc.index[3:5], ["A", "B", "datetime"]] = np.nan1329 _maybe_remove(store, "df_dc")1330 store.append(1331 "df_dc", df_dc, data_columns=["B", "C", "string", "string2", "datetime"]1332 )1333 result = store.select("df_dc", "B>0")1334 expected = df_dc[df_dc.B > 0]1335 tm.assert_frame_equal(1336 result, expected, check_index_type=False, check_freq=False1337 )1338 result = store.select("df_dc", ["B > 0", "C > 0", "string == foo"])1339 expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")]1340 tm.assert_frame_equal(1341 result, expected, check_index_type=False, check_freq=False1342 )1343 # FIXME: 2020-05-07 freq check randomly fails in the CI1344 with ensure_clean_store(setup_path) as store:1345 # doc example part 21346 np.random.seed(1234)1347 index = date_range("1/1/2000", periods=8)1348 df_dc = DataFrame(1349 np.random.randn(8, 3), index=index, columns=["A", "B", "C"]1350 )1351 df_dc["string"] = "foo"1352 df_dc.loc[df_dc.index[4:6], "string"] = np.nan1353 df_dc.loc[df_dc.index[7:9], "string"] = "bar"1354 df_dc.loc[:, ["B", "C"]] = df_dc.loc[:, ["B", "C"]].abs()1355 df_dc["string2"] = "cool"1356 # on-disk operations1357 store.append("df_dc", df_dc, data_columns=["B", "C", "string", 
"string2"])1358 result = store.select("df_dc", "B>0")1359 expected = df_dc[df_dc.B > 0]1360 tm.assert_frame_equal(result, expected)1361 result = store.select("df_dc", ["B > 0", "C > 0", 'string == "foo"'])1362 expected = df_dc[(df_dc.B > 0) & (df_dc.C > 0) & (df_dc.string == "foo")]1363 tm.assert_frame_equal(result, expected)1364 def test_create_table_index(self, setup_path):1365 with ensure_clean_store(setup_path) as store:1366 with catch_warnings(record=True):1367 def col(t, column):1368 return getattr(store.get_storer(t).table.cols, column)1369 # data columns1370 df = tm.makeTimeDataFrame()1371 df["string"] = "foo"1372 df["string2"] = "bar"1373 store.append("f", df, data_columns=["string", "string2"])1374 assert col("f", "index").is_indexed is True1375 assert col("f", "string").is_indexed is True1376 assert col("f", "string2").is_indexed is True1377 # specify index=columns1378 store.append(1379 "f2", df, index=["string"], data_columns=["string", "string2"]1380 )1381 assert col("f2", "index").is_indexed is False1382 assert col("f2", "string").is_indexed is True1383 assert col("f2", "string2").is_indexed is False1384 # try to index a non-table1385 _maybe_remove(store, "f2")1386 store.put("f2", df)1387 with pytest.raises(TypeError):1388 store.create_table_index("f2")1389 def test_create_table_index_data_columns_argument(self, setup_path):1390 # GH 281561391 with ensure_clean_store(setup_path) as store:1392 with catch_warnings(record=True):1393 def col(t, column):1394 return getattr(store.get_storer(t).table.cols, column)1395 # data columns1396 df = tm.makeTimeDataFrame()1397 df["string"] = "foo"1398 df["string2"] = "bar"1399 store.append("f", df, data_columns=["string"])1400 assert col("f", "index").is_indexed is True1401 assert col("f", "string").is_indexed is True1402 msg = "'Cols' object has no attribute 'string2'"1403 with pytest.raises(AttributeError, match=msg):1404 col("f", "string2").is_indexed1405 # try to index a col which isn't a data_column1406 msg = 
(1407 f"column string2 is not a data_column.\n"1408 f"In order to read column string2 you must reload the dataframe \n"1409 f"into HDFStore and include string2 with the data_columns argument."1410 )1411 with pytest.raises(AttributeError, match=msg):1412 store.create_table_index("f", columns=["string2"])1413 def test_append_hierarchical(self, setup_path):1414 index = MultiIndex(1415 levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],1416 codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],1417 names=["foo", "bar"],1418 )1419 df = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"])1420 with ensure_clean_store(setup_path) as store:1421 store.append("mi", df)1422 result = store.select("mi")1423 tm.assert_frame_equal(result, df)1424 # GH 37481425 result = store.select("mi", columns=["A", "B"])1426 expected = df.reindex(columns=["A", "B"])1427 tm.assert_frame_equal(result, expected)1428 with ensure_clean_path("test.hdf") as path:1429 df.to_hdf(path, "df", format="table")1430 result = read_hdf(path, "df", columns=["A", "B"])1431 expected = df.reindex(columns=["A", "B"])1432 tm.assert_frame_equal(result, expected)1433 def test_column_multiindex(self, setup_path):1434 # GH 47101435 # recreate multi-indexes properly1436 index = MultiIndex.from_tuples(1437 [("A", "a"), ("A", "b"), ("B", "a"), ("B", "b")], names=["first", "second"]1438 )1439 df = DataFrame(np.arange(12).reshape(3, 4), columns=index)1440 expected = df.copy()1441 if isinstance(expected.index, RangeIndex):1442 expected.index = Int64Index(expected.index)1443 with ensure_clean_store(setup_path) as store:1444 store.put("df", df)1445 tm.assert_frame_equal(1446 store["df"], expected, check_index_type=True, check_column_type=True1447 )1448 store.put("df1", df, format="table")1449 tm.assert_frame_equal(1450 store["df1"], expected, check_index_type=True, check_column_type=True1451 )1452 with pytest.raises(ValueError):1453 store.put("df2", df, format="table", 
data_columns=["A"])1454 with pytest.raises(ValueError):1455 store.put("df3", df, format="table", data_columns=True)1456 # appending multi-column on existing table (see GH 6167)1457 with ensure_clean_store(setup_path) as store:1458 store.append("df2", df)1459 store.append("df2", df)1460 tm.assert_frame_equal(store["df2"], concat((df, df)))1461 # non_index_axes name1462 df = DataFrame(1463 np.arange(12).reshape(3, 4), columns=Index(list("ABCD"), name="foo")1464 )1465 expected = df.copy()1466 if isinstance(expected.index, RangeIndex):1467 expected.index = Int64Index(expected.index)1468 with ensure_clean_store(setup_path) as store:1469 store.put("df1", df, format="table")1470 tm.assert_frame_equal(1471 store["df1"], expected, check_index_type=True, check_column_type=True1472 )1473 def test_store_multiindex(self, setup_path):1474 # validate multi-index names1475 # GH 55271476 with ensure_clean_store(setup_path) as store:1477 def make_index(names=None):1478 return MultiIndex.from_tuples(1479 [1480 (datetime.datetime(2013, 12, d), s, t)1481 for d in range(1, 3)1482 for s in range(2)1483 for t in range(3)1484 ],1485 names=names,1486 )1487 # no names1488 _maybe_remove(store, "df")1489 df = DataFrame(np.zeros((12, 2)), columns=["a", "b"], index=make_index())1490 store.append("df", df)1491 tm.assert_frame_equal(store.select("df"), df)1492 # partial names1493 _maybe_remove(store, "df")1494 df = DataFrame(1495 np.zeros((12, 2)),1496 columns=["a", "b"],1497 index=make_index(["date", None, None]),1498 )1499 store.append("df", df)1500 tm.assert_frame_equal(store.select("df"), df)1501 # series1502 _maybe_remove(store, "s")1503 s = Series(np.zeros(12), index=make_index(["date", None, None]))1504 store.append("s", s)1505 xp = Series(np.zeros(12), index=make_index(["date", "level_1", "level_2"]))1506 tm.assert_series_equal(store.select("s"), xp)1507 # dup with column1508 _maybe_remove(store, "df")1509 df = DataFrame(1510 np.zeros((12, 2)),1511 columns=["a", "b"],1512 
index=make_index(["date", "a", "t"]),1513 )1514 with pytest.raises(ValueError):1515 store.append("df", df)1516 # dup within level1517 _maybe_remove(store, "df")1518 df = DataFrame(1519 np.zeros((12, 2)),1520 columns=["a", "b"],1521 index=make_index(["date", "date", "date"]),1522 )1523 with pytest.raises(ValueError):1524 store.append("df", df)1525 # fully names1526 _maybe_remove(store, "df")1527 df = DataFrame(1528 np.zeros((12, 2)),1529 columns=["a", "b"],1530 index=make_index(["date", "s", "t"]),1531 )1532 store.append("df", df)1533 tm.assert_frame_equal(store.select("df"), df)1534 def test_select_columns_in_where(self, setup_path):1535 # GH 61691536 # recreate multi-indexes when columns is passed1537 # in the `where` argument1538 index = MultiIndex(1539 levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],1540 codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],1541 names=["foo_name", "bar_name"],1542 )1543 # With a DataFrame1544 df = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"])1545 with ensure_clean_store(setup_path) as store:1546 store.put("df", df, format="table")1547 expected = df[["A"]]1548 tm.assert_frame_equal(store.select("df", columns=["A"]), expected)1549 tm.assert_frame_equal(store.select("df", where="columns=['A']"), expected)1550 # With a Series1551 s = Series(np.random.randn(10), index=index, name="A")1552 with ensure_clean_store(setup_path) as store:1553 store.put("s", s, format="table")1554 tm.assert_series_equal(store.select("s", where="columns=['A']"), s)1555 def test_mi_data_columns(self, setup_path):1556 # GH 144351557 idx = pd.MultiIndex.from_arrays(1558 [date_range("2000-01-01", periods=5), range(5)], names=["date", "id"]1559 )1560 df = pd.DataFrame({"a": [1.1, 1.2, 1.3, 1.4, 1.5]}, index=idx)1561 with ensure_clean_store(setup_path) as store:1562 store.append("df", df, data_columns=True)1563 actual = store.select("df", where="id == 1")1564 expected = df.iloc[[1], :]1565 
tm.assert_frame_equal(actual, expected)1566 def test_pass_spec_to_storer(self, setup_path):1567 df = tm.makeDataFrame()1568 with ensure_clean_store(setup_path) as store:1569 store.put("df", df)1570 with pytest.raises(TypeError):1571 store.select("df", columns=["A"])1572 with pytest.raises(TypeError):1573 store.select("df", where=[("columns=A")])1574 @td.xfail_non_writeable1575 def test_append_misc(self, setup_path):1576 with ensure_clean_store(setup_path) as store:1577 df = tm.makeDataFrame()1578 store.append("df", df, chunksize=1)1579 result = store.select("df")1580 tm.assert_frame_equal(result, df)1581 store.append("df1", df, expectedrows=10)1582 result = store.select("df1")1583 tm.assert_frame_equal(result, df)1584 # more chunksize in append tests1585 def check(obj, comparator):1586 for c in [10, 200, 1000]:1587 with ensure_clean_store(setup_path, mode="w") as store:1588 store.append("obj", obj, chunksize=c)1589 result = store.select("obj")1590 comparator(result, obj)1591 df = tm.makeDataFrame()1592 df["string"] = "foo"1593 df["float322"] = 1.01594 df["float322"] = df["float322"].astype("float32")1595 df["bool"] = df["float322"] > 01596 df["time1"] = Timestamp("20130101")1597 df["time2"] = Timestamp("20130102")1598 check(df, tm.assert_frame_equal)1599 # empty frame, GH42731600 with ensure_clean_store(setup_path) as store:1601 # 0 len1602 df_empty = DataFrame(columns=list("ABC"))1603 store.append("df", df_empty)1604 with pytest.raises(KeyError, match="'No object named df in the file'"):1605 store.select("df")1606 # repeated append of 0/non-zero frames1607 df = DataFrame(np.random.rand(10, 3), columns=list("ABC"))1608 store.append("df", df)1609 tm.assert_frame_equal(store.select("df"), df)1610 store.append("df", df_empty)1611 tm.assert_frame_equal(store.select("df"), df)1612 # store1613 df = DataFrame(columns=list("ABC"))1614 store.put("df2", df)1615 tm.assert_frame_equal(store.select("df2"), df)1616 def test_append_raise(self, setup_path):1617 with 
ensure_clean_store(setup_path) as store:1618 # test append with invalid input to get good error messages1619 # list in column1620 df = tm.makeDataFrame()1621 df["invalid"] = [["a"]] * len(df)1622 assert df.dtypes["invalid"] == np.object_1623 with pytest.raises(TypeError):1624 store.append("df", df)1625 # multiple invalid columns1626 df["invalid2"] = [["a"]] * len(df)1627 df["invalid3"] = [["a"]] * len(df)1628 with pytest.raises(TypeError):1629 store.append("df", df)1630 # datetime with embedded nans as object1631 df = tm.makeDataFrame()1632 s = Series(datetime.datetime(2001, 1, 2), index=df.index)1633 s = s.astype(object)1634 s[0:5] = np.nan1635 df["invalid"] = s1636 assert df.dtypes["invalid"] == np.object_1637 with pytest.raises(TypeError):1638 store.append("df", df)1639 # directly ndarray1640 with pytest.raises(TypeError):1641 store.append("df", np.arange(10))1642 # series directly1643 with pytest.raises(TypeError):1644 store.append("df", Series(np.arange(10)))1645 # appending an incompatible table1646 df = tm.makeDataFrame()1647 store.append("df", df)1648 df["foo"] = "foo"1649 with pytest.raises(ValueError):1650 store.append("df", df)1651 def test_table_index_incompatible_dtypes(self, setup_path):1652 df1 = DataFrame({"a": [1, 2, 3]})1653 df2 = DataFrame({"a": [4, 5, 6]}, index=date_range("1/1/2000", periods=3))1654 with ensure_clean_store(setup_path) as store:1655 store.put("frame", df1, format="table")1656 with pytest.raises(TypeError):1657 store.put("frame", df2, format="table", append=True)1658 def test_table_values_dtypes_roundtrip(self, setup_path):1659 with ensure_clean_store(setup_path) as store:1660 df1 = DataFrame({"a": [1, 2, 3]}, dtype="f8")1661 store.append("df_f8", df1)1662 tm.assert_series_equal(df1.dtypes, store["df_f8"].dtypes)1663 df2 = DataFrame({"a": [1, 2, 3]}, dtype="i8")1664 store.append("df_i8", df2)1665 tm.assert_series_equal(df2.dtypes, store["df_i8"].dtypes)1666 # incompatible dtype1667 with pytest.raises(ValueError):1668 
store.append("df_i8", df1)1669 # check creation/storage/retrieval of float32 (a bit hacky to1670 # actually create them thought)1671 df1 = DataFrame(np.array([[1], [2], [3]], dtype="f4"), columns=["A"])1672 store.append("df_f4", df1)1673 tm.assert_series_equal(df1.dtypes, store["df_f4"].dtypes)1674 assert df1.dtypes[0] == "float32"1675 # check with mixed dtypes1676 df1 = DataFrame(1677 {1678 c: Series(np.random.randint(5), dtype=c)1679 for c in ["float32", "float64", "int32", "int64", "int16", "int8"]1680 }1681 )1682 df1["string"] = "foo"1683 df1["float322"] = 1.01684 df1["float322"] = df1["float322"].astype("float32")1685 df1["bool"] = df1["float32"] > 01686 df1["time1"] = Timestamp("20130101")1687 df1["time2"] = Timestamp("20130102")1688 store.append("df_mixed_dtypes1", df1)1689 result = store.select("df_mixed_dtypes1").dtypes.value_counts()1690 result.index = [str(i) for i in result.index]1691 expected = Series(1692 {1693 "float32": 2,1694 "float64": 1,1695 "int32": 1,1696 "bool": 1,1697 "int16": 1,1698 "int8": 1,1699 "int64": 1,1700 "object": 1,1701 "datetime64[ns]": 2,1702 }1703 )1704 result = result.sort_index()1705 expected = expected.sort_index()1706 tm.assert_series_equal(result, expected)1707 def test_table_mixed_dtypes(self, setup_path):1708 # frame1709 df = tm.makeDataFrame()1710 df["obj1"] = "foo"1711 df["obj2"] = "bar"1712 df["bool1"] = df["A"] > 01713 df["bool2"] = df["B"] > 01714 df["bool3"] = True1715 df["int1"] = 11716 df["int2"] = 21717 df["timestamp1"] = Timestamp("20010102")1718 df["timestamp2"] = Timestamp("20010103")1719 df["datetime1"] = datetime.datetime(2001, 1, 2, 0, 0)1720 df["datetime2"] = datetime.datetime(2001, 1, 3, 0, 0)1721 df.loc[df.index[3:6], ["obj1"]] = np.nan1722 df = df._consolidate()._convert(datetime=True)1723 with ensure_clean_store(setup_path) as store:1724 store.append("df1_mixed", df)1725 tm.assert_frame_equal(store.select("df1_mixed"), df)1726 def test_unimplemented_dtypes_table_columns(self, setup_path):1727 with 
ensure_clean_store(setup_path) as store:1728 dtypes = [("date", datetime.date(2001, 1, 2))]1729 # currently not supported dtypes ####1730 for n, f in dtypes:1731 df = tm.makeDataFrame()1732 df[n] = f1733 with pytest.raises(TypeError):1734 store.append(f"df1_{n}", df)1735 # frame1736 df = tm.makeDataFrame()1737 df["obj1"] = "foo"1738 df["obj2"] = "bar"1739 df["datetime1"] = datetime.date(2001, 1, 2)1740 df = df._consolidate()._convert(datetime=True)1741 with ensure_clean_store(setup_path) as store:1742 # this fails because we have a date in the object block......1743 with pytest.raises(TypeError):1744 store.append("df_unimplemented", df)1745 @td.xfail_non_writeable1746 @pytest.mark.skipif(1747 LooseVersion(np.__version__) == LooseVersion("1.15.0"),1748 reason=(1749 "Skipping pytables test when numpy version is "1750 "exactly equal to 1.15.0: gh-22098"1751 ),1752 )1753 def test_calendar_roundtrip_issue(self, setup_path):1754 # 85911755 # doc example from tseries holiday section1756 weekmask_egypt = "Sun Mon Tue Wed Thu"1757 holidays = [1758 "2012-05-01",1759 datetime.datetime(2013, 5, 1),1760 np.datetime64("2014-05-01"),1761 ]1762 bday_egypt = pd.offsets.CustomBusinessDay(1763 holidays=holidays, weekmask=weekmask_egypt1764 )1765 dt = datetime.datetime(2013, 4, 30)1766 dts = date_range(dt, periods=5, freq=bday_egypt)1767 s = Series(dts.weekday, dts).map(Series("Mon Tue Wed Thu Fri Sat Sun".split()))1768 with ensure_clean_store(setup_path) as store:1769 store.put("fixed", s)1770 result = store.select("fixed")1771 tm.assert_series_equal(result, s)1772 store.append("table", s)1773 result = store.select("table")1774 tm.assert_series_equal(result, s)1775 def test_roundtrip_tz_aware_index(self, setup_path):1776 # GH 176181777 time = pd.Timestamp("2000-01-01 01:00:00", tz="US/Eastern")1778 df = pd.DataFrame(data=[0], index=[time])1779 with ensure_clean_store(setup_path) as store:1780 store.put("frame", df, format="fixed")1781 recons = store["frame"]1782 
tm.assert_frame_equal(recons, df)1783 assert recons.index[0].value == 9467064000000000001784 def test_append_with_timedelta(self, setup_path):1785 # GH 35771786 # append timedelta1787 df = DataFrame(1788 dict(1789 A=Timestamp("20130101"),1790 B=[1791 Timestamp("20130101") + timedelta(days=i, seconds=10)1792 for i in range(10)1793 ],1794 )1795 )1796 df["C"] = df["A"] - df["B"]1797 df.loc[3:5, "C"] = np.nan1798 with ensure_clean_store(setup_path) as store:1799 # table1800 _maybe_remove(store, "df")1801 store.append("df", df, data_columns=True)1802 result = store.select("df")1803 tm.assert_frame_equal(result, df)1804 result = store.select("df", where="C<100000")1805 tm.assert_frame_equal(result, df)1806 result = store.select("df", where="C<pd.Timedelta('-3D')")1807 tm.assert_frame_equal(result, df.iloc[3:])1808 result = store.select("df", "C<'-3D'")1809 tm.assert_frame_equal(result, df.iloc[3:])1810 # a bit hacky here as we don't really deal with the NaT properly1811 result = store.select("df", "C<'-500000s'")1812 result = result.dropna(subset=["C"])1813 tm.assert_frame_equal(result, df.iloc[6:])1814 result = store.select("df", "C<'-3.5D'")1815 result = result.iloc[1:]1816 tm.assert_frame_equal(result, df.iloc[4:])1817 # fixed1818 _maybe_remove(store, "df2")1819 store.put("df2", df)1820 result = store.select("df2")1821 tm.assert_frame_equal(result, df)1822 def test_remove(self, setup_path):1823 with ensure_clean_store(setup_path) as store:1824 ts = tm.makeTimeSeries()1825 df = tm.makeDataFrame()1826 store["a"] = ts1827 store["b"] = df1828 _maybe_remove(store, "a")1829 assert len(store) == 11830 tm.assert_frame_equal(df, store["b"])1831 _maybe_remove(store, "b")1832 assert len(store) == 01833 # nonexistence1834 with pytest.raises(1835 KeyError, match="'No object named a_nonexistent_store in the file'"1836 ):1837 store.remove("a_nonexistent_store")1838 # pathing1839 store["a"] = ts1840 store["b/foo"] = df1841 _maybe_remove(store, "foo")1842 _maybe_remove(store, 
"b/foo")1843 assert len(store) == 11844 store["a"] = ts1845 store["b/foo"] = df1846 _maybe_remove(store, "b")1847 assert len(store) == 11848 # __delitem__1849 store["a"] = ts1850 store["b"] = df1851 del store["a"]1852 del store["b"]1853 assert len(store) == 01854 def test_invalid_terms(self, setup_path):1855 with ensure_clean_store(setup_path) as store:1856 with catch_warnings(record=True):1857 df = tm.makeTimeDataFrame()1858 df["string"] = "foo"1859 df.loc[df.index[0:4], "string"] = "bar"1860 store.put("df", df, format="table")1861 # some invalid terms1862 with pytest.raises(TypeError):1863 Term()1864 # more invalid1865 with pytest.raises(ValueError):1866 store.select("df", "df.index[3]")1867 with pytest.raises(SyntaxError):1868 store.select("df", "index>")1869 # from the docs1870 with ensure_clean_path(setup_path) as path:1871 dfq = DataFrame(1872 np.random.randn(10, 4),1873 columns=list("ABCD"),1874 index=date_range("20130101", periods=10),1875 )1876 dfq.to_hdf(path, "dfq", format="table", data_columns=True)1877 # check ok1878 read_hdf(1879 path, "dfq", where="index>Timestamp('20130104') & columns=['A', 'B']"1880 )1881 read_hdf(path, "dfq", where="A>0 or C>0")1882 # catch the invalid reference1883 with ensure_clean_path(setup_path) as path:1884 dfq = DataFrame(1885 np.random.randn(10, 4),1886 columns=list("ABCD"),1887 index=date_range("20130101", periods=10),1888 )1889 dfq.to_hdf(path, "dfq", format="table")1890 with pytest.raises(ValueError):1891 read_hdf(path, "dfq", where="A>0 or C>0")1892 def test_same_name_scoping(self, setup_path):1893 with ensure_clean_store(setup_path) as store:1894 import pandas as pd1895 df = DataFrame(1896 np.random.randn(20, 2), index=pd.date_range("20130101", periods=20)1897 )1898 store.put("df", df, format="table")1899 expected = df[df.index > pd.Timestamp("20130105")]1900 import datetime # noqa1901 result = store.select("df", "index>datetime.datetime(2013,1,5)")1902 tm.assert_frame_equal(result, expected)1903 from datetime import 
datetime # noqa1904 # technically an error, but allow it1905 result = store.select("df", "index>datetime.datetime(2013,1,5)")1906 tm.assert_frame_equal(result, expected)1907 result = store.select("df", "index>datetime(2013,1,5)")1908 tm.assert_frame_equal(result, expected)1909 def test_series(self, setup_path):1910 s = tm.makeStringSeries()1911 self._check_roundtrip(s, tm.assert_series_equal, path=setup_path)1912 ts = tm.makeTimeSeries()1913 self._check_roundtrip(ts, tm.assert_series_equal, path=setup_path)1914 ts2 = Series(ts.index, Index(ts.index, dtype=object))1915 self._check_roundtrip(ts2, tm.assert_series_equal, path=setup_path)1916 ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object))1917 self._check_roundtrip(1918 ts3, tm.assert_series_equal, path=setup_path, check_index_type=False1919 )1920 def test_float_index(self, setup_path):1921 # GH #4541922 index = np.random.randn(10)1923 s = Series(np.random.randn(10), index=index)1924 self._check_roundtrip(s, tm.assert_series_equal, path=setup_path)1925 @td.xfail_non_writeable1926 def test_tuple_index(self, setup_path):1927 # GH #4921928 col = np.arange(10)1929 idx = [(0.0, 1.0), (2.0, 3.0), (4.0, 5.0)]1930 data = np.random.randn(30).reshape((3, 10))1931 DF = DataFrame(data, index=idx, columns=col)1932 with catch_warnings(record=True):1933 simplefilter("ignore", pd.errors.PerformanceWarning)1934 self._check_roundtrip(DF, tm.assert_frame_equal, path=setup_path)1935 @td.xfail_non_writeable1936 @pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")1937 def test_index_types(self, setup_path):1938 with catch_warnings(record=True):1939 values = np.random.randn(2)1940 func = lambda l, r: tm.assert_series_equal(l, r, check_index_type=True)1941 with catch_warnings(record=True):1942 ser = Series(values, [0, "y"])1943 self._check_roundtrip(ser, func, path=setup_path)1944 with catch_warnings(record=True):1945 ser = Series(values, [datetime.datetime.today(), 0])1946 
self._check_roundtrip(ser, func, path=setup_path)1947 with catch_warnings(record=True):1948 ser = Series(values, ["y", 0])1949 self._check_roundtrip(ser, func, path=setup_path)1950 with catch_warnings(record=True):1951 ser = Series(values, [datetime.date.today(), "a"])1952 self._check_roundtrip(ser, func, path=setup_path)1953 with catch_warnings(record=True):1954 ser = Series(values, [0, "y"])1955 self._check_roundtrip(ser, func, path=setup_path)1956 ser = Series(values, [datetime.datetime.today(), 0])1957 self._check_roundtrip(ser, func, path=setup_path)1958 ser = Series(values, ["y", 0])1959 self._check_roundtrip(ser, func, path=setup_path)1960 ser = Series(values, [datetime.date.today(), "a"])1961 self._check_roundtrip(ser, func, path=setup_path)1962 ser = Series(values, [1.23, "b"])1963 self._check_roundtrip(ser, func, path=setup_path)1964 ser = Series(values, [1, 1.53])1965 self._check_roundtrip(ser, func, path=setup_path)1966 ser = Series(values, [1, 5])1967 self._check_roundtrip(ser, func, path=setup_path)1968 ser = Series(1969 values, [datetime.datetime(2012, 1, 1), datetime.datetime(2012, 1, 2)]1970 )1971 self._check_roundtrip(ser, func, path=setup_path)1972 def test_timeseries_preepoch(self, setup_path):1973 dr = bdate_range("1/1/1940", "1/1/1960")1974 ts = Series(np.random.randn(len(dr)), index=dr)1975 try:1976 self._check_roundtrip(ts, tm.assert_series_equal, path=setup_path)1977 except OverflowError:1978 pytest.skip("known failer on some windows platforms")1979 @td.xfail_non_writeable1980 @pytest.mark.parametrize(1981 "compression", [False, pytest.param(True, marks=td.skip_if_windows_python_3)]1982 )1983 def test_frame(self, compression, setup_path):1984 df = tm.makeDataFrame()1985 # put in some random NAs1986 df.values[0, 0] = np.nan1987 df.values[5, 3] = np.nan1988 self._check_roundtrip_table(1989 df, tm.assert_frame_equal, path=setup_path, compression=compression1990 )1991 self._check_roundtrip(1992 df, tm.assert_frame_equal, path=setup_path, 
compression=compression1993 )1994 tdf = tm.makeTimeDataFrame()1995 self._check_roundtrip(1996 tdf, tm.assert_frame_equal, path=setup_path, compression=compression1997 )1998 with ensure_clean_store(setup_path) as store:1999 # not consolidated2000 df["foo"] = np.random.randn(len(df))2001 store["df"] = df2002 recons = store["df"]2003 assert recons._mgr.is_consolidated()2004 # empty2005 self._check_roundtrip(df[:0], tm.assert_frame_equal, path=setup_path)2006 @td.xfail_non_writeable2007 def test_empty_series_frame(self, setup_path):2008 s0 = Series(dtype=object)2009 s1 = Series(name="myseries", dtype=object)2010 df0 = DataFrame()2011 df1 = DataFrame(index=["a", "b", "c"])2012 df2 = DataFrame(columns=["d", "e", "f"])2013 self._check_roundtrip(s0, tm.assert_series_equal, path=setup_path)2014 self._check_roundtrip(s1, tm.assert_series_equal, path=setup_path)2015 self._check_roundtrip(df0, tm.assert_frame_equal, path=setup_path)2016 self._check_roundtrip(df1, tm.assert_frame_equal, path=setup_path)2017 self._check_roundtrip(df2, tm.assert_frame_equal, path=setup_path)2018 @td.xfail_non_writeable2019 @pytest.mark.parametrize(2020 "dtype", [np.int64, np.float64, object, "m8[ns]", "M8[ns]"]2021 )2022 def test_empty_series(self, dtype, setup_path):2023 s = Series(dtype=dtype)2024 self._check_roundtrip(s, tm.assert_series_equal, path=setup_path)2025 def test_can_serialize_dates(self, setup_path):2026 rng = [x.date() for x in bdate_range("1/1/2000", "1/30/2000")]2027 frame = DataFrame(np.random.randn(len(rng), 4), index=rng)2028 self._check_roundtrip(frame, tm.assert_frame_equal, path=setup_path)2029 def test_store_hierarchical(self, setup_path):2030 index = MultiIndex(2031 levels=[["foo", "bar", "baz", "qux"], ["one", "two", "three"]],2032 codes=[[0, 0, 0, 1, 1, 2, 2, 3, 3, 3], [0, 1, 2, 0, 1, 1, 2, 0, 1, 2]],2033 names=["foo", "bar"],2034 )2035 frame = DataFrame(np.random.randn(10, 3), index=index, columns=["A", "B", "C"])2036 self._check_roundtrip(frame, 
tm.assert_frame_equal, path=setup_path)2037 self._check_roundtrip(frame.T, tm.assert_frame_equal, path=setup_path)2038 self._check_roundtrip(frame["A"], tm.assert_series_equal, path=setup_path)2039 # check that the names are stored2040 with ensure_clean_store(setup_path) as store:2041 store["frame"] = frame2042 recons = store["frame"]2043 tm.assert_frame_equal(recons, frame)2044 def test_store_index_name(self, setup_path):2045 df = tm.makeDataFrame()2046 df.index.name = "foo"2047 with ensure_clean_store(setup_path) as store:2048 store["frame"] = df2049 recons = store["frame"]2050 tm.assert_frame_equal(recons, df)2051 def test_store_index_name_with_tz(self, setup_path):2052 # GH 138842053 df = pd.DataFrame({"A": [1, 2]})2054 df.index = pd.DatetimeIndex([1234567890123456787, 1234567890123456788])2055 df.index = df.index.tz_localize("UTC")2056 df.index.name = "foo"2057 with ensure_clean_store(setup_path) as store:2058 store.put("frame", df, format="table")2059 recons = store["frame"]2060 tm.assert_frame_equal(recons, df)2061 @pytest.mark.parametrize("table_format", ["table", "fixed"])2062 def test_store_index_name_numpy_str(self, table_format, setup_path):2063 # GH #134922064 idx = pd.Index(2065 pd.to_datetime([datetime.date(2000, 1, 1), datetime.date(2000, 1, 2)]),2066 name="cols\u05d2",2067 )2068 idx1 = pd.Index(2069 pd.to_datetime([datetime.date(2010, 1, 1), datetime.date(2010, 1, 2)]),2070 name="rows\u05d0",2071 )2072 df = pd.DataFrame(np.arange(4).reshape(2, 2), columns=idx, index=idx1)2073 # This used to fail, returning numpy strings instead of python strings.2074 with ensure_clean_path(setup_path) as path:2075 df.to_hdf(path, "df", format=table_format)2076 df2 = read_hdf(path, "df")2077 tm.assert_frame_equal(df, df2, check_names=True)2078 assert type(df2.index.name) == str2079 assert type(df2.columns.name) == str2080 def test_store_series_name(self, setup_path):2081 df = tm.makeDataFrame()2082 series = df["A"]2083 with ensure_clean_store(setup_path) as 
store:2084 store["series"] = series2085 recons = store["series"]2086 tm.assert_series_equal(recons, series)2087 @td.xfail_non_writeable2088 @pytest.mark.parametrize(2089 "compression", [False, pytest.param(True, marks=td.skip_if_windows_python_3)]2090 )2091 def test_store_mixed(self, compression, setup_path):2092 def _make_one():2093 df = tm.makeDataFrame()2094 df["obj1"] = "foo"2095 df["obj2"] = "bar"2096 df["bool1"] = df["A"] > 02097 df["bool2"] = df["B"] > 02098 df["int1"] = 12099 df["int2"] = 22100 return df._consolidate()2101 df1 = _make_one()2102 df2 = _make_one()2103 self._check_roundtrip(df1, tm.assert_frame_equal, path=setup_path)2104 self._check_roundtrip(df2, tm.assert_frame_equal, path=setup_path)2105 with ensure_clean_store(setup_path) as store:2106 store["obj"] = df12107 tm.assert_frame_equal(store["obj"], df1)2108 store["obj"] = df22109 tm.assert_frame_equal(store["obj"], df2)2110 # check that can store Series of all of these types2111 self._check_roundtrip(2112 df1["obj1"],2113 tm.assert_series_equal,2114 path=setup_path,2115 compression=compression,2116 )2117 self._check_roundtrip(2118 df1["bool1"],2119 tm.assert_series_equal,2120 path=setup_path,2121 compression=compression,2122 )2123 self._check_roundtrip(2124 df1["int1"],2125 tm.assert_series_equal,2126 path=setup_path,2127 compression=compression,2128 )2129 @pytest.mark.filterwarnings(2130 "ignore:\\nduplicate:pandas.io.pytables.DuplicateWarning"2131 )2132 def test_select_with_dups(self, setup_path):2133 # single dtypes2134 df = DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"])2135 df.index = date_range("20130101 9:30", periods=10, freq="T")2136 with ensure_clean_store(setup_path) as store:2137 store.append("df", df)2138 result = store.select("df")2139 expected = df2140 tm.assert_frame_equal(result, expected, by_blocks=True)2141 result = store.select("df", columns=df.columns)2142 expected = df2143 tm.assert_frame_equal(result, expected, by_blocks=True)2144 result = 
store.select("df", columns=["A"])2145 expected = df.loc[:, ["A"]]2146 tm.assert_frame_equal(result, expected)2147 # dups across dtypes2148 df = concat(2149 [2150 DataFrame(np.random.randn(10, 4), columns=["A", "A", "B", "B"]),2151 DataFrame(2152 np.random.randint(0, 10, size=20).reshape(10, 2), columns=["A", "C"]2153 ),2154 ],2155 axis=1,2156 )2157 df.index = date_range("20130101 9:30", periods=10, freq="T")2158 with ensure_clean_store(setup_path) as store:2159 store.append("df", df)2160 result = store.select("df")2161 expected = df2162 tm.assert_frame_equal(result, expected, by_blocks=True)2163 result = store.select("df", columns=df.columns)2164 expected = df2165 tm.assert_frame_equal(result, expected, by_blocks=True)2166 expected = df.loc[:, ["A"]]2167 result = store.select("df", columns=["A"])2168 tm.assert_frame_equal(result, expected, by_blocks=True)2169 expected = df.loc[:, ["B", "A"]]2170 result = store.select("df", columns=["B", "A"])2171 tm.assert_frame_equal(result, expected, by_blocks=True)2172 # duplicates on both index and columns2173 with ensure_clean_store(setup_path) as store:2174 store.append("df", df)2175 store.append("df", df)2176 expected = df.loc[:, ["B", "A"]]2177 expected = concat([expected, expected])2178 result = store.select("df", columns=["B", "A"])2179 tm.assert_frame_equal(result, expected, by_blocks=True)2180 def test_overwrite_node(self, setup_path):2181 with ensure_clean_store(setup_path) as store:2182 store["a"] = tm.makeTimeDataFrame()2183 ts = tm.makeTimeSeries()2184 store["a"] = ts2185 tm.assert_series_equal(store["a"], ts)2186 def test_select(self, setup_path):2187 with ensure_clean_store(setup_path) as store:2188 with catch_warnings(record=True):2189 # select with columns=2190 df = tm.makeTimeDataFrame()2191 _maybe_remove(store, "df")2192 store.append("df", df)2193 result = store.select("df", columns=["A", "B"])2194 expected = df.reindex(columns=["A", "B"])2195 tm.assert_frame_equal(expected, result)2196 # equivalently2197 
result = store.select("df", [("columns=['A', 'B']")])2198 expected = df.reindex(columns=["A", "B"])2199 tm.assert_frame_equal(expected, result)2200 # with a data column2201 _maybe_remove(store, "df")2202 store.append("df", df, data_columns=["A"])2203 result = store.select("df", ["A > 0"], columns=["A", "B"])2204 expected = df[df.A > 0].reindex(columns=["A", "B"])2205 tm.assert_frame_equal(expected, result)2206 # all a data columns2207 _maybe_remove(store, "df")2208 store.append("df", df, data_columns=True)2209 result = store.select("df", ["A > 0"], columns=["A", "B"])2210 expected = df[df.A > 0].reindex(columns=["A", "B"])2211 tm.assert_frame_equal(expected, result)2212 # with a data column, but different columns2213 _maybe_remove(store, "df")2214 store.append("df", df, data_columns=["A"])2215 result = store.select("df", ["A > 0"], columns=["C", "D"])2216 expected = df[df.A > 0].reindex(columns=["C", "D"])2217 tm.assert_frame_equal(expected, result)2218 def test_select_dtypes(self, setup_path):2219 with ensure_clean_store(setup_path) as store:2220 # with a Timestamp data column (GH #2637)2221 df = DataFrame(2222 dict(ts=bdate_range("2012-01-01", periods=300), A=np.random.randn(300))2223 )2224 _maybe_remove(store, "df")2225 store.append("df", df, data_columns=["ts", "A"])2226 result = store.select("df", "ts>=Timestamp('2012-02-01')")2227 expected = df[df.ts >= Timestamp("2012-02-01")]2228 tm.assert_frame_equal(expected, result)2229 # bool columns (GH #2849)2230 df = DataFrame(np.random.randn(5, 2), columns=["A", "B"])2231 df["object"] = "foo"2232 df.loc[4:5, "object"] = "bar"2233 df["boolv"] = df["A"] > 02234 _maybe_remove(store, "df")2235 store.append("df", df, data_columns=True)2236 expected = df[df.boolv == True].reindex(columns=["A", "boolv"]) # noqa2237 for v in [True, "true", 1]:2238 result = store.select("df", f"boolv == {v}", columns=["A", "boolv"])2239 tm.assert_frame_equal(expected, result)2240 expected = df[df.boolv == False].reindex(columns=["A", 
"boolv"]) # noqa2241 for v in [False, "false", 0]:2242 result = store.select("df", f"boolv == {v}", columns=["A", "boolv"])2243 tm.assert_frame_equal(expected, result)2244 # integer index2245 df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))2246 _maybe_remove(store, "df_int")2247 store.append("df_int", df)2248 result = store.select("df_int", "index<10 and columns=['A']")2249 expected = df.reindex(index=list(df.index)[0:10], columns=["A"])2250 tm.assert_frame_equal(expected, result)2251 # float index2252 df = DataFrame(2253 dict(2254 A=np.random.rand(20),2255 B=np.random.rand(20),2256 index=np.arange(20, dtype="f8"),2257 )2258 )2259 _maybe_remove(store, "df_float")2260 store.append("df_float", df)2261 result = store.select("df_float", "index<10.0 and columns=['A']")2262 expected = df.reindex(index=list(df.index)[0:10], columns=["A"])2263 tm.assert_frame_equal(expected, result)2264 with ensure_clean_store(setup_path) as store:2265 # floats w/o NaN2266 df = DataFrame(dict(cols=range(11), values=range(11)), dtype="float64")2267 df["cols"] = (df["cols"] + 10).apply(str)2268 store.append("df1", df, data_columns=True)2269 result = store.select("df1", where="values>2.0")2270 expected = df[df["values"] > 2.0]2271 tm.assert_frame_equal(expected, result)2272 # floats with NaN2273 df.iloc[0] = np.nan2274 expected = df[df["values"] > 2.0]2275 store.append("df2", df, data_columns=True, index=False)2276 result = store.select("df2", where="values>2.0")2277 tm.assert_frame_equal(expected, result)2278 # https://github.com/PyTables/PyTables/issues/2822279 # bug in selection when 0th row has a np.nan and an index2280 # store.append('df3',df,data_columns=True)2281 # result = store.select(2282 # 'df3', where='values>2.0')2283 # tm.assert_frame_equal(expected, result)2284 # not in first position float with NaN ok too2285 df = DataFrame(dict(cols=range(11), values=range(11)), dtype="float64")2286 df["cols"] = (df["cols"] + 10).apply(str)2287 df.iloc[1] = np.nan2288 expected 
= df[df["values"] > 2.0]2289 store.append("df4", df, data_columns=True)2290 result = store.select("df4", where="values>2.0")2291 tm.assert_frame_equal(expected, result)2292 # test selection with comparison against numpy scalar2293 # GH 112832294 with ensure_clean_store(setup_path) as store:2295 df = tm.makeDataFrame()2296 expected = df[df["A"] > 0]2297 store.append("df", df, data_columns=True)2298 np_zero = np.float64(0) # noqa2299 result = store.select("df", where=["A>np_zero"])2300 tm.assert_frame_equal(expected, result)2301 def test_select_with_many_inputs(self, setup_path):2302 with ensure_clean_store(setup_path) as store:2303 df = DataFrame(2304 dict(2305 ts=bdate_range("2012-01-01", periods=300),2306 A=np.random.randn(300),2307 B=range(300),2308 users=["a"] * 502309 + ["b"] * 502310 + ["c"] * 1002311 + [f"a{i:03d}" for i in range(100)],2312 )2313 )2314 _maybe_remove(store, "df")2315 store.append("df", df, data_columns=["ts", "A", "B", "users"])2316 # regular select2317 result = store.select("df", "ts>=Timestamp('2012-02-01')")2318 expected = df[df.ts >= Timestamp("2012-02-01")]2319 tm.assert_frame_equal(expected, result)2320 # small selector2321 result = store.select(2322 "df", "ts>=Timestamp('2012-02-01') & users=['a','b','c']"2323 )2324 expected = df[2325 (df.ts >= Timestamp("2012-02-01")) & df.users.isin(["a", "b", "c"])2326 ]2327 tm.assert_frame_equal(expected, result)2328 # big selector along the columns2329 selector = ["a", "b", "c"] + [f"a{i:03d}" for i in range(60)]2330 result = store.select(2331 "df", "ts>=Timestamp('2012-02-01') and users=selector"2332 )2333 expected = df[(df.ts >= Timestamp("2012-02-01")) & df.users.isin(selector)]2334 tm.assert_frame_equal(expected, result)2335 selector = range(100, 200)2336 result = store.select("df", "B=selector")2337 expected = df[df.B.isin(selector)]2338 tm.assert_frame_equal(expected, result)2339 assert len(result) == 1002340 # big selector along the index2341 selector = Index(df.ts[0:100].values)2342 result 
= store.select("df", "ts=selector")2343 expected = df[df.ts.isin(selector.values)]2344 tm.assert_frame_equal(expected, result)2345 assert len(result) == 1002346 def test_select_iterator(self, setup_path):2347 # single table2348 with ensure_clean_store(setup_path) as store:2349 df = tm.makeTimeDataFrame(500)2350 _maybe_remove(store, "df")2351 store.append("df", df)2352 expected = store.select("df")2353 results = list(store.select("df", iterator=True))2354 result = concat(results)2355 tm.assert_frame_equal(expected, result)2356 results = list(store.select("df", chunksize=100))2357 assert len(results) == 52358 result = concat(results)2359 tm.assert_frame_equal(expected, result)2360 results = list(store.select("df", chunksize=150))2361 result = concat(results)2362 tm.assert_frame_equal(result, expected)2363 with ensure_clean_path(setup_path) as path:2364 df = tm.makeTimeDataFrame(500)2365 df.to_hdf(path, "df_non_table")2366 with pytest.raises(TypeError):2367 read_hdf(path, "df_non_table", chunksize=100)2368 with pytest.raises(TypeError):2369 read_hdf(path, "df_non_table", iterator=True)2370 with ensure_clean_path(setup_path) as path:2371 df = tm.makeTimeDataFrame(500)2372 df.to_hdf(path, "df", format="table")2373 results = list(read_hdf(path, "df", chunksize=100))2374 result = concat(results)2375 assert len(results) == 52376 tm.assert_frame_equal(result, df)2377 tm.assert_frame_equal(result, read_hdf(path, "df"))2378 # multiple2379 with ensure_clean_store(setup_path) as store:2380 df1 = tm.makeTimeDataFrame(500)2381 store.append("df1", df1, data_columns=True)2382 df2 = tm.makeTimeDataFrame(500).rename(columns="{}_2".format)2383 df2["foo"] = "bar"2384 store.append("df2", df2)2385 df = concat([df1, df2], axis=1)2386 # full selection2387 expected = store.select_as_multiple(["df1", "df2"], selector="df1")2388 results = list(2389 store.select_as_multiple(["df1", "df2"], selector="df1", chunksize=150)2390 )2391 result = concat(results)2392 tm.assert_frame_equal(expected, 
result)2393 def test_select_iterator_complete_8014(self, setup_path):2394 # GH 80142395 # using iterator and where clause2396 chunksize = 1e42397 # no iterator2398 with ensure_clean_store(setup_path) as store:2399 expected = tm.makeTimeDataFrame(100064, "S")2400 _maybe_remove(store, "df")2401 store.append("df", expected)2402 beg_dt = expected.index[0]2403 end_dt = expected.index[-1]2404 # select w/o iteration and no where clause works2405 result = store.select("df")2406 tm.assert_frame_equal(expected, result)2407 # select w/o iterator and where clause, single term, begin2408 # of range, works2409 where = f"index >= '{beg_dt}'"2410 result = store.select("df", where=where)2411 tm.assert_frame_equal(expected, result)2412 # select w/o iterator and where clause, single term, end2413 # of range, works2414 where = f"index <= '{end_dt}'"2415 result = store.select("df", where=where)2416 tm.assert_frame_equal(expected, result)2417 # select w/o iterator and where clause, inclusive range,2418 # works2419 where = f"index >= '{beg_dt}' & index <= '{end_dt}'"2420 result = store.select("df", where=where)2421 tm.assert_frame_equal(expected, result)2422 # with iterator, full range2423 with ensure_clean_store(setup_path) as store:2424 expected = tm.makeTimeDataFrame(100064, "S")2425 _maybe_remove(store, "df")2426 store.append("df", expected)2427 beg_dt = expected.index[0]2428 end_dt = expected.index[-1]2429 # select w/iterator and no where clause works2430 results = list(store.select("df", chunksize=chunksize))2431 result = concat(results)2432 tm.assert_frame_equal(expected, result)2433 # select w/iterator and where clause, single term, begin of range2434 where = f"index >= '{beg_dt}'"2435 results = list(store.select("df", where=where, chunksize=chunksize))2436 result = concat(results)2437 tm.assert_frame_equal(expected, result)2438 # select w/iterator and where clause, single term, end of range2439 where = f"index <= '{end_dt}'"2440 results = list(store.select("df", where=where, 
chunksize=chunksize))2441 result = concat(results)2442 tm.assert_frame_equal(expected, result)2443 # select w/iterator and where clause, inclusive range2444 where = f"index >= '{beg_dt}' & index <= '{end_dt}'"2445 results = list(store.select("df", where=where, chunksize=chunksize))2446 result = concat(results)2447 tm.assert_frame_equal(expected, result)2448 def test_select_iterator_non_complete_8014(self, setup_path):2449 # GH 80142450 # using iterator and where clause2451 chunksize = 1e42452 # with iterator, non complete range2453 with ensure_clean_store(setup_path) as store:2454 expected = tm.makeTimeDataFrame(100064, "S")2455 _maybe_remove(store, "df")2456 store.append("df", expected)2457 beg_dt = expected.index[1]2458 end_dt = expected.index[-2]2459 # select w/iterator and where clause, single term, begin of range2460 where = f"index >= '{beg_dt}'"2461 results = list(store.select("df", where=where, chunksize=chunksize))2462 result = concat(results)2463 rexpected = expected[expected.index >= beg_dt]2464 tm.assert_frame_equal(rexpected, result)2465 # select w/iterator and where clause, single term, end of range2466 where = f"index <= '{end_dt}'"2467 results = list(store.select("df", where=where, chunksize=chunksize))2468 result = concat(results)2469 rexpected = expected[expected.index <= end_dt]2470 tm.assert_frame_equal(rexpected, result)2471 # select w/iterator and where clause, inclusive range2472 where = f"index >= '{beg_dt}' & index <= '{end_dt}'"2473 results = list(store.select("df", where=where, chunksize=chunksize))2474 result = concat(results)2475 rexpected = expected[2476 (expected.index >= beg_dt) & (expected.index <= end_dt)2477 ]2478 tm.assert_frame_equal(rexpected, result)2479 # with iterator, empty where2480 with ensure_clean_store(setup_path) as store:2481 expected = tm.makeTimeDataFrame(100064, "S")2482 _maybe_remove(store, "df")2483 store.append("df", expected)2484 end_dt = expected.index[-1]2485 # select w/iterator and where clause, single 
term, begin of range2486 where = f"index > '{end_dt}'"2487 results = list(store.select("df", where=where, chunksize=chunksize))2488 assert 0 == len(results)2489 def test_select_iterator_many_empty_frames(self, setup_path):2490 # GH 80142491 # using iterator and where clause can return many empty2492 # frames.2493 chunksize = int(1e4)2494 # with iterator, range limited to the first chunk2495 with ensure_clean_store(setup_path) as store:2496 expected = tm.makeTimeDataFrame(100000, "S")2497 _maybe_remove(store, "df")2498 store.append("df", expected)2499 beg_dt = expected.index[0]2500 end_dt = expected.index[chunksize - 1]2501 # select w/iterator and where clause, single term, begin of range2502 where = f"index >= '{beg_dt}'"2503 results = list(store.select("df", where=where, chunksize=chunksize))2504 result = concat(results)2505 rexpected = expected[expected.index >= beg_dt]2506 tm.assert_frame_equal(rexpected, result)2507 # select w/iterator and where clause, single term, end of range2508 where = f"index <= '{end_dt}'"2509 results = list(store.select("df", where=where, chunksize=chunksize))2510 assert len(results) == 12511 result = concat(results)2512 rexpected = expected[expected.index <= end_dt]2513 tm.assert_frame_equal(rexpected, result)2514 # select w/iterator and where clause, inclusive range2515 where = f"index >= '{beg_dt}' & index <= '{end_dt}'"2516 results = list(store.select("df", where=where, chunksize=chunksize))2517 # should be 1, is 102518 assert len(results) == 12519 result = concat(results)2520 rexpected = expected[2521 (expected.index >= beg_dt) & (expected.index <= end_dt)2522 ]2523 tm.assert_frame_equal(rexpected, result)2524 # select w/iterator and where clause which selects2525 # *nothing*.2526 #2527 # To be consistent with Python idiom I suggest this should2528 # return [] e.g. 
`for e in []: print True` never prints2529 # True.2530 where = f"index <= '{beg_dt}' & index >= '{end_dt}'"2531 results = list(store.select("df", where=where, chunksize=chunksize))2532 # should be []2533 assert len(results) == 02534 @pytest.mark.filterwarnings(2535 "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning"2536 )2537 def test_retain_index_attributes(self, setup_path):2538 # GH 3499, losing frequency info on index recreation2539 df = DataFrame(2540 dict(A=Series(range(3), index=date_range("2000-1-1", periods=3, freq="H")))2541 )2542 with ensure_clean_store(setup_path) as store:2543 _maybe_remove(store, "data")2544 store.put("data", df, format="table")2545 result = store.get("data")2546 tm.assert_frame_equal(df, result)2547 for attr in ["freq", "tz", "name"]:2548 for idx in ["index", "columns"]:2549 assert getattr(getattr(df, idx), attr, None) == getattr(2550 getattr(result, idx), attr, None2551 )2552 # try to append a table with a different frequency2553 with catch_warnings(record=True):2554 df2 = DataFrame(2555 dict(2556 A=Series(2557 range(3), index=date_range("2002-1-1", periods=3, freq="D")2558 )2559 )2560 )2561 store.append("data", df2)2562 assert store.get_storer("data").info["index"]["freq"] is None2563 # this is ok2564 _maybe_remove(store, "df2")2565 df2 = DataFrame(2566 dict(2567 A=Series(2568 range(3),2569 index=[2570 Timestamp("20010101"),2571 Timestamp("20010102"),2572 Timestamp("20020101"),2573 ],2574 )2575 )2576 )2577 store.append("df2", df2)2578 df3 = DataFrame(2579 dict(2580 A=Series(2581 range(3), index=date_range("2002-1-1", periods=3, freq="D")2582 )2583 )2584 )2585 store.append("df2", df3)2586 @pytest.mark.filterwarnings(2587 "ignore:\\nthe :pandas.io.pytables.AttributeConflictWarning"2588 )2589 def test_retain_index_attributes2(self, setup_path):2590 with ensure_clean_path(setup_path) as path:2591 with catch_warnings(record=True):2592 df = DataFrame(2593 dict(2594 A=Series(2595 range(3), index=date_range("2000-1-1", periods=3, 
freq="H")2596 )2597 )2598 )2599 df.to_hdf(path, "data", mode="w", append=True)2600 df2 = DataFrame(2601 dict(2602 A=Series(2603 range(3), index=date_range("2002-1-1", periods=3, freq="D")2604 )2605 )2606 )2607 df2.to_hdf(path, "data", append=True)2608 idx = date_range("2000-1-1", periods=3, freq="H")2609 idx.name = "foo"2610 df = DataFrame(dict(A=Series(range(3), index=idx)))2611 df.to_hdf(path, "data", mode="w", append=True)2612 assert read_hdf(path, "data").index.name == "foo"2613 with catch_warnings(record=True):2614 idx2 = date_range("2001-1-1", periods=3, freq="H")2615 idx2.name = "bar"2616 df2 = DataFrame(dict(A=Series(range(3), index=idx2)))2617 df2.to_hdf(path, "data", append=True)2618 assert read_hdf(path, "data").index.name is None2619 def test_frame_select(self, setup_path):2620 df = tm.makeTimeDataFrame()2621 with ensure_clean_store(setup_path) as store:2622 store.put("frame", df, format="table")2623 date = df.index[len(df) // 2]2624 crit1 = Term("index>=date")2625 assert crit1.env.scope["date"] == date2626 crit2 = "columns=['A', 'D']"2627 crit3 = "columns=A"2628 result = store.select("frame", [crit1, crit2])2629 expected = df.loc[date:, ["A", "D"]]2630 tm.assert_frame_equal(result, expected)2631 result = store.select("frame", [crit3])2632 expected = df.loc[:, ["A"]]2633 tm.assert_frame_equal(result, expected)2634 # invalid terms2635 df = tm.makeTimeDataFrame()2636 store.append("df_time", df)2637 with pytest.raises(ValueError):2638 store.select("df_time", "index>0")2639 # can't select if not written as table2640 # store['frame'] = df2641 # with pytest.raises(ValueError):2642 # store.select('frame', [crit1, crit2])2643 def test_frame_select_complex(self, setup_path):2644 # select via complex criteria2645 df = tm.makeTimeDataFrame()2646 df["string"] = "foo"2647 df.loc[df.index[0:4], "string"] = "bar"2648 with ensure_clean_store(setup_path) as store:2649 store.put("df", df, format="table", data_columns=["string"])2650 # empty2651 result = 
store.select("df", 'index>df.index[3] & string="bar"')2652 expected = df.loc[(df.index > df.index[3]) & (df.string == "bar")]2653 tm.assert_frame_equal(result, expected)2654 result = store.select("df", 'index>df.index[3] & string="foo"')2655 expected = df.loc[(df.index > df.index[3]) & (df.string == "foo")]2656 tm.assert_frame_equal(result, expected)2657 # or2658 result = store.select("df", 'index>df.index[3] | string="bar"')2659 expected = df.loc[(df.index > df.index[3]) | (df.string == "bar")]2660 tm.assert_frame_equal(result, expected)2661 result = store.select(2662 "df", '(index>df.index[3] & index<=df.index[6]) | string="bar"'2663 )2664 expected = df.loc[2665 ((df.index > df.index[3]) & (df.index <= df.index[6]))2666 | (df.string == "bar")2667 ]2668 tm.assert_frame_equal(result, expected)2669 # invert2670 result = store.select("df", 'string!="bar"')2671 expected = df.loc[df.string != "bar"]2672 tm.assert_frame_equal(result, expected)2673 # invert not implemented in numexpr :(2674 with pytest.raises(NotImplementedError):2675 store.select("df", '~(string="bar")')2676 # invert ok for filters2677 result = store.select("df", "~(columns=['A','B'])")2678 expected = df.loc[:, df.columns.difference(["A", "B"])]2679 tm.assert_frame_equal(result, expected)2680 # in2681 result = store.select("df", "index>df.index[3] & columns in ['A','B']")2682 expected = df.loc[df.index > df.index[3]].reindex(columns=["A", "B"])2683 tm.assert_frame_equal(result, expected)2684 def test_frame_select_complex2(self, setup_path):2685 with ensure_clean_path(["parms.hdf", "hist.hdf"]) as paths:2686 pp, hh = paths2687 # use non-trivial selection criteria2688 parms = DataFrame({"A": [1, 1, 2, 2, 3]})2689 parms.to_hdf(pp, "df", mode="w", format="table", data_columns=["A"])2690 selection = read_hdf(pp, "df", where="A=[2,3]")2691 hist = DataFrame(2692 np.random.randn(25, 1),2693 columns=["data"],2694 index=MultiIndex.from_tuples(2695 [(i, j) for i in range(5) for j in range(5)], names=["l1", 
    def test_frame_select_complex2(self, setup_path):
        """where-clauses may reference names from the enclosing Python scope."""
        with ensure_clean_path(["parms.hdf", "hist.hdf"]) as paths:
            pp, hh = paths

            # use non-trivial selection criteria
            parms = DataFrame({"A": [1, 1, 2, 2, 3]})
            parms.to_hdf(pp, "df", mode="w", format="table", data_columns=["A"])
            selection = read_hdf(pp, "df", where="A=[2,3]")

            hist = DataFrame(
                np.random.randn(25, 1),
                columns=["data"],
                index=MultiIndex.from_tuples(
                    [(i, j) for i in range(5) for j in range(5)], names=["l1", "l2"]
                ),
            )
            hist.to_hdf(hh, "df", mode="w", format="table")
            expected = read_hdf(hh, "df", where="l1=[2, 3, 4]")

            # scope with list like
            l = selection.index.tolist()  # noqa
            store = HDFStore(hh)
            result = store.select("df", where="l1=l")
            tm.assert_frame_equal(result, expected)
            store.close()

            result = read_hdf(hh, "df", where="l1=l")
            tm.assert_frame_equal(result, expected)

            # index
            index = selection.index  # noqa
            result = read_hdf(hh, "df", where="l1=index")
            tm.assert_frame_equal(result, expected)

            result = read_hdf(hh, "df", where="l1=selection.index")
            tm.assert_frame_equal(result, expected)

            result = read_hdf(hh, "df", where="l1=selection.index.tolist()")
            tm.assert_frame_equal(result, expected)

            result = read_hdf(hh, "df", where="l1=list(selection.index)")
            tm.assert_frame_equal(result, expected)

            # scope with index
            store = HDFStore(hh)

            result = store.select("df", where="l1=index")
            tm.assert_frame_equal(result, expected)

            result = store.select("df", where="l1=selection.index")
            tm.assert_frame_equal(result, expected)

            result = store.select("df", where="l1=selection.index.tolist()")
            tm.assert_frame_equal(result, expected)

            result = store.select("df", where="l1=list(selection.index)")
            tm.assert_frame_equal(result, expected)

            store.close()
    def test_string_select(self, setup_path):
        """String and int equality selection, incl. empty-string -> NaN handling (GH 2973)."""
        # GH 2973
        with ensure_clean_store(setup_path) as store:
            df = tm.makeTimeDataFrame()

            # test string ==/!=
            df["x"] = "none"
            df.loc[df.index[2:7], "x"] = ""

            store.append("df", df, data_columns=["x"])

            result = store.select("df", "x=none")
            expected = df[df.x == "none"]
            tm.assert_frame_equal(result, expected)

            result = store.select("df", "x!=none")
            expected = df[df.x != "none"]
            tm.assert_frame_equal(result, expected)

            # empty strings stored as NaN are treated as not-"none"
            df2 = df.copy()
            df2.loc[df2.x == "", "x"] = np.nan

            store.append("df2", df2, data_columns=["x"])
            result = store.select("df2", "x!=none")
            expected = df2[isna(df2.x)]
            tm.assert_frame_equal(result, expected)

            # int ==/!=
            df["int"] = 1
            df.loc[df.index[2:7], "int"] = 2

            store.append("df3", df, data_columns=["int"])

            result = store.select("df3", "int=2")
            expected = df[df.int == 2]
            tm.assert_frame_equal(result, expected)

            result = store.select("df3", "int!=2")
            expected = df[df.int != 2]
            tm.assert_frame_equal(result, expected)
    def test_read_column(self, setup_path):
        """select_column: error cases, data columns, NaN handling, start/stop slicing."""
        df = tm.makeTimeDataFrame()

        with ensure_clean_store(setup_path) as store:
            _maybe_remove(store, "df")

            # GH 17912
            # HDFStore.select_column should raise a KeyError
            # exception if the key is not a valid store
            with pytest.raises(KeyError, match="No object named df in the file"):
                store.select_column("df", "index")

            store.append("df", df)
            # error
            with pytest.raises(
                KeyError, match=re.escape("'column [foo] not found in the table'")
            ):
                store.select_column("df", "foo")

            with pytest.raises(Exception):
                # ``where`` is not supported by select_column
                store.select_column("df", "index", where=["index>5"])

            # valid
            result = store.select_column("df", "index")
            tm.assert_almost_equal(result.values, Series(df.index).values)
            assert isinstance(result, Series)

            # not a data indexable column
            with pytest.raises(ValueError):
                store.select_column("df", "values_block_0")

            # a data column
            df2 = df.copy()
            df2["string"] = "foo"
            store.append("df2", df2, data_columns=["string"])
            result = store.select_column("df2", "string")
            tm.assert_almost_equal(result.values, df2["string"].values)

            # a data column with NaNs, result excludes the NaNs
            df3 = df.copy()
            df3["string"] = "foo"
            df3.loc[df3.index[4:6], "string"] = np.nan
            store.append("df3", df3, data_columns=["string"])
            result = store.select_column("df3", "string")
            tm.assert_almost_equal(result.values, df3["string"].values)

            # start/stop
            result = store.select_column("df3", "string", start=2)
            tm.assert_almost_equal(result.values, df3["string"].values[2:])

            result = store.select_column("df3", "string", start=-2)
            tm.assert_almost_equal(result.values, df3["string"].values[-2:])

            result = store.select_column("df3", "string", stop=2)
            tm.assert_almost_equal(result.values, df3["string"].values[:2])

            result = store.select_column("df3", "string", stop=-2)
            tm.assert_almost_equal(result.values, df3["string"].values[:-2])

            result = store.select_column("df3", "string", start=2, stop=-2)
            tm.assert_almost_equal(result.values, df3["string"].values[2:-2])

            result = store.select_column("df3", "string", start=-2, stop=2)
            tm.assert_almost_equal(result.values, df3["string"].values[-2:2])

            # GH 10392 - make sure column name is preserved
            df4 = DataFrame({"A": np.random.randn(10), "B": "foo"})
            store.append("df4", df4, data_columns=True)
            expected = df4["B"]
            result = store.select_column("df4", "B")
            tm.assert_series_equal(result, expected)
    def test_coordinates(self, setup_path):
        """select_as_coordinates round-trips, coordinate reuse across tables, where forms."""
        df = tm.makeTimeDataFrame()

        with ensure_clean_store(setup_path) as store:
            _maybe_remove(store, "df")
            store.append("df", df)

            # all
            c = store.select_as_coordinates("df")
            assert (c.values == np.arange(len(df.index))).all()

            # get coordinates back & test vs frame
            _maybe_remove(store, "df")

            df = DataFrame(dict(A=range(5), B=range(5)))
            store.append("df", df)
            c = store.select_as_coordinates("df", ["index<3"])
            assert (c.values == np.arange(3)).all()
            result = store.select("df", where=c)
            expected = df.loc[0:2, :]
            tm.assert_frame_equal(result, expected)

            c = store.select_as_coordinates("df", ["index>=3", "index<=4"])
            assert (c.values == np.arange(2) + 3).all()
            result = store.select("df", where=c)
            expected = df.loc[3:4, :]
            tm.assert_frame_equal(result, expected)
            assert isinstance(c, Index)

            # multiple tables: coordinates from one table select rows of another
            _maybe_remove(store, "df1")
            _maybe_remove(store, "df2")
            df1 = tm.makeTimeDataFrame()
            df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
            store.append("df1", df1, data_columns=["A", "B"])
            store.append("df2", df2)

            c = store.select_as_coordinates("df1", ["A>0", "B>0"])
            df1_result = store.select("df1", c)
            df2_result = store.select("df2", c)
            result = concat([df1_result, df2_result], axis=1)

            expected = concat([df1, df2], axis=1)
            expected = expected[(expected.A > 0) & (expected.B > 0)]
            tm.assert_frame_equal(result, expected)

        # pass array/mask as the coordinates
        with ensure_clean_store(setup_path) as store:

            df = DataFrame(
                np.random.randn(1000, 2), index=date_range("20000101", periods=1000)
            )
            store.append("df", df)
            c = store.select_column("df", "index")
            where = c[DatetimeIndex(c).month == 5].index
            expected = df.iloc[where]

            # locations
            result = store.select("df", where=where)
            tm.assert_frame_equal(result, expected)

            # boolean
            result = store.select("df", where=where)
            tm.assert_frame_equal(result, expected)

            # invalid: wrong dtype, wrong length, or combined with start/stop
            with pytest.raises(ValueError):
                store.select("df", where=np.arange(len(df), dtype="float64"))

            with pytest.raises(ValueError):
                store.select("df", where=np.arange(len(df) + 1))

            with pytest.raises(ValueError):
                store.select("df", where=np.arange(len(df)), start=5)

            with pytest.raises(ValueError):
                store.select("df", where=np.arange(len(df)), start=5, stop=10)

            # selection with filter
            selection = date_range("20000101", periods=500)
            result = store.select("df", where="index in selection")
            expected = df[df.index.isin(selection)]
            tm.assert_frame_equal(result, expected)

            # list
            df = DataFrame(np.random.randn(10, 2))
            store.append("df2", df)
            result = store.select("df2", where=[0, 3, 5])
            expected = df.iloc[[0, 3, 5]]
            tm.assert_frame_equal(result, expected)

            # boolean
            where = [True] * 10
            where[-2] = False
            result = store.select("df2", where=where)
            expected = df.loc[where]
            tm.assert_frame_equal(result, expected)

            # start/stop
            result = store.select("df2", start=5, stop=10)
            expected = df[5:10]
            tm.assert_frame_equal(result, expected)
store:2938 # dropna=True should guarantee rows are synchronized2939 store.append_to_multiple(2940 {"df1": ["A", "B"], "df2": None}, df, selector="df1", dropna=True2941 )2942 result = store.select_as_multiple(["df1", "df2"])2943 expected = df.dropna()2944 tm.assert_frame_equal(result, expected)2945 tm.assert_index_equal(store.select("df1").index, store.select("df2").index)2946 @pytest.mark.xfail(2947 run=False, reason="append_to_multiple_dropna_false is not raising as failed"2948 )2949 def test_append_to_multiple_dropna_false(self, setup_path):2950 df1 = tm.makeTimeDataFrame()2951 df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)2952 df1.iloc[1, df1.columns.get_indexer(["A", "B"])] = np.nan2953 df = concat([df1, df2], axis=1)2954 with ensure_clean_store(setup_path) as store:2955 # dropna=False shouldn't synchronize row indexes2956 store.append_to_multiple(2957 {"df1a": ["A", "B"], "df2a": None}, df, selector="df1a", dropna=False2958 )2959 with pytest.raises(ValueError):2960 store.select_as_multiple(["df1a", "df2a"])2961 assert not store.select("df1a").index.equals(store.select("df2a").index)2962 def test_append_to_multiple_min_itemsize(self, setup_path):2963 # GH 112382964 df = pd.DataFrame(2965 {2966 "IX": np.arange(1, 21),2967 "Num": np.arange(1, 21),2968 "BigNum": np.arange(1, 21) * 88,2969 "Str": ["a" for _ in range(20)],2970 "LongStr": ["abcde" for _ in range(20)],2971 }2972 )2973 expected = df.iloc[[0]]2974 with ensure_clean_store(setup_path) as store:2975 store.append_to_multiple(2976 {2977 "index": ["IX"],2978 "nums": ["Num", "BigNum"],2979 "strs": ["Str", "LongStr"],2980 },2981 df.iloc[[0]],2982 "index",2983 min_itemsize={"Str": 10, "LongStr": 100, "Num": 2},2984 )2985 result = store.select_as_multiple(["index", "nums", "strs"])2986 tm.assert_frame_equal(result, expected)2987 def test_select_as_multiple(self, setup_path):2988 df1 = tm.makeTimeDataFrame()2989 df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)2990 df2["foo"] = "bar"2991 with 
    def test_select_as_multiple(self, setup_path):
        """select_as_multiple: error cases, single/multiple tables, mismatched rows."""
        df1 = tm.makeTimeDataFrame()
        df2 = tm.makeTimeDataFrame().rename(columns="{}_2".format)
        df2["foo"] = "bar"

        with ensure_clean_store(setup_path) as store:

            # no tables stored
            with pytest.raises(Exception):
                store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1")

            store.append("df1", df1, data_columns=["A", "B"])
            store.append("df2", df2)

            # exceptions
            with pytest.raises(Exception):
                store.select_as_multiple(None, where=["A>0", "B>0"], selector="df1")

            with pytest.raises(Exception):
                store.select_as_multiple([None], where=["A>0", "B>0"], selector="df1")

            msg = "'No object named df3 in the file'"
            with pytest.raises(KeyError, match=msg):
                store.select_as_multiple(
                    ["df1", "df3"], where=["A>0", "B>0"], selector="df1"
                )

            with pytest.raises(KeyError, match=msg):
                store.select_as_multiple(["df3"], where=["A>0", "B>0"], selector="df1")

            with pytest.raises(KeyError, match="'No object named df4 in the file'"):
                store.select_as_multiple(
                    ["df1", "df2"], where=["A>0", "B>0"], selector="df4"
                )

            # default select
            result = store.select("df1", ["A>0", "B>0"])
            expected = store.select_as_multiple(
                ["df1"], where=["A>0", "B>0"], selector="df1"
            )
            tm.assert_frame_equal(result, expected)
            # a single key may also be passed as a bare string
            expected = store.select_as_multiple(
                "df1", where=["A>0", "B>0"], selector="df1"
            )
            tm.assert_frame_equal(result, expected)

            # multiple
            result = store.select_as_multiple(
                ["df1", "df2"], where=["A>0", "B>0"], selector="df1"
            )
            expected = concat([df1, df2], axis=1)
            expected = expected[(expected.A > 0) & (expected.B > 0)]
            tm.assert_frame_equal(result, expected)

            # multiple (diff selector)
            result = store.select_as_multiple(
                ["df1", "df2"], where="index>df2.index[4]", selector="df2"
            )
            expected = concat([df1, df2], axis=1)
            expected = expected[5:]
            tm.assert_frame_equal(result, expected)

            # test exception for diff rows
            store.append("df3", tm.makeTimeDataFrame(nper=50))
            with pytest.raises(ValueError):
                store.select_as_multiple(
                    ["df1", "df3"], where=["A>0", "B>0"], selector="df1"
                )
    def test_start_stop_table(self, setup_path):
        """start/stop row slicing on a table-format store, incl. out-of-range."""
        with ensure_clean_store(setup_path) as store:

            # table
            df = DataFrame(dict(A=np.random.rand(20), B=np.random.rand(20)))
            store.append("df", df)

            result = store.select("df", "columns=['A']", start=0, stop=5)
            expected = df.loc[0:4, ["A"]]
            tm.assert_frame_equal(result, expected)

            # out of range: returns an empty frame, like .loc slicing
            result = store.select("df", "columns=['A']", start=30, stop=40)
            assert len(result) == 0
            expected = df.loc[30:40, ["A"]]
            tm.assert_frame_equal(result, expected)
    def test_start_stop_fixed(self, setup_path):
        """start/stop row slicing on fixed-format frames and series (GH 8287)."""
        with ensure_clean_store(setup_path) as store:

            # fixed, GH 8287
            df = DataFrame(
                dict(A=np.random.rand(20), B=np.random.rand(20)),
                index=pd.date_range("20130101", periods=20),
            )
            store.put("df", df)

            result = store.select("df", start=0, stop=5)
            expected = df.iloc[0:5, :]
            tm.assert_frame_equal(result, expected)

            result = store.select("df", start=5, stop=10)
            expected = df.iloc[5:10, :]
            tm.assert_frame_equal(result, expected)

            # out of range
            result = store.select("df", start=30, stop=40)
            expected = df.iloc[30:40, :]
            tm.assert_frame_equal(result, expected)

            # series
            s = df.A
            store.put("s", s)
            result = store.select("s", start=0, stop=5)
            expected = s.iloc[0:5]
            tm.assert_series_equal(result, expected)

            result = store.select("s", start=5, stop=10)
            expected = s.iloc[5:10]
            tm.assert_series_equal(result, expected)

            # sparse; not implemented
            df = tm.makeDataFrame()
            df.iloc[3:5, 1:3] = np.nan
            df.iloc[8:10, -2] = np.nan
VLArray3138 df = DataFrame(3139 {3140 "a": Series([20111010, 20111011, 20111012]),3141 "b": Series(["ab", "cd", "ab"]),3142 }3143 )3144 with ensure_clean_store(setup_path) as store:3145 store.append("test_dataset", df)3146 result = store.select("test_dataset", start=start, stop=stop)3147 tm.assert_frame_equal(df[start:stop], result)3148 def test_path_pathlib_hdfstore(self, setup_path):3149 df = tm.makeDataFrame()3150 def writer(path):3151 with pd.HDFStore(path) as store:3152 df.to_hdf(store, "df")3153 def reader(path):3154 with pd.HDFStore(path) as store:3155 return pd.read_hdf(store, "df")3156 result = tm.round_trip_pathlib(writer, reader)3157 tm.assert_frame_equal(df, result)3158 def test_pickle_path_localpath(self, setup_path):3159 df = tm.makeDataFrame()3160 result = tm.round_trip_pathlib(3161 lambda p: df.to_hdf(p, "df"), lambda p: pd.read_hdf(p, "df")3162 )3163 tm.assert_frame_equal(df, result)3164 def test_path_localpath_hdfstore(self, setup_path):3165 df = tm.makeDataFrame()3166 def writer(path):3167 with pd.HDFStore(path) as store:3168 df.to_hdf(store, "df")3169 def reader(path):3170 with pd.HDFStore(path) as store:3171 return pd.read_hdf(store, "df")3172 result = tm.round_trip_localpath(writer, reader)3173 tm.assert_frame_equal(df, result)3174 def _check_roundtrip(self, obj, comparator, path, compression=False, **kwargs):3175 options = {}3176 if compression:3177 options["complib"] = _default_compressor3178 with ensure_clean_store(path, "w", **options) as store:3179 store["obj"] = obj3180 retrieved = store["obj"]3181 comparator(retrieved, obj, **kwargs)3182 def _check_double_roundtrip(3183 self, obj, comparator, path, compression=False, **kwargs3184 ):3185 options = {}3186 if compression:3187 options["complib"] = compression or _default_compressor3188 with ensure_clean_store(path, "w", **options) as store:3189 store["obj"] = obj3190 retrieved = store["obj"]3191 comparator(retrieved, obj, **kwargs)3192 store["obj"] = retrieved3193 again = store["obj"]3194 
    def test_multiple_open_close(self, setup_path):
        """gh-4409: opening/closing a store multiple times, concurrent handles,
        nested and double close, and operations on a closed store."""
        # gh-4409: open & close multiple times

        with ensure_clean_path(setup_path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path, "df", mode="w", format="table")

            # single
            store = HDFStore(path)
            assert "CLOSED" not in store.info()
            assert store.is_open

            store.close()
            assert "CLOSED" in store.info()
            assert not store.is_open

        with ensure_clean_path(setup_path) as path:

            if pytables._table_file_open_policy_is_strict:
                # multiples: strict policy forbids a second handle
                store1 = HDFStore(path)

                with pytest.raises(ValueError):
                    HDFStore(path)

                store1.close()
            else:

                # multiples: both handles stay independent until closed
                store1 = HDFStore(path)
                store2 = HDFStore(path)

                assert "CLOSED" not in store1.info()
                assert "CLOSED" not in store2.info()
                assert store1.is_open
                assert store2.is_open

                store1.close()
                assert "CLOSED" in store1.info()
                assert not store1.is_open
                assert "CLOSED" not in store2.info()
                assert store2.is_open

                store2.close()
                assert "CLOSED" in store1.info()
                assert "CLOSED" in store2.info()
                assert not store1.is_open
                assert not store2.is_open

                # nested close
                store = HDFStore(path, mode="w")
                store.append("df", df)

                store2 = HDFStore(path)
                store2.append("df2", df)
                store2.close()
                assert "CLOSED" in store2.info()
                assert not store2.is_open

                store.close()
                assert "CLOSED" in store.info()
                assert not store.is_open

                # double closing
                store = HDFStore(path, mode="w")
                store.append("df", df)

                store2 = HDFStore(path)
                store.close()
                assert "CLOSED" in store.info()
                assert not store.is_open

                store2.close()
                assert "CLOSED" in store2.info()
                assert not store2.is_open

        # ops on a closed store
        with ensure_clean_path(setup_path) as path:

            df = tm.makeDataFrame()
            df.to_hdf(path, "df", mode="w", format="table")

            store = HDFStore(path)
            store.close()

            with pytest.raises(ClosedFileError):
                store.keys()

            with pytest.raises(ClosedFileError):
                "df" in store

            with pytest.raises(ClosedFileError):
                len(store)

            with pytest.raises(ClosedFileError):
                store["df"]

            with pytest.raises(AttributeError):
                store.df

            with pytest.raises(ClosedFileError):
                store.select("df")

            with pytest.raises(ClosedFileError):
                store.get("df")

            with pytest.raises(ClosedFileError):
                store.append("df2", df)

            with pytest.raises(ClosedFileError):
                store.put("df3", df)

            with pytest.raises(ClosedFileError):
                store.get_storer("df2")

            with pytest.raises(ClosedFileError):
                store.remove("df2")

            with pytest.raises(ClosedFileError, match="file is not open"):
                store.select("df")
store.select("df")3315 expected = pd.DataFrame(3316 [[1, 2, 3, "D"]],3317 columns=["A", "B", "C", "D"],3318 index=pd.Index(["ABC"], name="INDEX_NAME"),3319 )3320 tm.assert_frame_equal(expected, result)3321 def test_legacy_table_fixed_format_read_datetime_py2(self, datapath, setup_path):3322 # GH 317503323 # legacy table with fixed format and datetime64 column written in Python 23324 with ensure_clean_store(3325 datapath("io", "data", "legacy_hdf", "legacy_table_fixed_datetime_py2.h5"),3326 mode="r",3327 ) as store:3328 result = store.select("df")3329 expected = pd.DataFrame(3330 [[pd.Timestamp("2020-02-06T18:00")]],3331 columns=["A"],3332 index=pd.Index(["date"]),3333 )3334 tm.assert_frame_equal(expected, result)3335 def test_legacy_table_read_py2(self, datapath, setup_path):3336 # issue: 249253337 # legacy table written in Python 23338 with ensure_clean_store(3339 datapath("io", "data", "legacy_hdf", "legacy_table_py2.h5"), mode="r"3340 ) as store:3341 result = store.select("table")3342 expected = pd.DataFrame({"a": ["a", "b"], "b": [2, 3]})3343 tm.assert_frame_equal(expected, result)3344 def test_copy(self, setup_path):3345 with catch_warnings(record=True):3346 def do_copy(f, new_f=None, keys=None, propindexes=True, **kwargs):3347 try:3348 store = HDFStore(f, "r")3349 if new_f is None:3350 import tempfile3351 fd, new_f = tempfile.mkstemp()3352 tstore = store.copy(3353 new_f, keys=keys, propindexes=propindexes, **kwargs3354 )3355 # check keys3356 if keys is None:3357 keys = store.keys()3358 assert set(keys) == set(tstore.keys())3359 # check indices & nrows3360 for k in tstore.keys():3361 if tstore.get_storer(k).is_table:3362 new_t = tstore.get_storer(k)3363 orig_t = store.get_storer(k)3364 assert orig_t.nrows == new_t.nrows3365 # check propindixes3366 if propindexes:3367 for a in orig_t.axes:3368 if a.is_indexed:3369 assert new_t[a.name].is_indexed3370 finally:3371 safe_close(store)3372 safe_close(tstore)3373 try:3374 os.close(fd)3375 except (OSError, 
ValueError):3376 pass3377 safe_remove(new_f)3378 # new table3379 df = tm.makeDataFrame()3380 try:3381 path = create_tempfile(setup_path)3382 st = HDFStore(path)3383 st.append("df", df, data_columns=["A"])3384 st.close()3385 do_copy(f=path)3386 do_copy(f=path, propindexes=False)3387 finally:3388 safe_remove(path)3389 def test_store_datetime_fractional_secs(self, setup_path):3390 with ensure_clean_store(setup_path) as store:3391 dt = datetime.datetime(2012, 1, 2, 3, 4, 5, 123456)3392 series = Series([0], [dt])3393 store["a"] = series3394 assert store["a"].index[0] == dt3395 def test_tseries_indices_series(self, setup_path):3396 with ensure_clean_store(setup_path) as store:3397 idx = tm.makeDateIndex(10)3398 ser = Series(np.random.randn(len(idx)), idx)3399 store["a"] = ser3400 result = store["a"]3401 tm.assert_series_equal(result, ser)3402 assert result.index.freq == ser.index.freq3403 tm.assert_class_equal(result.index, ser.index, obj="series index")3404 idx = tm.makePeriodIndex(10)3405 ser = Series(np.random.randn(len(idx)), idx)3406 store["a"] = ser3407 result = store["a"]3408 tm.assert_series_equal(result, ser)3409 assert result.index.freq == ser.index.freq3410 tm.assert_class_equal(result.index, ser.index, obj="series index")3411 def test_tseries_indices_frame(self, setup_path):3412 with ensure_clean_store(setup_path) as store:3413 idx = tm.makeDateIndex(10)3414 df = DataFrame(np.random.randn(len(idx), 3), index=idx)3415 store["a"] = df3416 result = store["a"]3417 tm.assert_frame_equal(result, df)3418 assert result.index.freq == df.index.freq3419 tm.assert_class_equal(result.index, df.index, obj="dataframe index")3420 idx = tm.makePeriodIndex(10)3421 df = DataFrame(np.random.randn(len(idx), 3), idx)3422 store["a"] = df3423 result = store["a"]3424 tm.assert_frame_equal(result, df)3425 assert result.index.freq == df.index.freq3426 tm.assert_class_equal(result.index, df.index, obj="dataframe index")3427 def test_unicode_index(self, setup_path):3428 unicode_values = 
    def test_unicode_longer_encoded(self, setup_path):
        """Non-ASCII values whose utf-8 encoding is longer than the str round-trip (GH 11234)."""
        # GH 11234
        char = "\u0394"
        df = pd.DataFrame({"A": [char]})
        with ensure_clean_store(setup_path) as store:
            store.put("df", df, format="table", encoding="utf-8")
            result = store.get("df")
            tm.assert_frame_equal(result, df)

        # mixed ASCII / non-ASCII column
        df = pd.DataFrame({"A": ["a", char], "B": ["b", "b"]})
        with ensure_clean_store(setup_path) as store:
            store.put("df", df, format="table", encoding="utf-8")
            result = store.get("df")
            tm.assert_frame_equal(result, df)
    def test_query_with_nested_special_character(self, setup_path):
        """A quoted query value containing '&' must not be parsed as an operator."""
        df = DataFrame(
            {
                "a": ["a", "a", "c", "b", "test & test", "c", "b", "e"],
                "b": [1, 2, 3, 4, 5, 6, 7, 8],
            }
        )
        expected = df[df.a == "test & test"]
        with ensure_clean_store(setup_path) as store:
            store.append("test", df, format="table", data_columns=True)
            result = store.select("test", 'a = "test & test"')
        tm.assert_frame_equal(expected, result)
    def test_categorical(self, setup_path):
        """Categorical round-trips: ordered/unordered, dtypes, querying, appending,
        category-mismatch errors, and recursive metadata removal."""
        with ensure_clean_store(setup_path) as store:

            # Basic
            _maybe_remove(store, "s")
            s = Series(
                Categorical(
                    ["a", "b", "b", "a", "a", "c"],
                    categories=["a", "b", "c", "d"],
                    ordered=False,
                )
            )
            store.append("s", s, format="table")
            result = store.select("s")
            tm.assert_series_equal(s, result)

            _maybe_remove(store, "s_ordered")
            s = Series(
                Categorical(
                    ["a", "b", "b", "a", "a", "c"],
                    categories=["a", "b", "c", "d"],
                    ordered=True,
                )
            )
            store.append("s_ordered", s, format="table")
            result = store.select("s_ordered")
            tm.assert_series_equal(s, result)

            _maybe_remove(store, "df")
            df = DataFrame({"s": s, "vals": [1, 2, 3, 4, 5, 6]})
            store.append("df", df, format="table")
            result = store.select("df")
            tm.assert_frame_equal(result, df)

            # Dtypes
            _maybe_remove(store, "si")
            s = Series([1, 1, 2, 2, 3, 4, 5]).astype("category")
            store.append("si", s)
            result = store.select("si")
            tm.assert_series_equal(result, s)

            _maybe_remove(store, "si2")
            s = Series([1, 1, np.nan, 2, 3, 4, 5]).astype("category")
            store.append("si2", s)
            result = store.select("si2")
            tm.assert_series_equal(result, s)

            # Multiple
            _maybe_remove(store, "df2")
            df2 = df.copy()
            df2["s2"] = Series(list("abcdefg")).astype("category")
            store.append("df2", df2)
            result = store.select("df2")
            tm.assert_frame_equal(result, df2)

            # Make sure the metadata is OK
            info = store.info()
            assert "/df2   " in info
            # assert '/df2/meta/values_block_0/meta' in info
            assert "/df2/meta/values_block_1/meta" in info

            # unordered
            _maybe_remove(store, "s2")
            s = Series(
                Categorical(
                    ["a", "b", "b", "a", "a", "c"],
                    categories=["a", "b", "c", "d"],
                    ordered=False,
                )
            )
            store.append("s2", s, format="table")
            result = store.select("s2")
            tm.assert_series_equal(result, s)

            # Query
            _maybe_remove(store, "df3")
            store.append("df3", df, data_columns=["s"])
            expected = df[df.s.isin(["b", "c"])]
            result = store.select("df3", where=['s in ["b","c"]'])
            tm.assert_frame_equal(result, expected)

            expected = df[df.s.isin(["b", "c"])]
            result = store.select("df3", where=['s = ["b","c"]'])
            tm.assert_frame_equal(result, expected)

            # "d" is a declared (but unused) category -> empty result
            expected = df[df.s.isin(["d"])]
            result = store.select("df3", where=['s in ["d"]'])
            tm.assert_frame_equal(result, expected)

            # "f" is not a category at all -> also empty, no error
            expected = df[df.s.isin(["f"])]
            result = store.select("df3", where=['s in ["f"]'])
            tm.assert_frame_equal(result, expected)

            # Appending with same categories is ok
            store.append("df3", df)

            df = concat([df, df])
            expected = df[df.s.isin(["b", "c"])]
            result = store.select("df3", where=['s in ["b","c"]'])
            tm.assert_frame_equal(result, expected)

            # Appending must have the same categories
            df3 = df.copy()
            df3["s"].cat.remove_unused_categories(inplace=True)

            with pytest.raises(ValueError):
                store.append("df3", df3)

            # Remove, and make sure meta data is removed (its a recursive
            # removal so should be).
            result = store.select("df3/meta/s/meta")
            assert result is not None
            store.remove("df3")

            with pytest.raises(
                KeyError, match="'No object named df3/meta/s/meta in the file'"
            ):
                store.select("df3/meta/s/meta")
where criteria isn't met.3590 obsids = ["ESP_012345_6789", "ESP_987654_3210"]3591 imgids = ["APF00006np", "APF0001imm"]3592 data = [4.3, 9.8]3593 # Test without categories3594 df = DataFrame(dict(obsids=obsids, imgids=imgids, data=data))3595 # We are expecting an empty DataFrame matching types of df3596 expected = df.iloc[[], :]3597 with ensure_clean_path(setup_path) as path:3598 df.to_hdf(path, "df", format="table", data_columns=True)3599 result = read_hdf(path, "df", where="obsids=B")3600 tm.assert_frame_equal(result, expected)3601 # Test with categories3602 df.obsids = df.obsids.astype("category")3603 df.imgids = df.imgids.astype("category")3604 # We are expecting an empty DataFrame matching types of df3605 expected = df.iloc[[], :]3606 with ensure_clean_path(setup_path) as path:3607 df.to_hdf(path, "df", format="table", data_columns=True)3608 result = read_hdf(path, "df", where="obsids=B")3609 tm.assert_frame_equal(result, expected)3610 def test_categorical_nan_only_columns(self, setup_path):3611 # GH184133612 # Check that read_hdf with categorical columns with NaN-only values can3613 # be read back.3614 df = pd.DataFrame(3615 {3616 "a": ["a", "b", "c", np.nan],3617 "b": [np.nan, np.nan, np.nan, np.nan],3618 "c": [1, 2, 3, 4],3619 "d": pd.Series([None] * 4, dtype=object),3620 }3621 )3622 df["a"] = df.a.astype("category")3623 df["b"] = df.b.astype("category")3624 df["d"] = df.b.astype("category")3625 expected = df3626 with ensure_clean_path(setup_path) as path:3627 df.to_hdf(path, "df", format="table", data_columns=True)3628 result = read_hdf(path, "df")3629 tm.assert_frame_equal(result, expected)3630 def test_duplicate_column_name(self, setup_path):3631 df = DataFrame(columns=["a", "a"], data=[[0, 0]])3632 with ensure_clean_path(setup_path) as path:3633 with pytest.raises(ValueError):3634 df.to_hdf(path, "df", format="fixed")3635 df.to_hdf(path, "df", format="table")3636 other = read_hdf(path, "df")3637 tm.assert_frame_equal(df, other)3638 assert 
df.equals(other)3639 assert other.equals(df)3640 def test_round_trip_equals(self, setup_path):3641 # GH 93303642 df = DataFrame({"B": [1, 2], "A": ["x", "y"]})3643 with ensure_clean_path(setup_path) as path:3644 df.to_hdf(path, "df", format="table")3645 other = read_hdf(path, "df")3646 tm.assert_frame_equal(df, other)3647 assert df.equals(other)3648 assert other.equals(df)3649 def test_preserve_timedeltaindex_type(self, setup_path):3650 # GH96353651 # Storing TimedeltaIndexed DataFrames in fixed stores did not preserve3652 # the type of the index.3653 df = DataFrame(np.random.normal(size=(10, 5)))3654 df.index = timedelta_range(start="0s", periods=10, freq="1s", name="example")3655 with ensure_clean_store(setup_path) as store:3656 store["df"] = df3657 tm.assert_frame_equal(store["df"], df)3658 def test_columns_multiindex_modified(self, setup_path):3659 # BUG: 72123660 # read_hdf store.select modified the passed columns parameters3661 # when multi-indexed.3662 df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))3663 df.index.name = "letters"3664 df = df.set_index(keys="E", append=True)3665 data_columns = df.index.names + df.columns.tolist()3666 with ensure_clean_path(setup_path) as path:3667 df.to_hdf(3668 path,3669 "df",3670 mode="a",3671 append=True,3672 data_columns=data_columns,3673 index=False,3674 )3675 cols2load = list("BCD")3676 cols2load_original = list(cols2load)3677 df_loaded = read_hdf(path, "df", columns=cols2load) # noqa3678 assert cols2load_original == cols2load3679 @ignore_natural_naming_warning3680 def test_to_hdf_with_object_column_names(self, setup_path):3681 # GH90573682 # Writing HDF5 table format should only work for string-like3683 # column types3684 types_should_fail = [3685 tm.makeIntIndex,3686 tm.makeFloatIndex,3687 tm.makeDateIndex,3688 tm.makeTimedeltaIndex,3689 tm.makePeriodIndex,3690 ]3691 types_should_run = [3692 tm.makeStringIndex,3693 tm.makeCategoricalIndex,3694 tm.makeUnicodeIndex,3695 ]3696 for index in 
types_should_fail:3697 df = DataFrame(np.random.randn(10, 2), columns=index(2))3698 with ensure_clean_path(setup_path) as path:3699 with catch_warnings(record=True):3700 msg = "cannot have non-object label DataIndexableCol"3701 with pytest.raises(ValueError, match=msg):3702 df.to_hdf(path, "df", format="table", data_columns=True)3703 for index in types_should_run:3704 df = DataFrame(np.random.randn(10, 2), columns=index(2))3705 with ensure_clean_path(setup_path) as path:3706 with catch_warnings(record=True):3707 df.to_hdf(path, "df", format="table", data_columns=True)3708 result = pd.read_hdf(path, "df", where=f"index = [{df.index[0]}]")3709 assert len(result)3710 def test_read_hdf_open_store(self, setup_path):3711 # GH103303712 # No check for non-string path_or-buf, and no test of open store3713 df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))3714 df.index.name = "letters"3715 df = df.set_index(keys="E", append=True)3716 with ensure_clean_path(setup_path) as path:3717 df.to_hdf(path, "df", mode="w")3718 direct = read_hdf(path, "df")3719 store = HDFStore(path, mode="r")3720 indirect = read_hdf(store, "df")3721 tm.assert_frame_equal(direct, indirect)3722 assert store.is_open3723 store.close()3724 def test_read_hdf_iterator(self, setup_path):3725 df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))3726 df.index.name = "letters"3727 df = df.set_index(keys="E", append=True)3728 with ensure_clean_path(setup_path) as path:3729 df.to_hdf(path, "df", mode="w", format="t")3730 direct = read_hdf(path, "df")3731 iterator = read_hdf(path, "df", iterator=True)3732 assert isinstance(iterator, TableIterator)3733 indirect = next(iterator.__iter__())3734 tm.assert_frame_equal(direct, indirect)3735 iterator.store.close()3736 def test_read_hdf_errors(self, setup_path):3737 df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))3738 with ensure_clean_path(setup_path) as path:3739 with 
pytest.raises(IOError):3740 read_hdf(path, "key")3741 df.to_hdf(path, "df")3742 store = HDFStore(path, mode="r")3743 store.close()3744 with pytest.raises(IOError):3745 read_hdf(store, "df")3746 def test_read_hdf_generic_buffer_errors(self):3747 with pytest.raises(NotImplementedError):3748 read_hdf(BytesIO(b""), "df")3749 def test_invalid_complib(self, setup_path):3750 df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))3751 with ensure_clean_path(setup_path) as path:3752 with pytest.raises(ValueError):3753 df.to_hdf(path, "df", complib="foolib")3754 # GH104433755 def test_read_nokey(self, setup_path):3756 df = DataFrame(np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE"))3757 # Categorical dtype not supported for "fixed" format. So no need3758 # to test with that dtype in the dataframe here.3759 with ensure_clean_path(setup_path) as path:3760 df.to_hdf(path, "df", mode="a")3761 reread = read_hdf(path)3762 tm.assert_frame_equal(df, reread)3763 df.to_hdf(path, "df2", mode="a")3764 with pytest.raises(ValueError):3765 read_hdf(path)3766 def test_read_nokey_table(self, setup_path):3767 # GH132313768 df = DataFrame({"i": range(5), "c": Series(list("abacd"), dtype="category")})3769 with ensure_clean_path(setup_path) as path:3770 df.to_hdf(path, "df", mode="a", format="table")3771 reread = read_hdf(path)3772 tm.assert_frame_equal(df, reread)3773 df.to_hdf(path, "df2", mode="a", format="table")3774 with pytest.raises(ValueError):3775 read_hdf(path)3776 def test_read_nokey_empty(self, setup_path):3777 with ensure_clean_path(setup_path) as path:3778 store = HDFStore(path)3779 store.close()3780 with pytest.raises(ValueError):3781 read_hdf(path)3782 def test_read_from_pathlib_path(self, setup_path):3783 # GH117733784 expected = DataFrame(3785 np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")3786 )3787 with ensure_clean_path(setup_path) as filename:3788 path_obj = Path(filename)3789 expected.to_hdf(path_obj, "df", 
mode="a")3790 actual = read_hdf(path_obj, "df")3791 tm.assert_frame_equal(expected, actual)3792 @td.skip_if_no("py.path")3793 def test_read_from_py_localpath(self, setup_path):3794 # GH117733795 from py.path import local as LocalPath3796 expected = DataFrame(3797 np.random.rand(4, 5), index=list("abcd"), columns=list("ABCDE")3798 )3799 with ensure_clean_path(setup_path) as filename:3800 path_obj = LocalPath(filename)3801 expected.to_hdf(path_obj, "df", mode="a")3802 actual = read_hdf(path_obj, "df")3803 tm.assert_frame_equal(expected, actual)3804 def test_query_long_float_literal(self, setup_path):3805 # GH 142413806 df = pd.DataFrame({"A": [1000000000.0009, 1000000000.0011, 1000000000.0015]})3807 with ensure_clean_store(setup_path) as store:3808 store.append("test", df, format="table", data_columns=True)3809 cutoff = 1000000000.00063810 result = store.select("test", f"A < {cutoff:.4f}")3811 assert result.empty3812 cutoff = 1000000000.00103813 result = store.select("test", f"A > {cutoff:.4f}")3814 expected = df.loc[[1, 2], :]3815 tm.assert_frame_equal(expected, result)3816 exact = 1000000000.00113817 result = store.select("test", f"A == {exact:.4f}")3818 expected = df.loc[[1], :]3819 tm.assert_frame_equal(expected, result)3820 def test_query_compare_column_type(self, setup_path):3821 # GH 154923822 df = pd.DataFrame(3823 {3824 "date": ["2014-01-01", "2014-01-02"],3825 "real_date": date_range("2014-01-01", periods=2),3826 "float": [1.1, 1.2],3827 "int": [1, 2],3828 },3829 columns=["date", "real_date", "float", "int"],3830 )3831 with ensure_clean_store(setup_path) as store:3832 store.append("test", df, format="table", data_columns=True)3833 ts = pd.Timestamp("2014-01-01") # noqa3834 result = store.select("test", where="real_date > ts")3835 expected = df.loc[[1], :]3836 tm.assert_frame_equal(expected, result)3837 for op in ["<", ">", "=="]:3838 # non strings to string column always fail3839 for v in [2.1, True, pd.Timestamp("2014-01-01"), pd.Timedelta(1, "s")]:3840 
query = f"date {op} v"3841 with pytest.raises(TypeError):3842 store.select("test", where=query)3843 # strings to other columns must be convertible to type3844 v = "a"3845 for col in ["int", "float", "real_date"]:3846 query = f"{col} {op} v"3847 with pytest.raises(ValueError):3848 store.select("test", where=query)3849 for v, col in zip(3850 ["1", "1.1", "2014-01-01"], ["int", "float", "real_date"]3851 ):3852 query = f"{col} {op} v"3853 result = store.select("test", where=query)3854 if op == "==":3855 expected = df.loc[[0], :]3856 elif op == ">":3857 expected = df.loc[[1], :]3858 else:3859 expected = df.loc[[], :]3860 tm.assert_frame_equal(expected, result)3861 @pytest.mark.parametrize("format", ["fixed", "table"])3862 def test_read_hdf_series_mode_r(self, format, setup_path):3863 # GH 165833864 # Tests that reading a Series saved to an HDF file3865 # still works if a mode='r' argument is supplied3866 series = tm.makeFloatSeries()3867 with ensure_clean_path(setup_path) as path:3868 series.to_hdf(path, key="data", format=format)3869 result = pd.read_hdf(path, key="data", mode="r")3870 tm.assert_series_equal(result, series)3871 def test_fspath(self):3872 with tm.ensure_clean("foo.h5") as path:3873 with pd.HDFStore(path) as store:3874 assert os.fspath(store) == str(path)3875 def test_read_py2_hdf_file_in_py3(self, datapath):3876 # GH 167813877 # tests reading a PeriodIndex DataFrame written in Python2 in Python33878 # the file was generated in Python 2.7 like so:3879 #3880 # df = pd.DataFrame([1.,2,3], index=pd.PeriodIndex(3881 # ['2015-01-01', '2015-01-02', '2015-01-05'], freq='B'))3882 # df.to_hdf('periodindex_0.20.1_x86_64_darwin_2.7.13.h5', 'p')3883 expected = pd.DataFrame(3884 [1.0, 2, 3],3885 index=pd.PeriodIndex(["2015-01-01", "2015-01-02", "2015-01-05"], freq="B"),3886 )3887 with ensure_clean_store(3888 datapath(3889 "io", "data", "legacy_hdf", "periodindex_0.20.1_x86_64_darwin_2.7.13.h5"3890 ),3891 mode="r",3892 ) as store:3893 result = store["p"]3894 
tm.assert_frame_equal(result, expected)3895 @pytest.mark.parametrize("where", ["", (), (None,), [], [None]])3896 def test_select_empty_where(self, where):3897 # GH266103898 # Using keyword `where` as '' or (), or [None], etc3899 # while reading from HDF store raises3900 # "SyntaxError: only a single expression is allowed"3901 df = pd.DataFrame([1, 2, 3])3902 with ensure_clean_path("empty_where.h5") as path:3903 with pd.HDFStore(path) as store:3904 store.put("df", df, "t")3905 result = pd.read_hdf(store, "df", where=where)3906 tm.assert_frame_equal(result, df)3907 @pytest.mark.parametrize(3908 "idx",3909 [3910 date_range("2019", freq="D", periods=3, tz="UTC"),3911 CategoricalIndex(list("abc")),3912 ],3913 )3914 def test_to_hdf_multiindex_extension_dtype(self, idx, setup_path):3915 # GH 77753916 mi = MultiIndex.from_arrays([idx, idx])3917 df = pd.DataFrame(0, index=mi, columns=["a"])3918 with ensure_clean_path(setup_path) as path:3919 with pytest.raises(NotImplementedError, match="Saving a MultiIndex"):3920 df.to_hdf(path, "df")3921 def test_unsuppored_hdf_file_error(self, datapath):3922 # GH 95393923 data_path = datapath("io", "data", "legacy_hdf/incompatible_dataset.h5")3924 message = (3925 r"Dataset\(s\) incompatible with Pandas data types, "3926 "not table, or no datasets found in HDF5 file."3927 )3928 with pytest.raises(ValueError, match=message):...
---- File: test_round_trip.py ----
Source: test_round_trip.py (reproduced below)
# test_round_trip.py — HDFStore round-trip tests.
# Reconstructed, de-mangled from a scrape that fused original line numbers
# into the text; runtime strings and behavior are preserved exactly.
import datetime
import re
from warnings import (
    catch_warnings,
    simplefilter,
)

import numpy as np
import pytest

from pandas._libs.tslibs import Timestamp
from pandas.compat import is_platform_windows

import pandas as pd
from pandas import (
    DataFrame,
    Index,
    Series,
    _testing as tm,
    bdate_range,
    read_hdf,
)
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_path,
    ensure_clean_store,
)
from pandas.util import _test_decorators as td

_default_compressor = "blosc"

pytestmark = pytest.mark.single


def test_conv_read_write(setup_path):
    with tm.ensure_clean() as path:

        def roundtrip(key, obj, **kwargs):
            obj.to_hdf(path, key, **kwargs)
            return read_hdf(path, key)

        o = tm.makeTimeSeries()
        tm.assert_series_equal(o, roundtrip("series", o))

        o = tm.makeStringSeries()
        tm.assert_series_equal(o, roundtrip("string_series", o))

        o = tm.makeDataFrame()
        tm.assert_frame_equal(o, roundtrip("frame", o))

        # table
        df = DataFrame({"A": range(5), "B": range(5)})
        df.to_hdf(path, "table", append=True)
        result = read_hdf(path, "table", where=["index>2"])
        tm.assert_frame_equal(df[df.index > 2], result)


def test_long_strings(setup_path):
    # GH6166
    df = DataFrame(
        {"a": tm.rands_array(100, size=10)}, index=tm.rands_array(100, size=10)
    )

    with ensure_clean_store(setup_path) as store:
        store.append("df", df, data_columns=["a"])

        result = store.select("df")
        tm.assert_frame_equal(df, result)


def test_api(setup_path):
    # GH4584
    # API issue when to_hdf doesn't accept append AND format args
    with ensure_clean_path(setup_path) as path:

        df = tm.makeDataFrame()
        df.iloc[:10].to_hdf(path, "df", append=True, format="table")
        df.iloc[10:].to_hdf(path, "df", append=True, format="table")
        tm.assert_frame_equal(read_hdf(path, "df"), df)

        # append to False
        df.iloc[:10].to_hdf(path, "df", append=False, format="table")
        df.iloc[10:].to_hdf(path, "df", append=True, format="table")
        tm.assert_frame_equal(read_hdf(path, "df"), df)

    with ensure_clean_path(setup_path) as path:

        df = tm.makeDataFrame()
        df.iloc[:10].to_hdf(path, "df", append=True)
        df.iloc[10:].to_hdf(path, "df", append=True, format="table")
        tm.assert_frame_equal(read_hdf(path, "df"), df)

        # append to False
        df.iloc[:10].to_hdf(path, "df", append=False, format="table")
        df.iloc[10:].to_hdf(path, "df", append=True)
        tm.assert_frame_equal(read_hdf(path, "df"), df)

    with ensure_clean_path(setup_path) as path:

        df = tm.makeDataFrame()
        df.to_hdf(path, "df", append=False, format="fixed")
        tm.assert_frame_equal(read_hdf(path, "df"), df)

        df.to_hdf(path, "df", append=False, format="f")
        tm.assert_frame_equal(read_hdf(path, "df"), df)

        df.to_hdf(path, "df", append=False)
        tm.assert_frame_equal(read_hdf(path, "df"), df)

        df.to_hdf(path, "df")
        tm.assert_frame_equal(read_hdf(path, "df"), df)

    with ensure_clean_store(setup_path) as store:

        df = tm.makeDataFrame()

        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=True, format="table")
        store.append("df", df.iloc[10:], append=True, format="table")
        tm.assert_frame_equal(store.select("df"), df)

        # append to False
        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=False, format="table")
        store.append("df", df.iloc[10:], append=True, format="table")
        tm.assert_frame_equal(store.select("df"), df)

        # formats
        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=False, format="table")
        store.append("df", df.iloc[10:], append=True, format="table")
        tm.assert_frame_equal(store.select("df"), df)

        _maybe_remove(store, "df")
        store.append("df", df.iloc[:10], append=False, format="table")
        store.append("df", df.iloc[10:], append=True, format=None)
        tm.assert_frame_equal(store.select("df"), df)

    with ensure_clean_path(setup_path) as path:
        # Invalid.
        df = tm.makeDataFrame()

        msg = "Can only append to Tables"

        with pytest.raises(ValueError, match=msg):
            df.to_hdf(path, "df", append=True, format="f")

        with pytest.raises(ValueError, match=msg):
            df.to_hdf(path, "df", append=True, format="fixed")

        msg = r"invalid HDFStore format specified \[foo\]"

        with pytest.raises(TypeError, match=msg):
            df.to_hdf(path, "df", append=True, format="foo")

        with pytest.raises(TypeError, match=msg):
            df.to_hdf(path, "df", append=False, format="foo")

    # File path doesn't exist
    path = ""
    msg = f"File {path} does not exist"

    with pytest.raises(FileNotFoundError, match=msg):
        read_hdf(path, "df")


def test_get(setup_path):
    with ensure_clean_store(setup_path) as store:
        store["a"] = tm.makeTimeSeries()
        left = store.get("a")
        right = store["a"]
        tm.assert_series_equal(left, right)

        left = store.get("/a")
        right = store["/a"]
        tm.assert_series_equal(left, right)

        with pytest.raises(KeyError, match="'No object named b in the file'"):
            store.get("b")


def test_put_integer(setup_path):
    # non-date, non-string index
    df = DataFrame(np.random.randn(50, 100))
    _check_roundtrip(df, tm.assert_frame_equal, setup_path)


def test_table_values_dtypes_roundtrip(setup_path):
    with ensure_clean_store(setup_path) as store:
        df1 = DataFrame({"a": [1, 2, 3]}, dtype="f8")
        store.append("df_f8", df1)
        tm.assert_series_equal(df1.dtypes, store["df_f8"].dtypes)

        df2 = DataFrame({"a": [1, 2, 3]}, dtype="i8")
        store.append("df_i8", df2)
        tm.assert_series_equal(df2.dtypes, store["df_i8"].dtypes)

        # incompatible dtype
        msg = re.escape(
            "invalid combination of [values_axes] on appending data "
            "[name->values_block_0,cname->values_block_0,"
            "dtype->float64,kind->float,shape->(1, 3)] vs "
            "current table [name->values_block_0,"
            "cname->values_block_0,dtype->int64,kind->integer,"
            "shape->None]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_i8", df1)

        # check creation/storage/retrieval of float32 (a bit hacky to
        # actually create them thought)
        df1 = DataFrame(np.array([[1], [2], [3]], dtype="f4"), columns=["A"])
        store.append("df_f4", df1)
        tm.assert_series_equal(df1.dtypes, store["df_f4"].dtypes)
        assert df1.dtypes[0] == "float32"

        # check with mixed dtypes
        df1 = DataFrame(
            {
                c: Series(np.random.randint(5), dtype=c)
                for c in ["float32", "float64", "int32", "int64", "int16", "int8"]
            }
        )
        df1["string"] = "foo"
        df1["float322"] = 1.0
        df1["float322"] = df1["float322"].astype("float32")
        df1["bool"] = df1["float32"] > 0
        df1["time1"] = Timestamp("20130101")
        df1["time2"] = Timestamp("20130102")

        store.append("df_mixed_dtypes1", df1)
        result = store.select("df_mixed_dtypes1").dtypes.value_counts()
        result.index = [str(i) for i in result.index]
        expected = Series(
            {
                "float32": 2,
                "float64": 1,
                "int32": 1,
                "bool": 1,
                "int16": 1,
                "int8": 1,
                "int64": 1,
                "object": 1,
                "datetime64[ns]": 2,
            }
        )
        result = result.sort_index()
        expected = expected.sort_index()
        tm.assert_series_equal(result, expected)


def test_series(setup_path):
    s = tm.makeStringSeries()
    _check_roundtrip(s, tm.assert_series_equal, path=setup_path)

    ts = tm.makeTimeSeries()
    _check_roundtrip(ts, tm.assert_series_equal, path=setup_path)

    ts2 = Series(ts.index, Index(ts.index, dtype=object))
    _check_roundtrip(ts2, tm.assert_series_equal, path=setup_path)

    ts3 = Series(ts.values, Index(np.asarray(ts.index, dtype=object), dtype=object))
    _check_roundtrip(
        ts3, tm.assert_series_equal, path=setup_path, check_index_type=False
    )


def test_float_index(setup_path):
    # GH #454
    index = np.random.randn(10)
    s = Series(np.random.randn(10), index=index)
    _check_roundtrip(s, tm.assert_series_equal, path=setup_path)


def test_tuple_index(setup_path):
    # GH #492
    col = np.arange(10)
    idx = [(0.0, 1.0), (2.0, 3.0), (4.0, 5.0)]
    data = np.random.randn(30).reshape((3, 10))
    DF = DataFrame(data, index=idx, columns=col)

    with catch_warnings(record=True):
        simplefilter("ignore", pd.errors.PerformanceWarning)
        _check_roundtrip(DF, tm.assert_frame_equal, path=setup_path)


@pytest.mark.filterwarnings("ignore::pandas.errors.PerformanceWarning")
def test_index_types(setup_path):
    with catch_warnings(record=True):
        values = np.random.randn(2)

        func = lambda l, r: tm.assert_series_equal(l, r, check_index_type=True)

    with catch_warnings(record=True):
        ser = Series(values, [0, "y"])
        _check_roundtrip(ser, func, path=setup_path)

    with catch_warnings(record=True):
        ser = Series(values, [datetime.datetime.today(), 0])
        _check_roundtrip(ser, func, path=setup_path)

    with catch_warnings(record=True):
        ser = Series(values, ["y", 0])
        _check_roundtrip(ser, func, path=setup_path)

    with catch_warnings(record=True):
        ser = Series(values, [datetime.date.today(), "a"])
        _check_roundtrip(ser, func, path=setup_path)

    with catch_warnings(record=True):
        ser = Series(values, [0, "y"])
        _check_roundtrip(ser, func, path=setup_path)

        ser = Series(values, [datetime.datetime.today(), 0])
        _check_roundtrip(ser, func, path=setup_path)

        ser = Series(values, ["y", 0])
        _check_roundtrip(ser, func, path=setup_path)

        ser = Series(values, [datetime.date.today(), "a"])
        _check_roundtrip(ser, func, path=setup_path)

        ser = Series(values, [1.23, "b"])
        _check_roundtrip(ser, func, path=setup_path)

        ser = Series(values, [1, 1.53])
        _check_roundtrip(ser, func, path=setup_path)

        ser = Series(values, [1, 5])
        _check_roundtrip(ser, func, path=setup_path)

        ser = Series(
            values, [datetime.datetime(2012, 1, 1), datetime.datetime(2012, 1, 2)]
        )
        _check_roundtrip(ser, func, path=setup_path)


def test_timeseries_preepoch(setup_path):
    dr = bdate_range("1/1/1940", "1/1/1960")
    ts = Series(np.random.randn(len(dr)), index=dr)
    try:
        _check_roundtrip(ts, tm.assert_series_equal, path=setup_path)
    except OverflowError:
        if is_platform_windows():
            pytest.xfail("known failure on some windows platforms")
        else:
            raise


@pytest.mark.parametrize(
    "compression", [False, pytest.param(True, marks=td.skip_if_windows)]
)
def test_frame(compression, setup_path):
    df = tm.makeDataFrame()

    # put in some random NAs
    df.values[0, 0] = np.nan
    df.values[5, 3] = np.nan

    _check_roundtrip_table(
        df, tm.assert_frame_equal, path=setup_path, compression=compression
    )
    _check_roundtrip(
        df, tm.assert_frame_equal, path=setup_path, compression=compression
    )

    tdf = tm.makeTimeDataFrame()
    _check_roundtrip(
        tdf, tm.assert_frame_equal, path=setup_path, compression=compression
    )

    with ensure_clean_store(setup_path) as store:
        # not consolidated
        df["foo"] = np.random.randn(len(df))
        store["df"] = df
        recons = store["df"]
        assert recons._mgr.is_consolidated()

    # empty
    _check_roundtrip(df[:0], tm.assert_frame_equal, path=setup_path)


def test_empty_series_frame(setup_path):
    s0 = Series(dtype=object)
    s1 = Series(name="myseries", dtype=object)
    df0 = DataFrame()
    df1 = DataFrame(index=["a", "b", "c"])
    df2 = DataFrame(columns=["d", "e", "f"])

    _check_roundtrip(s0, tm.assert_series_equal, path=setup_path)
    _check_roundtrip(s1, tm.assert_series_equal, path=setup_path)
    _check_roundtrip(df0, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(df1, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(df2, tm.assert_frame_equal, path=setup_path)


@pytest.mark.parametrize("dtype", [np.int64, np.float64, object, "m8[ns]", "M8[ns]"])
def test_empty_series(dtype, setup_path):
    s = Series(dtype=dtype)
    _check_roundtrip(s, tm.assert_series_equal, path=setup_path)


def test_can_serialize_dates(setup_path):
    rng = [x.date() for x in bdate_range("1/1/2000", "1/30/2000")]
    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

    _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path)


def test_store_hierarchical(setup_path, multiindex_dataframe_random_data):
    frame = multiindex_dataframe_random_data

    _check_roundtrip(frame, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(frame.T, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(frame["A"], tm.assert_series_equal, path=setup_path)

    # check that the names are stored
    with ensure_clean_store(setup_path) as store:
        store["frame"] = frame
        recons = store["frame"]
        tm.assert_frame_equal(recons, frame)


@pytest.mark.parametrize(
    "compression", [False, pytest.param(True, marks=td.skip_if_windows)]
)
def test_store_mixed(compression, setup_path):
    def _make_one():
        df = tm.makeDataFrame()
        df["obj1"] = "foo"
        df["obj2"] = "bar"
        df["bool1"] = df["A"] > 0
        df["bool2"] = df["B"] > 0
        df["int1"] = 1
        df["int2"] = 2
        return df._consolidate()

    df1 = _make_one()
    df2 = _make_one()

    _check_roundtrip(df1, tm.assert_frame_equal, path=setup_path)
    _check_roundtrip(df2, tm.assert_frame_equal, path=setup_path)

    with ensure_clean_store(setup_path) as store:
        store["obj"] = df1
        tm.assert_frame_equal(store["obj"], df1)
        store["obj"] = df2
        tm.assert_frame_equal(store["obj"], df2)

    # check that can store Series of all of these types
    _check_roundtrip(
        df1["obj1"],
        tm.assert_series_equal,
        path=setup_path,
        compression=compression,
    )
    _check_roundtrip(
        df1["bool1"],
        tm.assert_series_equal,
        path=setup_path,
        compression=compression,
    )
    _check_roundtrip(
        df1["int1"],
        tm.assert_series_equal,
        path=setup_path,
        compression=compression,
    )


def _check_roundtrip(obj, comparator, path, compression=False, **kwargs):
    # Write obj to a fresh store, read it back and compare.
    options = {}
    if compression:
        options["complib"] = _default_compressor

    with ensure_clean_store(path, "w", **options) as store:
        store["obj"] = obj
        retrieved = store["obj"]
        comparator(retrieved, obj, **kwargs)


# NOTE(review): module-level function with an unused `self` parameter —
# apparently left over from a class-based refactor. Kept as-is to preserve
# the call signature for any external callers.
def _check_double_roundtrip(self, obj, comparator, path, compression=False, **kwargs):
    options = {}
    if compression:
        options["complib"] = compression or _default_compressor

    with ensure_clean_store(path, "w", **options) as store:
        store["obj"] = obj
        retrieved = store["obj"]
        comparator(retrieved, obj, **kwargs)
        store["obj"] = retrieved
        again = store["obj"]
        comparator(again, obj, **kwargs)


def _check_roundtrip_table(obj, comparator, path, compression=False):
    # Same as _check_roundtrip but forces the table format on write.
    options = {}
    if compression:
        options["complib"] = _default_compressor

    with ensure_clean_store(path, "w", **options) as store:
        store.put("obj", obj, format="table")
        retrieved = store["obj"]

        comparator(retrieved, obj)


def test_unicode_index(setup_path):
    unicode_values = ["\u03c3", "\u03c3\u03c3"]

    # PerformanceWarning
    with catch_warnings(record=True):
        simplefilter("ignore", pd.errors.PerformanceWarning)
        s = Series(np.random.randn(len(unicode_values)), unicode_values)
        _check_roundtrip(s, tm.assert_series_equal, path=setup_path)


def test_unicode_longer_encoded(setup_path):
    # GH 11234
    char = "\u0394"
    df = DataFrame({"A": [char]})
    with ensure_clean_store(setup_path) as store:
        store.put("df", df, format="table", encoding="utf-8")
        result = store.get("df")
        tm.assert_frame_equal(result, df)

    df = DataFrame({"A": ["a", char], "B": ["b", "b"]})
    with ensure_clean_store(setup_path) as store:
        store.put("df", df, format="table", encoding="utf-8")
        result = store.get("df")
        tm.assert_frame_equal(result, df)


def test_store_datetime_mixed(setup_path):
    df = DataFrame({"a": [1, 2, 3], "b": [1.0, 2.0, 3.0], "c": ["a", "b", "c"]})
    ts = tm.makeTimeSeries()
    df["d"] = ts.index[:3]
    _check_roundtrip(df, tm.assert_frame_equal, path=setup_path)


def test_round_trip_equals(setup_path):
    # GH 9330
    df = DataFrame({"B": [1, 2], "A": ["x", "y"]})

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", format="table")
        other = read_hdf(path, "df")
        tm.assert_frame_equal(df, other)
        assert df.equals(other)
        # NOTE(review): the chunk is truncated immediately after the line
        # above; the symmetric assertion below matches the identical test
        # earlier in the scrape — confirm against the full file.
        assert other.equals(df)
---- File: test_timezones.py ----
Source: test_timezones.py (reproduced below)
# test_timezones.py — timezone-aware HDFStore tests.
# Reconstructed, de-mangled from a scrape that fused original line numbers
# into the text; runtime strings and behavior are preserved exactly.
import datetime

import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import DataFrame, DatetimeIndex, Series, Timestamp, date_range
import pandas._testing as tm
from pandas.tests.io.pytables.common import (
    _maybe_remove,
    ensure_clean_path,
    ensure_clean_store,
)


def _compare_with_tz(a, b):
    # Frame equality plus an element-wise check that each timestamp kept
    # its timezone through the round-trip.
    tm.assert_frame_equal(a, b)

    # compare the zones on each element
    for c in a.columns:
        for i in a.index:
            a_e = a.loc[i, c]
            b_e = b.loc[i, c]
            if not (a_e == b_e and a_e.tz == b_e.tz):
                raise AssertionError(f"invalid tz comparison [{a_e}] [{b_e}]")


def test_append_with_timezones_dateutil(setup_path):

    from datetime import timedelta

    # use maybe_get_tz instead of dateutil.tz.gettz to handle the windows
    # filename issues.
    from pandas._libs.tslibs.timezones import maybe_get_tz

    gettz = lambda x: maybe_get_tz("dateutil/" + x)

    # as columns
    with ensure_clean_store(setup_path) as store:

        _maybe_remove(store, "df_tz")
        df = DataFrame(
            {
                "A": [
                    Timestamp("20130102 2:00:00", tz=gettz("US/Eastern"))
                    + timedelta(hours=1) * i
                    for i in range(5)
                ]
            }
        )

        store.append("df_tz", df, data_columns=["A"])
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        # select with tz aware
        expected = df[df.A >= df.A[3]]
        result = store.select("df_tz", where="A>=df.A[3]")
        _compare_with_tz(result, expected)

        # ensure we include dates in DST and STD time here.
        _maybe_remove(store, "df_tz")
        df = DataFrame(
            {
                "A": Timestamp("20130102", tz=gettz("US/Eastern")),
                "B": Timestamp("20130603", tz=gettz("US/Eastern")),
            },
            index=range(5),
        )
        store.append("df_tz", df)
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        df = DataFrame(
            {
                "A": Timestamp("20130102", tz=gettz("US/Eastern")),
                "B": Timestamp("20130102", tz=gettz("EET")),
            },
            index=range(5),
        )

        msg = (
            r"invalid info for \[values_block_1\] for \[tz\], "
            r"existing_value \[dateutil/.*US/Eastern\] "
            r"conflicts with new value \[dateutil/.*EET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df)

        # this is ok
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df, data_columns=["A", "B"])
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        # can't append with diff timezone
        df = DataFrame(
            {
                "A": Timestamp("20130102", tz=gettz("US/Eastern")),
                "B": Timestamp("20130102", tz=gettz("CET")),
            },
            index=range(5),
        )

        msg = (
            r"invalid info for \[B\] for \[tz\], "
            r"existing_value \[dateutil/.*EET\] "
            r"conflicts with new value \[dateutil/.*CET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df)

    # as index
    with ensure_clean_store(setup_path) as store:

        dti = date_range("2000-1-1", periods=3, freq="H", tz=gettz("US/Eastern"))
        dti = dti._with_freq(None)  # freq doesnt round-trip

        # GH 4098 example
        df = DataFrame({"A": Series(range(3), index=dti)})

        _maybe_remove(store, "df")
        store.put("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)

        _maybe_remove(store, "df")
        store.append("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)


def test_append_with_timezones_pytz(setup_path):

    from datetime import timedelta

    # as columns
    with ensure_clean_store(setup_path) as store:

        _maybe_remove(store, "df_tz")
        df = DataFrame(
            {
                "A": [
                    Timestamp("20130102 2:00:00", tz="US/Eastern")
                    + timedelta(hours=1) * i
                    for i in range(5)
                ]
            }
        )
        store.append("df_tz", df, data_columns=["A"])
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        # select with tz aware
        _compare_with_tz(store.select("df_tz", where="A>=df.A[3]"), df[df.A >= df.A[3]])

        _maybe_remove(store, "df_tz")
        # ensure we include dates in DST and STD time here.
        df = DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130603", tz="US/Eastern"),
            },
            index=range(5),
        )
        store.append("df_tz", df)
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        df = DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130102", tz="EET"),
            },
            index=range(5),
        )

        msg = (
            r"invalid info for \[values_block_1\] for \[tz\], "
            r"existing_value \[US/Eastern\] conflicts with new value \[EET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df)

        # this is ok
        _maybe_remove(store, "df_tz")
        store.append("df_tz", df, data_columns=["A", "B"])
        result = store["df_tz"]
        _compare_with_tz(result, df)
        tm.assert_frame_equal(result, df)

        # can't append with diff timezone
        df = DataFrame(
            {
                "A": Timestamp("20130102", tz="US/Eastern"),
                "B": Timestamp("20130102", tz="CET"),
            },
            index=range(5),
        )

        msg = (
            r"invalid info for \[B\] for \[tz\], "
            r"existing_value \[EET\] conflicts with new value \[CET\]"
        )
        with pytest.raises(ValueError, match=msg):
            store.append("df_tz", df)

    # as index
    with ensure_clean_store(setup_path) as store:

        dti = date_range("2000-1-1", periods=3, freq="H", tz="US/Eastern")
        dti = dti._with_freq(None)  # freq doesnt round-trip

        # GH 4098 example
        df = DataFrame({"A": Series(range(3), index=dti)})

        _maybe_remove(store, "df")
        store.put("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)

        _maybe_remove(store, "df")
        store.append("df", df)
        result = store.select("df")
        tm.assert_frame_equal(result, df)


def test_roundtrip_tz_aware_index(setup_path):
    # GH 17618
    time = Timestamp("2000-01-01 01:00:00", tz="US/Eastern")
    df = DataFrame(data=[0], index=[time])

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df, format="fixed")
        recons = store["frame"]
        tm.assert_frame_equal(recons, df)
        assert recons.index[0].value == 946706400000000000


def test_store_index_name_with_tz(setup_path):
    # GH 13884
    df = DataFrame({"A": [1, 2]})
    df.index = DatetimeIndex([1234567890123456787, 1234567890123456788])
    df.index = df.index.tz_localize("UTC")
    df.index.name = "foo"

    with ensure_clean_store(setup_path) as store:
        store.put("frame", df, format="table")
        recons = store["frame"]
        tm.assert_frame_equal(recons, df)


def test_tseries_select_index_column(setup_path):
    # GH7777
    # selecting a UTC datetimeindex column did
    # not preserve UTC tzinfo set before storing

    # check that no tz still works
    rng = date_range("1/1/2000", "1/30/2000")
    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        result = store.select_column("frame", "index")
        assert rng.tz == DatetimeIndex(result.values).tz

    # check utc
    rng = date_range("1/1/2000", "1/30/2000", tz="UTC")
    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        result = store.select_column("frame", "index")
        assert rng.tz == result.dt.tz

    # double check non-utc
    rng = date_range("1/1/2000", "1/30/2000", tz="US/Eastern")
    frame = DataFrame(np.random.randn(len(rng), 4), index=rng)

    with ensure_clean_store(setup_path) as store:
        store.append("frame", frame)
        result = store.select_column("frame", "index")
        assert rng.tz == result.dt.tz


# NOTE(review): test_timezones_fixed_format_frame_non_empty begins here in
# the original file but its body is truncated in this chunk (it continues
# beyond the scraped excerpt), so it is not reproduced.
DataFrame(np.random.randn(len(rng), 4), index=rng)300 store["df"] = df301 result = store["df"]302 tm.assert_frame_equal(result, df)303304 # as data305 # GH11411306 _maybe_remove(store, "df")307 df = DataFrame(308 {309 "A": rng,310 "B": rng.tz_convert("UTC").tz_localize(None),311 "C": rng.tz_convert("CET"),312 "D": range(len(rng)),313 },314 index=rng,315 )316 store["df"] = df317 result = store["df"]318 tm.assert_frame_equal(result, df)319320321def test_timezones_fixed_format_frame_empty(setup_path, tz_aware_fixture):322 # GH 20594323324 dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)325326 with ensure_clean_store(setup_path) as store:327 s = Series(dtype=dtype)328 df = DataFrame({"A": s})329 store["df"] = df330 result = store["df"]331 tm.assert_frame_equal(result, df)332333334def test_timezones_fixed_format_series_nonempty(setup_path, tz_aware_fixture):335 # GH 20594336337 dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)338339 with ensure_clean_store(setup_path) as store:340 s = Series([0], dtype=dtype)341 store["s"] = s342 result = store["s"]343 tm.assert_series_equal(result, s)344345346def test_timezones_fixed_format_series_empty(setup_path, tz_aware_fixture):347 # GH 20594348349 dtype = pd.DatetimeTZDtype(tz=tz_aware_fixture)350351 with ensure_clean_store(setup_path) as store:352 s = Series(dtype=dtype)353 store["s"] = s354 result = store["s"]355 tm.assert_series_equal(result, s)356357358def test_fixed_offset_tz(setup_path):359 rng = date_range("1/1/2000 00:00:00-07:00", "1/30/2000 00:00:00-07:00")360 frame = DataFrame(np.random.randn(len(rng), 4), index=rng)361362 with ensure_clean_store(setup_path) as store:363 store["frame"] = frame364 recons = store["frame"]365 tm.assert_index_equal(recons.index, rng)366 assert rng.tz == recons.index.tz367368369@td.skip_if_windows370def test_store_timezone(setup_path):371 # GH2852372 # issue storing datetime.date with a timezone as it resets when read373 # back in a new timezone374375 # original method376 with 
ensure_clean_store(setup_path) as store:377378 today = datetime.date(2013, 9, 10)379 df = DataFrame([1, 2, 3], index=[today, today, today])380 store["obj1"] = df381 result = store["obj1"]382 tm.assert_frame_equal(result, df)383384 # with tz setting385 with ensure_clean_store(setup_path) as store:386387 with tm.set_timezone("EST5EDT"):388 today = datetime.date(2013, 9, 10)389 df = DataFrame([1, 2, 3], index=[today, today, today])390 store["obj1"] = df391392 with tm.set_timezone("CST6CDT"):393 result = store["obj1"]394395 tm.assert_frame_equal(result, df)396397398def test_legacy_datetimetz_object(datapath, setup_path):399 # legacy from < 0.17.0400 # 8260401 expected = DataFrame(402 {403 "A": Timestamp("20130102", tz="US/Eastern"),404 "B": Timestamp("20130603", tz="CET"),405 },406 index=range(5),407 )408 with ensure_clean_store(409 datapath("io", "data", "legacy_hdf", "datetimetz_object.h5"), mode="r"410 ) as store:411 result = store["df"]412 tm.assert_frame_equal(result, expected)413414415def test_dst_transitions(setup_path):416 # make sure we are not failing on transitions417 with ensure_clean_store(setup_path) as store:418 times = pd.date_range(419 "2013-10-26 23:00",420 "2013-10-27 01:00",421 tz="Europe/London",422 freq="H",423 ambiguous="infer",424 )425 times = times._with_freq(None) # freq doesnt round-trip426427 for i in [times, times + pd.Timedelta("10min")]:428 _maybe_remove(store, "df")429 df = DataFrame({"A": range(len(i)), "B": i}, index=i)430 store.append("df", df)431 result = store.select("df")432 tm.assert_frame_equal(result, df)433434435def test_read_with_where_tz_aware_index(setup_path):436 # GH 11926437 periods = 10438 dts = pd.date_range("20151201", periods=periods, freq="D", tz="UTC")439 mi = pd.MultiIndex.from_arrays([dts, range(periods)], names=["DATE", "NO"])440 expected = DataFrame({"MYCOL": 0}, index=mi)441442 key = "mykey"443 with ensure_clean_path(setup_path) as path:444 with pd.HDFStore(path) as store:445 store.append(key, expected, 
format="table", append=True)446 result = pd.read_hdf(path, key, where="DATE > 20151130")447 tm.assert_frame_equal(result, expected)448449450def test_py2_created_with_datetimez(datapath, setup_path):451 # The test HDF5 file was created in Python 2, but could not be read in452 # Python 3.453 #454 # GH26443455 index = [Timestamp("2019-01-01T18:00").tz_localize("America/New_York")]456 expected = DataFrame({"data": 123}, index=index)457 with ensure_clean_store(458 datapath("io", "data", "legacy_hdf", "gh26443.h5"), mode="r"459 ) as store:460 result = store["key"]
...
test_complex.py
Source:test_complex.py
# Tests for storing complex-valued data in HDFStore (GH10447).
# Reconstructed formatting: the scraped source had original line numbers fused
# into the text; tokens below are unchanged, only layout/comments were restored.
from warnings import catch_warnings

import numpy as np
import pytest

import pandas.util._test_decorators as td

import pandas as pd
from pandas import DataFrame, Series
import pandas._testing as tm
from pandas.tests.io.pytables.common import ensure_clean_path, ensure_clean_store

from pandas.io.pytables import read_hdf

# GH10447


def test_complex_fixed(setup_path):
    """complex64 and complex128 frames round-trip through fixed format."""
    df = DataFrame(
        np.random.rand(4, 5).astype(np.complex64),
        index=list("abcd"),
        columns=list("ABCDE"),
    )

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df")
        reread = read_hdf(path, "df")
        tm.assert_frame_equal(df, reread)

    df = DataFrame(
        np.random.rand(4, 5).astype(np.complex128),
        index=list("abcd"),
        columns=list("ABCDE"),
    )

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df")
        reread = read_hdf(path, "df")
        tm.assert_frame_equal(df, reread)


def test_complex_table(setup_path):
    """complex64 and complex128 frames round-trip through table format."""
    df = DataFrame(
        np.random.rand(4, 5).astype(np.complex64),
        index=list("abcd"),
        columns=list("ABCDE"),
    )

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", format="table")
        reread = read_hdf(path, "df")
        tm.assert_frame_equal(df, reread)

    df = DataFrame(
        np.random.rand(4, 5).astype(np.complex128),
        index=list("abcd"),
        columns=list("ABCDE"),
    )

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", format="table", mode="w")
        reread = read_hdf(path, "df")
        tm.assert_frame_equal(df, reread)


@td.xfail_non_writeable
def test_complex_mixed_fixed(setup_path):
    """Mixed-dtype frame containing complex columns round-trips (fixed format)."""
    complex64 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64
    )
    complex128 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
    )
    df = DataFrame(
        {
            "A": [1, 2, 3, 4],
            "B": ["a", "b", "c", "d"],
            "C": complex64,
            "D": complex128,
            "E": [1.0, 2.0, 3.0, 4.0],
        },
        index=list("abcd"),
    )
    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df")
        reread = read_hdf(path, "df")
        tm.assert_frame_equal(df, reread)


def test_complex_mixed_table(setup_path):
    """Mixed-dtype frame with complex columns supports `where` selection and round-trip."""
    complex64 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex64
    )
    complex128 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
    )
    df = DataFrame(
        {
            "A": [1, 2, 3, 4],
            "B": ["a", "b", "c", "d"],
            "C": complex64,
            "D": complex128,
            "E": [1.0, 2.0, 3.0, 4.0],
        },
        index=list("abcd"),
    )

    with ensure_clean_store(setup_path) as store:
        store.append("df", df, data_columns=["A", "B"])
        result = store.select("df", where="A>2")
        tm.assert_frame_equal(df.loc[df.A > 2], result)

    with ensure_clean_path(setup_path) as path:
        df.to_hdf(path, "df", format="table")
        reread = read_hdf(path, "df")
        tm.assert_frame_equal(df, reread)


def test_complex_across_dimensions_fixed(setup_path):
    """Complex Series and DataFrame both round-trip through fixed format."""
    with catch_warnings(record=True):
        complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
        s = Series(complex128, index=list("abcd"))
        df = DataFrame({"A": s, "B": s})

        objs = [s, df]
        comps = [tm.assert_series_equal, tm.assert_frame_equal]
        for obj, comp in zip(objs, comps):
            with ensure_clean_path(setup_path) as path:
                obj.to_hdf(path, "obj", format="fixed")
                reread = read_hdf(path, "obj")
                comp(obj, reread)


def test_complex_across_dimensions(setup_path):
    """Complex DataFrame round-trips through table format."""
    complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
    s = Series(complex128, index=list("abcd"))
    df = DataFrame({"A": s, "B": s})

    with catch_warnings(record=True):

        objs = [df]
        comps = [tm.assert_frame_equal]
        for obj, comp in zip(objs, comps):
            with ensure_clean_path(setup_path) as path:
                obj.to_hdf(path, "obj", format="table")
                reread = read_hdf(path, "obj")
                comp(obj, reread)


def test_complex_indexing_error(setup_path):
    """Using a complex column as a data column raises TypeError."""
    complex128 = np.array(
        [1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j], dtype=np.complex128
    )
    df = DataFrame(
        {"A": [1, 2, 3, 4], "B": ["a", "b", "c", "d"], "C": complex128},
        index=list("abcd"),
    )
    with ensure_clean_store(setup_path) as store:
        with pytest.raises(TypeError):
            store.append("df", df, data_columns=["C"])


def test_complex_series_error(setup_path):
    """Complex Series in table format needs index=False; with index it raises."""
    complex128 = np.array([1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j, 1.0 + 1.0j])
    s = Series(complex128, index=list("abcd"))

    with ensure_clean_path(setup_path) as path:
        with pytest.raises(TypeError):
            s.to_hdf(path, "obj", format="t")

    with ensure_clean_path(setup_path) as path:
        s.to_hdf(path, "obj", format="t", index=False)
        reread = read_hdf(path, "obj")
        tm.assert_series_equal(s, reread)


def test_complex_append(setup_path):
    """Appending a complex-valued frame twice and selecting it back works."""
    df = DataFrame(
        {"a": np.random.randn(100).astype(np.complex128), "b": np.random.randn(100)}
    )

    with ensure_clean_store(setup_path) as store:
        store.append("df", df, data_columns=["b"])
        store.append("df", df)
        result = store.select("df")
        # NOTE(review): source is truncated here — the final comparison of
        # `result` (expected: the frame concatenated with itself) is missing
        # from the scrape; restore it from the upstream file.
Learn to execute automation testing from scratch with the LambdaTest Learning Hub. Right from setting up the prerequisites to running your first automation test, to following best practices and diving deeper into advanced test scenarios, the LambdaTest Learning Hub compiles a list of step-by-step guides to help you become proficient with different test automation frameworks, i.e., Selenium, Cypress, TestNG, etc.
You can also refer to the video tutorials on the LambdaTest YouTube channel to get step-by-step demonstrations from industry experts.
Get 100 minutes of automation testing for FREE!