问题
I am hoping for some help in opening a corrupted HDF5 file. I am accessing PyTables via Pandas, but a `pd.read_hdf()` call produces the following error. I don't know anything about the inner workings of PyTables.
I believe the corruption occurred because the process saving to the file (appending every 10 seconds or so) got duplicated, so there were then two identical processes appending to the same file. I am not sure why this would corrupt the file rather than simply duplicate data, but the two problems occurred together, which is why I think they are causally related.
---------------
HDF5ExtError Traceback (most recent call last)
<ipython-input-37-99558b43d768> in <module>()
----> 1 ES2 = h.read('./ES_201509-1')
/Users/AFK/Desktop/fastback/historical_store.pyc in read(self, path, key, **kwargs)
53 frame. Extra keyword args are all passed down to pandas.read_hdf().
54 """
---> 55 df = pd.read_hdf(path, key, **kwargs)
56 df.index = pd.to_datetime(df.Time)
57 del df['Time']
//anaconda/lib/python2.7/site-packages/pandas/io/pytables.pyc in read_hdf(path_or_buf, key, **kwargs)
326 # if there is an error, close the store
327 try:
--> 328 store.close()
329 except:
330 pass
//anaconda/lib/python2.7/site-packages/pandas/io/pytables.pyc in close(self)
566 """
567 if self._handle is not None:
--> 568 self._handle.close()
569 self._handle = None
570
//anaconda/lib/python2.7/site-packages/tables/file.pyc in close(self)
2726
2727 # Close all loaded nodes.
-> 2728 self.root._f_close()
2729
2730 self._node_manager.shutdown()
//anaconda/lib/python2.7/site-packages/tables/group.pyc in _f_close(self)
907 # this is not an explicit close issued by the user.
908 if not (self._v__deleting or self._v_objectid is None):
--> 909 self._g_close_descendents()
910
911 # When all the descendents have been closed, close this group.
//anaconda/lib/python2.7/site-packages/tables/group.pyc in _g_close_descendents(self)
870
871 node_manager = self._v_file._node_manager
--> 872 node_manager.close_subtree(self._v_pathname)
873
874 _g_closeDescendents = previous_api(_g_close_descendents)
//anaconda/lib/python2.7/site-packages/tables/file.pyc in close_subtree(self, prefix)
540 if path.startswith(prefix) and '/_i_' not in path
541 ]
--> 542 self._close_nodes(paths, cache.pop)
543
544 # Close everything else (i.e. indices)
//anaconda/lib/python2.7/site-packages/tables/file.pyc in _close_nodes(nodepaths, get_node)
515 node._g_close()
516 else:
--> 517 node._f_close()
518 del node
519 except ClosedNodeError:
//anaconda/lib/python2.7/site-packages/tables/table.pyc in _f_close(self, flush)
3034 # Flush right now so the row object does not get in the middle.
3035 if flush:
-> 3036 self.flush()
3037
3038 # Some warnings can be issued after calling `self._g_set_location()`
//anaconda/lib/python2.7/site-packages/tables/table.pyc in flush(self)
2969 if self.indexed and self.autoindex:
2970 # Flush any unindexed row
-> 2971 rowsadded = self.flush_rows_to_index(_lastrow=True)
2972 assert rowsadded <= 0 or self._indexedrows == self.nrows, \
2973 ("internal error: the number of indexed rows (%d) "
//anaconda/lib/python2.7/site-packages/tables/table.pyc in flush_rows_to_index(self, _lastrow)
2578 if nrows > 0 and not col.index.dirty:
2579 rowsadded = self._add_rows_to_index(
-> 2580 colname, start, nrows, _lastrow, update=True)
2581 self._unsaved_indexedrows -= rowsadded
2582 self._indexedrows += rowsadded
//anaconda/lib/python2.7/site-packages/tables/table.pyc in _add_rows_to_index(self, colname, start, nrows, lastrow, update)
2609 if lastrow and startLR < self.nrows:
2610 index.append_last_row(
-> 2611 [self._read(startLR, self.nrows, 1, colname)],
2612 update=update)
2613 indexedrows += self.nrows - startLR
//anaconda/lib/python2.7/site-packages/tables/table.pyc in _read(self, start, stop, step, field, out)
1895 self._read_field_name(result, start, stop, step, field)
1896 else:
-> 1897 self.row._fill_col(result, start, stop, step, field)
1898
1899 if select_field:
//anaconda/lib/python2.7/site-packages/tables/tableextension.so in tables.tableextension.Row._fill_col (tables/tableextension.c:12653)()
//anaconda/lib/python2.7/site-packages/tables/tableextension.so in tables.tableextension.Table._read_records (tables/tableextension.c:6721)()
HDF5ExtError: HDF5 error back trace
File "H5Dio.c", line 174, in H5Dread
can't read data
File "H5Dio.c", line 449, in H5D_read
can't read data
File "H5Dchunk.c", line 1729, in H5D_chunk_read
unable to read raw data chunk
File "H5Dchunk.c", line 2760, in H5D_chunk_lock
data pipeline read failed
File "H5Z.c", line 1120, in H5Z_pipeline
filter returned failure during read
File "H5Zdeflate.c", line 125, in H5Z_filter_deflate
inflate() failed
End of HDF5 error back trace
Problems reading records.
回答1:
Your file is corrupted ("borked"), and there is no way to recover from this. Using multiple threads or processes as writers to the same file is specifically warned against; see the HDF5 (PyTables) caveats in the pandas IO documentation.
HDF5 is NOT threadsafe/process safe for writers.
来源:https://stackoverflow.com/questions/31523985/opening-a-corrupted-pytables-hdf5-file