问题
I have a problem reading hdf file in pandas. As of now, I don't know the keys of the file.
How do I read the file [data.hdf] in such a case? And, my file is .hdf not .h5 , Does it make a difference it terms data fetching?
I see that you need a 'group identifier in the store'
pandas.io.pytables.read_hdf(path_or_buf, key, **kwargs)
I was able to get the metadata from pytables
File(filename=data.hdf, title='', mode='a', root_uep='/', filters=Filters(complevel=0, shuffle=False, fletcher32=False, least_significant_digit=None))
/ (RootGroup) ''
/UID (EArray(317,)) ''
atom := StringAtom(itemsize=36, shape=(), dflt='')
maindim := 0
flavor := 'numpy'
byteorder := 'irrelevant'
chunkshape := (100,)
/X Y (EArray(8319, 2, 317)) ''
atom := Float32Atom(shape=(), dflt=0.0)
maindim := 0
flavor := 'numpy'
byteorder := 'little'
chunkshape := (1000, 2, 100)
How do I make it readable via pandas?
回答1:
First (.hdf or .h5) doesn't make any difference. Second, I'm not sure about the pandas, but I read the HDF5 key like:
import h5py
h5f = h5py.File("test.h5", "r")
h5f.keys()
or
h5f.values()
回答2:
Docs are here. However you will jot be able to directly read the format you show with pandas. You need to use PyTables to read it in. pandas can read in PyTables Table format directly even without the meta data that pandas uses.
回答3:
pyhdf
will be alternative option for hdf file in python
you can read and see keys from:
import pyhdf
hdf = pyhdf.SD.SD('file.hdf')
hdf.datasets()
I hope it will help you! gud luck
回答4:
You can use this simple function to see the variable names of any the HDF file (only works for the variables in the scientific mode)
from pyhdf.SD import *
def HDFvars(File):
"""
Extract variable names for an hdf file
"""
# hdfFile = SD.SD(File, mode=1)
hdfFile = SD(File, mode=1)
dsets = hdfFile.datasets()
k = []
for key in dsets.keys():
k.append(key)
k.sort()
hdfFile.end() # close the file
return k
If the variables aren't in the scientific mode, you can try whit pyhdf.V using the following program that shows the contents of the vgroups contained inside any HDF file.
from pyhdf.HDF import *
from pyhdf.V import *
from pyhdf.VS import *
from pyhdf.SD import *
def describevg(refnum):
# Describe the vgroup with the given refnum.
# Open vgroup in read mode.
vg = v.attach(refnum)
print "----------------"
print "name:", vg._name, "class:",vg._class, "tag,ref:",
print vg._tag, vg._refnum
# Show the number of members of each main object type.
print "members: ", vg._nmembers,
print "datasets:", vg.nrefs(HC.DFTAG_NDG),
print "vdatas: ", vg.nrefs(HC.DFTAG_VH),
print "vgroups: ", vg.nrefs(HC.DFTAG_VG)
# Read the contents of the vgroup.
members = vg.tagrefs()
# Display info about each member.
index = -1
for tag, ref in members:
index += 1
print "member index", index
# Vdata tag
if tag == HC.DFTAG_VH:
vd = vs.attach(ref)
nrecs, intmode, fields, size, name = vd.inquire()
print " vdata:",name, "tag,ref:",tag, ref
print " fields:",fields
print " nrecs:",nrecs
vd.detach()
# SDS tag
elif tag == HC.DFTAG_NDG:
sds = sd.select(sd.reftoindex(ref))
name, rank, dims, type, nattrs = sds.info()
print " dataset:",name, "tag,ref:", tag, ref
print " dims:",dims
print " type:",type
sds.endaccess()
# VS tag
elif tag == HC.DFTAG_VG:
vg0 = v.attach(ref)
print " vgroup:", vg0._name, "tag,ref:", tag, ref
vg0.detach()
# Unhandled tag
else:
print "unhandled tag,ref",tag,ref
# Close vgroup
vg.detach()
# Open HDF file in readonly mode.
filename = 'yourfile.hdf'
hdf = HDF(filename)
# Initialize the SD, V and VS interfaces on the file.
sd = SD(filename)
vs = hdf.vstart()
v = hdf.vgstart()
# Scan all vgroups in the file.
ref = -1
while 1:
try:
ref = v.getid(ref)
print ref
except HDF4Error,msg: # no more vgroup
break
describevg(ref)
来源:https://stackoverflow.com/questions/24236252/read-the-properties-of-hdf-file-in-python