import h5py as h5
import numpy as np
################################################################################
def _entry_offsets(counts):
    """Return cumulative object offsets for an array of per-entry counts.

    The result has ``len(counts) + 1`` elements: element ``i`` is the number
    of objects stored before entry ``i`` and the last element is the total.
    """
    offsets = np.zeros(len(counts) + 1, dtype=int)
    offsets[1:] = np.cumsum(counts)
    return offsets


def _normalize_subset(subset, total):
    """Turn ``subset`` (an int or a ``(start, stop)`` pair) into clamped bounds.

    Bounds are resolved the way a Python slice would resolve them against a
    sequence of ``total`` entries, so they never point past the end of the file.
    """
    if isinstance(subset, (int, np.integer)):
        subset = (0, subset)
    start, stop, _ = slice(int(subset[0]), int(subset[1])).indices(int(total))
    return start, max(start, stop)


def _select_datasets(entries, desired_datasets):
    """Keep the entries whose name contains any of the requested substrings."""
    if isinstance(desired_datasets, str):
        # list("jet") would give ['j', 'e', 't'] and match almost everything.
        desired_datasets = [desired_datasets]
    else:
        desired_datasets = list(desired_datasets)

    kept = []
    for entry in entries:
        if any(wanted in entry for wanted in desired_datasets):
            kept.append(entry)
        else:
            print("Not reading out %s from the file...." % (entry))
    return kept


def load(filename=None, verbose=False, desired_datasets=None, subset=None):
    """ Reads all, or a subset of the data, from the HDF5 file to fill a data dictionary.

    Also returns an event dictionary holding one ``None`` placeholder per
    dataset that was read; it is meant to be filled entry by entry later on.

    Args:
        **filename** (string): Name of the input file

        **verbose** (boolean): True if debug output is required

        **desired_datasets** (list or string): Substrings selecting the
        datasets to be read; an entry is kept if its name contains any of
        them. ``None`` reads everything.

        **subset** (int or pair of ints): Entries to be read from the input
        file, either a number of entries counted from the start or a
        ``(start, stop)`` range. The range is clamped to the file contents.

    Returns:
        **ourdata** (dict): Selected data from HDF5 file

        **event** (dict): An empty event dictionary to be filled by individual events

        ``None`` is returned instead of the pair when no filename is given.
    """
    if filename is None:
        print("No filename passed in! Can't open file.\n")
        return None

    # Nothing is written, so open read-only; the context manager closes the
    # file even if one of the reads below raises.
    with h5.File(filename, 'r') as f:
        ourdata = {}
        ourdata['datasets_and_counters'] = {}
        ourdata['datasets_and_indices'] = {}
        ourdata['list_of_counters'] = []
        ourdata['all_datasets'] = []
        ourdata['nentries'] = f.attrs['nentries']
        event = {}

        event_range = None
        if subset is not None:
            event_range = _normalize_subset(subset, ourdata['nentries'])
            ourdata['nentries'] = event_range[1] - event_range[0]

        # Get the datasets and counters.
        # The decode is there because the names were stored as numpy.bytes
        for row in f['datasets_and_counters']:
            dataset_name = row[0].decode()
            counter_name = row[1].decode()
            ourdata['datasets_and_counters'][dataset_name] = counter_name
            ourdata['datasets_and_indices'][dataset_name] = "%s_INDEX" % (counter_name)
            ourdata['list_of_counters'].append(counter_name)
            ourdata['all_datasets'].append(dataset_name)
            ourdata['all_datasets'].append(counter_name)  # Get the counters as well

        # We may have added some strings (like counters) multiple times.
        ourdata['list_of_counters'] = sorted(set(ourdata['list_of_counters']))
        ourdata['all_datasets'] = sorted(set(ourdata['all_datasets']))

        # Pull out the SINGLETON datasets: one "__:__"-separated string of names.
        singleton_row = f['_SINGLETONGROUP_'][0]
        singleton_names = singleton_row[1].decode().split("__:__")
        ourdata['_SINGLETON_'] = [s for s in singleton_names if s != 'INDEX']

        # Only keep select data from file
        if desired_datasets is not None:
            ourdata['all_datasets'] = _select_datasets(ourdata['all_datasets'],
                                                       desired_datasets)
        entries = ourdata['all_datasets']

        if verbose:
            print("Datasets and counters:")
            print(ourdata['datasets_and_counters'])
            print("\nDatasets and indices:")
            print(ourdata['datasets_and_indices'])
            print("\nList of counters:")
            print(ourdata['list_of_counters'])

        # Pull out the counters first and build the indices
        print("Building the indices...")
        object_ranges = {}  # counter name -> (first row, end row) of its objects
        for name in ourdata['list_of_counters']:
            if event_range is None:
                first = 0
                counts = f[name][:]
                offsets = _entry_offsets(counts)
            else:
                # Everything up to `stop` is needed to know how many objects
                # precede the first requested entry.
                first, stop = event_range
                head = f[name][:stop]
                offsets = _entry_offsets(head)
                first = min(first, len(head))
                counts = head[first:]

            ourdata[name] = counts
            # The stored index is local: it starts at 0 for the first entry read.
            ourdata["%s_INDEX" % (name)] = offsets[first:-1] - offsets[first]
            object_ranges[name] = (int(offsets[first]), int(offsets[-1]))
        print("Built the indices!")

        counters = set(ourdata['list_of_counters'])
        singletons = set(ourdata['_SINGLETON_'])

        # Loop over the entries we want and pull out the data.
        for name in entries:
            node = f[name]
            if verbose:
                print(node)

            # Group names are listed too; only real datasets carry data.
            if not isinstance(node, h5.Dataset):
                continue

            if name in counters:
                # Already read (and sliced) while building the indices.
                pass
            elif event_range is None:
                ourdata[name] = node[:]
            elif name in singletons:
                # Singletons hold one value per entry, so entry numbers apply.
                ourdata[name] = node[event_range[0]:event_range[1]]
            else:
                # Other datasets hold one row per object: translate the entry
                # range into the matching row range through their counter.
                counter_name = ourdata['datasets_and_counters'][name]
                first_row, end_row = object_ranges[counter_name]
                ourdata[name] = node[first_row:end_row]

            event[name] = None  # This will be filled for individual events

    print("Data is read in and input file is closed.")
    return ourdata, event
################################################################################
################################################################################
def unpack(event, data, n=0):
    """ Fills the event dictionary with the values of one selected entry.

    Counters and singleton datasets contribute the single value stored for
    entry ``n``. Every other dataset (except keys containing ``"INDEX"``)
    contributes the slice of its objects that belongs to entry ``n``, located
    through the dataset's index and counter arrays. A dataset whose index or
    counter array is empty is left untouched.

    Args:
        **event** (dict): Event dictionary to be filled (modified in place)

        **data** (dict): Data dictionary used to fill the event dictionary

        **n** (int): Number of the entry to pull out of ``data``
    """
    # Both kinds of key hold exactly one value per entry.
    per_entry_keys = set(data['list_of_counters']) | set(data['_SINGLETON_'])

    for key in event:
        if key in per_entry_keys:
            event[key] = data[key][n]
            continue

        if "INDEX" in key:
            continue

        index_column = data[data['datasets_and_indices'][key]]
        count_column = data[data['datasets_and_counters'][key]]

        # Without both values there is no slice to take; skipping avoids
        # reusing the start/length computed for a previous key.
        if len(index_column) == 0 or len(count_column) == 0:
            continue

        first = index_column[n]
        nobjs = count_column[n]
        event[key] = data[key][first:first + nobjs]
################################################################################
def get_nentries(filename):
    """ Get the number of entries in the file.

    Args:
        **filename** (string): Name of the input file

    Returns:
        The value of the file's ``nentries`` attribute, or ``None`` (after
        printing a message) when the file has no such attribute.
    """
    # Read-only access is enough to look at an attribute, and the context
    # manager closes the file on every path.
    with h5.File(filename, 'r') as f:
        if 'nentries' in f.attrs:
            return f.attrs['nentries']

    print("\nFile does not contain the attribute, \"nentries\"\n")
    return None