# Source code for h5hep.read

import h5py as h5
import numpy as np

################################################################################
def load(filename=None,verbose=False,desired_datasets=None,subset=None):
    """ Read all, or a subset, of the data in an HDF5 file into a data dictionary.

    Also returns an empty event dictionary, with one key per dataset that
    was read, to be filled later with individual events.

    Args:
        **filename** (string): Name of the input file

        **verbose** (boolean): True if debug output is required

        **desired_datasets** (list): Datasets to be read from input file.
        An entry of the file is kept when any of these strings is a
        substring of its name. A single string is treated as a
        one-element list.

        **subset** (int or pair of ints): Range of entries to be read from
        the input file. An integer ``n`` is treated as ``(0, n)``.

    Returns:
        **ourdata** (dict): Selected data from HDF5 file

        **event** (dict): An empty event dictionary to be filled by individual events

        If no filename is passed in, a message is printed and a single
        ``None`` is returned instead of the pair above.

    """

    if filename is None:
        print("No filename passed in! Can't open file.\n")
        return None

    ourdata = {}
    ourdata['datasets_and_counters'] = {}
    ourdata['datasets_and_indices'] = {}
    ourdata['list_of_counters'] = []
    ourdata['all_datasets'] = []

    event = {}

    # Nothing is written here, so open read-only (this also works for files
    # without write permission). The ``with`` block closes the file even if
    # one of the reads below raises.
    with h5.File(filename,'r') as f:

        ourdata['nentries'] = f.attrs['nentries']

        if subset is not None:
            if isinstance(subset,(int,np.integer)):
                subset = (0,subset)
            ourdata['nentries'] = subset[1] - subset[0]

        # Get the datasets and counters
        for vals in f['datasets_and_counters']:
            # The decode is there because vals were stored as numpy.bytes
            name = vals[0].decode()
            counter = vals[1].decode()
            ourdata['datasets_and_counters'][name] = counter
            ourdata['datasets_and_indices'][name] = "%s_INDEX" % (counter)
            ourdata['list_of_counters'].append(counter)
            ourdata['all_datasets'].append(name)
            ourdata['all_datasets'].append(counter) # Get the counters as well

        # We may have added some strings (like counters) multiple times.
        ourdata['list_of_counters'] = sorted(set(ourdata['list_of_counters']))
        ourdata['all_datasets'] = sorted(set(ourdata['all_datasets']))

        # Pull out the SINGLETON datasets
        sg = f['_SINGLETONGROUP_'][0] # This is a numpy array of strings
        vals = sg[1].decode().split("__:__")
        vals.remove('INDEX')
        ourdata['_SINGLETON_'] = vals

        # This is the same list object as ourdata['all_datasets'], so the
        # filtering below is visible through the returned dictionary too.
        entries = ourdata['all_datasets']

        ########################################################
        # Only keep select data from file
        ########################################################
        if desired_datasets is not None:
            if isinstance(desired_datasets,str):
                # list("abc") would split the name into single characters.
                desired_datasets = [desired_datasets]
            elif not isinstance(desired_datasets,list):
                desired_datasets = list(desired_datasets)

            kept = []
            for entry in entries:
                if any(desdat in entry for desdat in desired_datasets):
                    kept.append(entry)
                else:
                    print("Not reading out %s from the file...." % (entry))
            entries[:] = kept
        #######################################################

        if verbose:
            print("Datasets and counters:")
            print(ourdata['datasets_and_counters'])
            print("\nDatasets and indices:")
            print(ourdata['datasets_and_indices'])

        # Pull out the counters first and build the indices
        print("Building the indices...")

        for name in ourdata['list_of_counters']:

            if subset is not None:
                counters = f[name][subset[0]:subset[1]]
            else:
                counters = f[name][:]
            ourdata[name] = counters

            # index[i] is the number of objects in all entries before i,
            # i.e. where the objects of entry i start.
            index = np.zeros(len(counters),dtype=int)
            index[1:] = np.cumsum(counters[:-1])
            ourdata["%s_INDEX" % (name)] = index

        print("Built the indices!")

        counter_names = set(ourdata['list_of_counters'])

        # Loop over the entries we want and pull out the data.
        for name in entries:

            if verbose:
                print(f[name])

            data = f[name]

            # Groups are listed among the entries as well; only datasets
            # hold data to read.
            if isinstance(data,h5.Dataset):
                # Counters were already read, with the same range, above.
                if name not in counter_names:
                    # NOTE(review): the same entry range is applied to every
                    # dataset, including ones that may hold several objects
                    # per entry, while the indices built above address
                    # objects. Confirm this is intended for subset reads.
                    if subset is not None:
                        ourdata[name] = data[subset[0]:subset[1]]
                    else:
                        ourdata[name] = data[:]

                event[name] = None # This will be filled for individual events

                if verbose:
                    print(data)

    print("Data is read in and input file is closed.")

    return ourdata,event
################################################################################ ################################################################################
def unpack(event,data,n=0):
    """ Fill the event dictionary with the data of a single entry.

    Every key already present in ``event`` is overwritten in place.
    Counters and singleton datasets get the single value stored for entry
    ``n``. Every other dataset gets the slice of its objects that belongs
    to entry ``n``, located through the dataset's index and counter arrays.
    Keys containing "INDEX" that are neither counters nor singletons are
    left untouched.

    Args:
        **event** (dict): Event dictionary to be filled

        **data** (dict): Data dictionary used to fill the event dictionary

        **n** (int): Position of the entry to unpack

    """

    counters = data['list_of_counters']
    singletons = data['_SINGLETON_']

    for key in event:

        if key in counters or key in singletons:
            event[key] = data[key][n]

        elif "INDEX" not in key:
            index_array = data[data['datasets_and_indices'][key]]
            count_array = data[data['datasets_and_counters'][key]]

            if len(index_array)>0 and len(count_array)>0:
                start = index_array[n]
                event[key] = data[key][start:start+count_array[n]]
            else:
                # No entries were read for this dataset. Give back an empty
                # selection rather than slicing with values left over from
                # a previously handled key (or with unset variables).
                event[key] = data[key][:0]
################################################################################
def get_nentries(filename):
    """ Get the number of entries in the file.

    Args:
        **filename** (string): Name of the input file

    Returns:
        The value of the file's ``nentries`` attribute, or ``None`` (after
        printing a message) if the file does not have that attribute.

    """

    # Only an attribute is read, so open read-only. The ``with`` block
    # closes the file on every path, including when an exception is raised.
    with h5.File(filename,'r') as f:
        if 'nentries' in f.attrs:
            return f.attrs.get('nentries')

    print("\nFile does not contain the attribute, \"nentries\"\n")
    return None