Source code for h5hep.read

import h5py as h5
import numpy as np

################################################################################
def _event_offsets(counts):
    """Return the offset of each event's first object in a flat data array.

    The result is the exclusive cumulative sum of ``counts``: event ``i``
    starts where events ``0..i-1`` end.
    """
    offsets = np.zeros(len(counts), dtype=int)
    if len(counts) > 1:
        offsets[1:] = np.cumsum(counts[:-1])
    return offsets


def _select_entries(entries, desired_datasets):
    """Remove, in place, every entry containing none of the desired substrings."""
    if isinstance(desired_datasets, str):
        # A bare string would otherwise be split into single characters.
        desired_datasets = [desired_datasets]
    else:
        desired_datasets = list(desired_datasets)

    kept = []
    for entry in entries:
        if any(wanted in entry for wanted in desired_datasets):
            kept.append(entry)
        else:
            print("Not reading out %s from the file...." % (entry))
    # Slice-assign so that every alias of the list sees the filtered result.
    entries[:] = kept


def load(filename=None, verbose=False, desired_datasets=None, subset=None):
    """
    Reads all, or a subset of the data, from the HDF5 file to fill a data
    dictionary. Returns an empty dictionary to be filled later with select
    events.

    The file is opened read-only and is always closed, even if reading fails.

    Args:
        **filename** (string): Name of the input file

        **verbose** (boolean): True if debug output is required

        **desired_datasets** (list): Datasets to be read from input file.
        An entry is kept when any of these strings is a substring of its
        name. A single string is treated as a one-element list.

        **subset** (int): Number of events to be read from input file. A
        ``(start, stop)`` pair selects the events ``start`` to ``stop - 1``
        instead.

    Returns:
        **ourdata** (dict): Selected data from HDF5 file

        **event** (dict): An empty event dictionary to be filled by
        individual events

        If no filename is given, a message is printed and a single ``None``
        is returned instead of the pair.
    """
    if filename is None:
        print("No filename passed in! Can't open file.\n")
        return None

    ourdata = {
        "datasets_and_counters": {},
        "datasets_and_indices": {},
        "list_of_counters": [],
        "all_datasets": [],
    }
    event = {}

    # Read-only mode: nothing is written, so write access must not be required.
    with h5.File(filename, "r") as f:
        ourdata["nentries"] = f.attrs["nentries"]
        if subset is not None:
            if isinstance(subset, (int, np.integer)):
                subset = (0, subset)
            ourdata["nentries"] = subset[1] - subset[0]

        # Get the datasets and counters.
        # The decode is there because the names were stored as numpy.bytes.
        for vals in f["datasets_and_counters"]:
            dataset = vals[0].decode()
            counter = vals[1].decode()
            ourdata["datasets_and_counters"][dataset] = counter
            ourdata["datasets_and_indices"][dataset] = "%s_INDEX" % (counter)
            ourdata["list_of_counters"].append(counter)
            # The counters are datasets in their own right, so list them too.
            ourdata["all_datasets"].extend((dataset, counter))

        # We may have added some strings (like counters) multiple times.
        ourdata["list_of_counters"] = np.unique(ourdata["list_of_counters"]).tolist()
        ourdata["all_datasets"] = np.unique(ourdata["all_datasets"]).tolist()

        # Pull out the SINGLETON datasets: their names are stored as one
        # "__:__"-separated string.
        singleton_record = f["_SINGLETONGROUP_"][0]
        singletons = singleton_record[1].decode().split("__:__")
        singletons.remove("INDEX")
        ourdata["_SINGLETON_"] = singletons

        # This is the same list object as ourdata["all_datasets"], so the
        # filtering below is reflected in the returned dictionary as well.
        entries = ourdata["all_datasets"]

        # Only keep select data from file
        if desired_datasets is not None:
            _select_entries(entries, desired_datasets)

        if verbose:
            print("Datasets and counters:")
            print(ourdata["datasets_and_counters"])
            print("\nDatasets and indices:")
            print(ourdata["datasets_and_indices"])

        # Pull out the counters first and build the indices
        print("Building the indices...")
        # For each counter, the [first, last) range of objects in the flat
        # data arrays that belongs to the selected events.
        object_ranges = {}
        for name in ourdata["list_of_counters"]:
            if subset is None:
                counts = f[name][:]
            else:
                # The events before the subset are needed to know where the
                # subset's objects start in the flat arrays.
                upto_stop = f[name][: subset[1]]
                first = int(np.sum(upto_stop[: subset[0]]))
                counts = upto_stop[subset[0] :]
                object_ranges[name] = (first, first + int(np.sum(counts)))
            ourdata[name] = counts
            # Offsets are relative to the start of the data that is loaded.
            ourdata["%s_INDEX" % (name)] = _event_offsets(counts)
        print("Built the indices!")

        # Loop over the entries we want and pull out the data.
        counters = set(ourdata["list_of_counters"])
        per_event = set(singletons)
        for name in entries:
            node = f[name]
            if verbose:
                print(node)
            if not isinstance(node, h5.Dataset):
                continue

            if name not in counters:  # The counters were already read above
                if subset is None:
                    ourdata[name] = node[:]
                elif name in per_event:
                    # Singletons hold one value per event.
                    ourdata[name] = node[subset[0] : subset[1]]
                else:
                    # Object data is flat, so it is selected by object
                    # offsets rather than by event numbers.
                    counter = ourdata["datasets_and_counters"].get(name)
                    first, last = object_ranges.get(
                        counter, (subset[0], subset[1])
                    )
                    ourdata[name] = node[first:last]

            event[name] = None  # This will be filled for individual events

    print("Data is read in and input file is closed.")

    return ourdata, event
################################################################################ ################################################################################
def unpack(event, data, n=0):
    """
    Fills the event dictionary with selected events.

    Keys named in ``data["list_of_counters"]`` or ``data["_SINGLETON_"]``
    receive the single value stored for event ``n``. Any other key without
    "INDEX" in its name receives the slice of its data array that belongs to
    event ``n``, located through its index and counter arrays. Keys with
    "INDEX" in their name are left untouched, as are keys whose index or
    counter array is empty.

    Args:
        **event** (dict): Event dictionary to be filled

        **data** (dict): Data dictionary used to fill the event dictionary

        **n** (int): Number of the event to unpack
    """
    # Build the lookup once so each key costs one hash probe, not a list scan.
    per_event = set(data["list_of_counters"])
    per_event.update(data.get("_SINGLETON_", ()))

    for key in event:
        if key in per_event:
            event[key] = data[key][n]
        elif "INDEX" not in key:
            starts = data[data["datasets_and_indices"][key]]
            counts = data[data["datasets_and_counters"][key]]
            # Both lookups are guarded together so that a value left over
            # from a previous key can never be used for this one.
            if len(starts) > 0 and len(counts) > 0:
                first = starts[n]
                event[key] = data[key][first : first + counts[n]]
################################################################################
def get_nentries(filename):
    """
    Get the number of entries in the file.

    The file is opened read-only and is closed again before returning.

    Args:
        **filename** (string): Name of the input file

    Returns:
        The value of the file's "nentries" attribute, or None (after
        printing a message) if the file does not have that attribute.
    """
    # Read-only mode: looking up an attribute must not need write access.
    with h5.File(filename, "r") as f:
        if "nentries" in f.attrs:
            return f.attrs.get("nentries")

    print('\nFile does not contain the attribute, "nentries"\n')
    return None