Source code for h5hep.write

import numpy as np
import h5py as h5


################################################################################
def initialize():
    """
    Creates an empty data dictionary

    The dictionary is seeded with the three bookkeeping entries
    (``groups``, ``datasets_and_counters`` and ``list_of_counters``) and
    with the ``_SINGLETON_`` group, which is used for variables that have
    only one entry per event.

    Returns:

        **data** (dict): An empty data dictionary
    """
    # The singleton group always exists and is counted by this key.
    singleton_counter = "_SINGLETON_/INDEX"

    data = {
        "groups": {"_SINGLETON_": ["INDEX"]},
        "datasets_and_counters": {"_SINGLETON_": singleton_counter},
        "list_of_counters": [singleton_counter],
    }
    data[singleton_counter] = []

    return data
################################################################################
def clear_event(data):
    """
    Clears the data from the data dictionary - should the name of the
    function change?

    Lists are emptied in place, ``int`` values are reset to ``0`` and
    ``float`` values are reset to ``0.0``. Values of any other type
    (``None``, dictionaries, ...) are left untouched.

    The ``list_of_counters`` entry is bookkeeping rather than event data,
    so it is deliberately skipped: emptying it would make the dictionary
    forget which counters exist.

    Args:

        **data** (dict): The dictionary to be cleared
    """
    for key, value in data.items():
        # Registry of counter names, not per-event data: never clear it.
        if key == "list_of_counters":
            continue

        # Exact type matches are intentional for the scalars so that
        # bool and other int/float subclasses are not silently overwritten.
        if isinstance(value, list):
            value.clear()
        elif type(value) is int:
            data[key] = 0
        elif type(value) is float:
            data[key] = 0.0
#''' ################################################################################ # Create a single event (dictionary) that will eventually be used to fill # the overall dataset ################################################################################
def create_single_event(data):
    """
    Creates an event dictionary that will be used to collect data and then
    packed into the master data dictionary.

    The event has exactly the same keys, in the same order, as ``data``.

    Args:

        **data** (dict): Data dictionary that will hold all the data from
        the events.

    Returns:

        **event** (dict): The new event dictionary
    """

    def starting_value(name, stored):
        # Keys ending in "index" share the very same object as ``data``.
        if name[-5:] == "index":
            return stored
        # Singleton datasets start as None until the user assigns a value.
        if name in data["groups"]["_SINGLETON_"]:
            return None
        # Counters start at zero.
        if name in data["list_of_counters"]:
            return 0
        # Everything else is a shallow copy of what ``data`` currently holds.
        return stored.copy()

    return {name: starting_value(name, stored) for name, stored in data.items()}
################################################################################
# This adds a group to the dictionary, similar to CreateBranch in ROOT
################################################################################
def create_group(data, groupname, counter=None):
    """
    Adds a group in the dictionary

    If the group already exists nothing is changed, so calling this twice
    for the same group does not discard the datasets already registered
    under it.

    Args:

        **data** (dict): Dictionary to which the group will be added

        **groupname** (string): Name of the group to be added

        **counter** (string): Name of the counter key. None by default
    """
    # Groups are registered under data["groups"], so that is where a
    # duplicate has to be looked for. A name that collides with an existing
    # top-level key is rejected as well.
    if groupname in data["groups"] or groupname in data:
        print("\033[1m%s\033[0m is already in the dictionary!" % (groupname))
        return

    data["groups"][groupname] = []
    print("Adding group \033[1m%s\033[0m" % (groupname))

    if counter is None:
        print("----------------------------------------------------")
        print("There is no counter to go with group \033[1m%s\033[0m" % (groupname))
        print("Are you sure that's what you want?")
        print("-----------------------------------------------------")
        return

    # The counter is stored as the first dataset of the group, under the
    # full "group/counter" name.
    data["groups"][groupname].append(counter)
    name = "%s/%s" % (groupname, counter)
    data["datasets_and_counters"][groupname] = name

    if name not in data["list_of_counters"]:
        data["list_of_counters"].append(name)

    data[name] = []
    print(
        "Adding a counter for \033[1m%s\033[0m as \033[1m%s\033[0m"
        % (groupname, counter)
    )
################################################################################
# This adds a dataset to a group in the dictionary, similar to
# CreateBranch in ROOT
################################################################################
def create_dataset(data, datasets, group=None, dtype=None):
    """
    Adds a dataset to a group in a dictionary. If the group does not exist,
    it will be created, with a counter named ``n<group>``.

    If no group is given, the datasets are added to the ``_SINGLETON_``
    group, i.e. they are expected to hold one value per event.

    Args:

        **data** (dict): Dictionary that contains the group

        **datasets** (list): Dataset(s) to be added to the group. A single
        name may be passed instead of a list or tuple of names.

        **group** (string): Name of group the dataset will be added to.
        None by default

        **dtype** (type): The data type. None by default - currently unused

    Returns:

        **0**: Always, whether or not a group was given
    """
    # Accept a bare name as well as a list/tuple of names.
    if not isinstance(datasets, (list, tuple)):
        datasets = [datasets]

    if group is None:
        print("-----------------------------------------------")
        print("You need to assign this dataset(s) to a group!")
        print("Groups are not added")
        print("-----------------------------------------------")

        singletons = data["groups"]["_SINGLETON_"]
        for dataset in datasets:
            if dataset in singletons:
                print("\033[1m%s\033[0m is already in the dictionary!" % (dataset))
                continue

            print(
                "Adding dataset \033[1m%s\033[0m to the dictionary as a SINGLETON."
                % (dataset)
            )
            singletons.append(dataset)
            # Singleton datasets live at the top level, without a group prefix.
            data[dataset] = []
            data["datasets_and_counters"][dataset] = "_SINGLETON_/INDEX"

        return 0

    # Make sure the group (and its counter) exists before adding datasets.
    if group not in data["groups"]:
        print("Your group, \033[1m%s\033[0m is not in the dictionary yet!" % (group))
        counter = "n%s" % (group)
        print("Adding it, along with a counter of \033[1m%s\033[0m" % (counter))
        create_group(data, group, counter=counter)

    for dataset in datasets:
        name = "%s/%s" % (group, dataset)
        if name in data:
            print("\033[1m%s\033[0m is already in the dictionary!" % (name))
            continue

        # Look the counter up before touching ``data`` so that a group
        # without a counter raises KeyError without leaving a
        # half-registered dataset behind.
        counter = data["datasets_and_counters"][group]

        print(
            "Adding dataset \033[1m%s\033[0m to the dictionary under group \033[1m%s\033[0m."
            % (dataset, group)
        )
        data[name] = []
        data["groups"][group].append(dataset)
        # Associate the dataset with the counter of its group.
        data["datasets_and_counters"][name] = counter

    return 0
################################################################################
def pack(data, event):
    """
    Takes the data from an event and packs it into the data dictionary,
    intelligently, so that it can be stored and extracted efficiently.
    (This is analogous to the ROOT TTree::Fill() member function.)

    List-valued entries of the event are concatenated onto the matching
    list in ``data``; scalar entries (counters and singleton values) are
    appended as a single value. The singleton index gets one ``1`` per
    packed event.

    Args:

        **data** (dict): Data dictionary to hold the entire dataset.

        **event** (dict): Event to be packed into data.

    Raises:

        **SystemExit**: If a dataset of the SINGLETON group is still None,
        i.e. it was never given a value for this event.
    """
    # Bookkeeping entries describe the layout; they hold no event data.
    bookkeeping = ("datasets_and_counters", "groups", "list_of_counters")

    for key in list(event.keys()):
        if key in bookkeeping:
            continue

        # The singletons will only have 1 entry per event
        if key == "_SINGLETON_/INDEX":
            data[key].append(1)
            continue

        value = event[key]

        if isinstance(value, list):
            if len(value) > 0:
                data[key] += value
            continue

        # From here on the value is a scalar: a counter or a SINGLETON.
        # Identity comparison, so array-like values do not trigger an
        # elementwise comparison.
        if key in data["groups"]["_SINGLETON_"] and value is None:
            print(
                "\n\033[1m%s\033[0m is part of the SINGLETON group and is expected to have a value for each event."
                % (key)
            )
            print("However it is None...exiting.\n")
            # Raised directly instead of calling exit(), which only exists
            # when the site module is loaded; non-zero status marks failure.
            raise SystemExit(1)

        # Counters and singleton values alike contribute one entry per event.
        data[key].append(value)
################################################################################
def convert_list_and_key_to_string_data(datalist, key):
    """
    Converts a list and its key to byte-string data

    The entries of the list are converted to byte strings and joined with
    the separator ``__:__``.

    Args:

        **datalist** (list): A list to be saved as a string.

        **key** (string): We will assume that this will be unpacked as a
        dictionary, and this will be the key for the list in that
        dictionary.

    Returns:

        **mydataset** (list): A one-element list holding the
        ``[key, joined values]`` pair, both as byte strings
    """
    # np.bytes_ replaces np.string_, which was removed in NumPy 2.0.
    name = np.bytes_(key)
    joined = b"__:__".join(np.bytes_(val) for val in datalist)

    return [[name, joined]]
################################################################################ ################################################################################
def convert_dict_to_string_data(dictionary):
    """
    Converts a dictionary to byte-string data

    Args:

        **dictionary** (dict): Dictionary to be converted

    Returns:

        **mydataset** (list): One ``[key, value]`` pair per dictionary
        entry, in dictionary order, with both members as byte strings
    """
    # np.bytes_ replaces np.string_, which was removed in NumPy 2.0.
    return [[np.bytes_(key), np.bytes_(value)] for key, value in dictionary.items()]
################################################################################ ################################################################################
def write_to_file(
    filename, data, comp_type=None, comp_opts=None, force_single_precision=True
):
    """
    Writes the selected data to an h5hep file

    Args:

        **filename** (string): Name of output file

        **data** (dictionary): Data to be written into output file

        **comp_type** (string): Type of compression, passed to h5py as
        ``compression``

        **comp_opts**: Compression options, passed to h5py as
        ``compression_opts``

        **force_single_precision** (boolean): True if float64 data should
        be written in single precision

    Returns:

        **hdoutfile** (h5hep): File to which the data has been written.
        The file has already been closed when it is returned.
    """
    # The context manager closes the file even if writing fails part-way.
    with h5.File(filename, "w") as hdoutfile:
        # Convert this to a 2xN array for writing to the hdf5 file.
        # This gives us one small list of information if we need to pull out
        # small chunks of data
        mydataset = convert_dict_to_string_data(data["datasets_and_counters"])
        hdoutfile.create_dataset(
            "datasets_and_counters",
            data=mydataset,
            dtype="S256",
            compression=comp_type,
            compression_opts=comp_opts,
        )

        # Convert this to a 2xN array for writing to the hdf5 file.
        # This has the groups and the datasets in them.
        mydataset = convert_list_and_key_to_string_data(
            data["groups"]["_SINGLETON_"], "_SINGLETONGROUP_"
        )
        hdoutfile.create_dataset(
            "_SINGLETONGROUP_",
            data=mydataset,
            dtype="S256",
            compression=comp_type,
            compression_opts=comp_opts,
        )

        for group, datasets in data["groups"].items():
            hdoutfile.create_group(group)

            # A group may have been created without a counter; only record
            # the attribute when there is one.
            counter = data["datasets_and_counters"].get(group)
            if counter is not None:
                # np.bytes_ replaces np.string_, removed in NumPy 2.0.
                hdoutfile[group].attrs["counter"] = np.bytes_(counter)

            for dataset in datasets:
                # Singleton datasets are stored at the top level; only the
                # singleton INDEX lives under the group prefix. Compared by
                # value: identity against a literal is not reliable.
                if group == "_SINGLETON_" and dataset != "INDEX":
                    name = dataset
                else:
                    name = "%s/%s" % (group, dataset)

                x = np.asarray(data[name])

                # Do single precision only, unless specified
                if force_single_precision and x.dtype == np.float64:
                    x = x.astype(np.float32)

                hdoutfile.create_dataset(
                    name, data=x, compression=comp_type, compression_opts=comp_opts
                )

        # Get the number of events: the largest number of entries found in
        # any counter.
        nentries = -1
        prevcounter = None
        for i, countername in enumerate(data["list_of_counters"]):
            ncounter = len(data[countername])
            print("%-32s has %-12d entries" % (countername, ncounter))

            if i > 0 and ncounter != nentries:
                print("-------- WARNING -----------")
                print(
                    "%s and %s have differing numbers of entries!"
                    % (countername, prevcounter)
                )
                print("-------- WARNING -----------")
                # SHOULD WE EXIT ON THIS?

            if nentries < ncounter:
                nentries = ncounter

            prevcounter = countername

        hdoutfile.attrs["nentries"] = nentries

    return hdoutfile