Source code for ash_model.utils.profiles

from collections import Counter, defaultdict

import numpy as np

from ash_model import ASH, NProfile


def aggregate_node_profile(
    h: ASH, node: int, categorical_aggr: str = "mode", numerical_aggr: str = "mean"
) -> NProfile:
    """
    Returns an aggregated profile of a node over all time points.

    The attribute values of the node are collected in ascending order of the
    time ids returned by ``h.node_presence(node)`` and then reduced to a single
    value per attribute. An attribute is treated as categorical when its first
    collected value is a ``str``; otherwise it is treated as numerical.

    :param h: The ASH object
    :param node: The node id
    :param categorical_aggr: The aggregation method for categorical attributes.
        Options: "mode", "first", "last"
    :param numerical_aggr: The aggregation method for numerical attributes.
        Options: "mean", "median", "first", "last"
    :return: The aggregated profile of the node
    :raises KeyError: if an attribute has to be aggregated with a method name
        that is not one of "mode", "first", "last", "mean", "median"
    """

    def _mode(values):
        # Counter.most_common orders equal counts by first occurrence, so ties
        # are resolved deterministically (set iteration order is not stable
        # across interpreter runs for strings).
        return Counter(values).most_common(1)[0][0]

    def _median(values):
        ordered = sorted(values)
        mid = len(ordered) // 2
        if len(ordered) % 2:
            # Odd length: the middle element itself, returned unchanged.
            return ordered[mid]
        # Even length: average of the two central elements.
        return (ordered[mid - 1] + ordered[mid]) / 2

    name_to_func = {
        "mode": _mode,
        "first": lambda x: x[0],
        "last": lambda x: x[-1],
        "mean": lambda x: sum(x) / len(x),
        "median": _median,
    }

    aggr_profile = NProfile(node)

    # Group the values of every attribute by name, in chronological order.
    attribute_values = defaultdict(list)
    for t in sorted(h.node_presence(node)):
        attributes = h.get_node_profile(node, tid=t).get_attributes()
        for name, value in attributes.items():
            attribute_values[name].append(value)

    for name, values in attribute_values.items():
        # The aggregator is looked up lazily, so an unknown method name only
        # matters when an attribute of that kind is actually present.
        if isinstance(values[0], str):
            aggr_profile.add_attribute(name, name_to_func[categorical_aggr](values))
        else:
            aggr_profile.add_attribute(name, name_to_func[numerical_aggr](values))

    return aggr_profile
def hyperedge_most_frequent_node_attribute_value(
    h: ASH, hyperedge_id: str, attr_name: str, tid: int = None
) -> dict:
    """
    Returns the most frequent value of a node attribute in a hyperedge.

    For every node of the hyperedge the profile is fetched with
    ``h.get_node_profile(node, tid)``; nodes whose profile does not have the
    attribute are ignored. When several values are equally frequent, the one
    encountered first (in the order of ``h.get_hyperedge_nodes``) is returned.

    :param h: The ASH object
    :param hyperedge_id: The hyperedge id
    :param attr_name: The attribute name to consider
    :param tid: The temporal id, forwarded as-is to ``h.get_node_profile``.
        NOTE(review): with ``tid=None`` the profile is presumably the one
        aggregated across all time points -- confirm against ``ASH``.
    :return: A single-entry dictionary ``{value: count}`` with the most
        frequent value and its number of occurrences, or an empty dictionary
        if no node of the hyperedge has the attribute
    """
    values = []
    for node in h.get_hyperedge_nodes(hyperedge_id):
        profile = h.get_node_profile(node, tid)
        if profile.has_attribute(attr_name):
            values.append(profile.get_attribute(attr_name))

    if not values:
        return {}

    # One counting pass; most_common breaks ties by first occurrence.
    value, count = Counter(values).most_common(1)[0]
    return {value: count}
def hyperedge_aggregate_node_profile(
    h: ASH,
    hyperedge_id: str,
    tid: int,
    attr_name: str = None,
    categorical_aggr: str = "mode",
    numerical_aggr: str = "mean",
) -> NProfile:
    """
    Returns an aggregated profile of the nodes in a hyperedge.

    The attribute values of all nodes of the hyperedge at time ``tid`` are
    collected and reduced to a single value per attribute. An attribute is
    treated as categorical when its first collected value is a ``str``;
    otherwise it is treated as numerical, and its standard deviation is also
    stored as the "std" statistic of the attribute.

    :param h: The ASH object
    :param hyperedge_id: The hyperedge id
    :param tid: The temporal id
    :param attr_name: The attribute name to aggregate. If None (or empty), all
        attributes are aggregated. Nodes that do not have the attribute are
        skipped.
    :param categorical_aggr: The aggregation method for categorical attributes.
        Options: "mode", "first", "last"
    :param numerical_aggr: The aggregation method for numerical attributes.
        Options: "mean", "median", "first", "last"
    :return: The aggregated profile of the hyperedge nodes
    :raises KeyError: if an attribute has to be aggregated with a method name
        that is not one of "mode", "first", "last", "mean", "median"
    """

    def _mode(values):
        # Counter.most_common orders equal counts by first occurrence, so ties
        # are resolved deterministically (set iteration order is not stable
        # across interpreter runs for strings).
        return Counter(values).most_common(1)[0][0]

    name_to_func = {
        "mode": _mode,
        "first": lambda x: x[0],
        "last": lambda x: x[-1],
        "mean": lambda x: sum(x) / len(x),
        "median": lambda x: np.median(x),
    }

    # The aggregated profile does not belong to a single node.
    aggr_profile = NProfile(None)

    attribute_values = defaultdict(list)
    for node in h.get_hyperedge_nodes(hyperedge_id):
        profile = h.get_node_profile(node, tid)
        if not attr_name:
            for name, value in profile.get_attributes().items():
                attribute_values[name].append(value)
        elif profile.has_attribute(attr_name):
            # Only nodes that actually carry the attribute contribute, which
            # mirrors what the all-attributes branch does implicitly.
            attribute_values[attr_name].append(profile.get_attribute(attr_name))

    for name, values in attribute_values.items():
        if isinstance(values[0], str):
            aggr_profile.add_attribute(name, name_to_func[categorical_aggr](values))
        else:
            aggr_profile.add_attribute(name, name_to_func[numerical_aggr](values))
            aggr_profile.add_statistic(name, "std", np.std(values))

    return aggr_profile