# Source code for holisticai.explainability.metrics.tree._tree

import numpy as np
from holisticai.utils.surrogate_models import get_features, get_number_of_rules


class WeightedAverageDepth:
    """
    Weighted Average Depth metric for a decision tree.

    The depth of every leaf is averaged, each leaf being weighted by the
    share of samples recorded for it.
    """

    reference: int = 0
    name: str = "Weighted Average Depth"

    def __init__(self, ignore_repeated: bool = True):
        # Kept on the instance for interface compatibility; __call__ does not
        # consult it.
        self.ignore_repeated = ignore_repeated

    def __call__(self, tree):
        """
        Compute the weighted average leaf depth of ``tree``.

        Parameters
        ----------
        tree : Tree
            The tree structure handed to ``get_depths_counts`` (traversal
            starts at node 0).

        Returns
        -------
        float
            Sum over leaves of ``depth * (leaf samples / total leaf samples)``.
        """
        leaf_depths, leaf_counts = get_depths_counts(0, tree, [], [])
        total = sum(leaf_counts)
        # Turn raw leaf sample counts into weights that add up to one.
        leaf_weights = np.array(leaf_counts) / total
        weighted_depths = np.array(leaf_depths) * leaf_weights
        return weighted_depths.sum()


def weighted_average_depth(tree):
    """
    Compute the Weighted Average Depth of a tree.

    The depth of each leaf is averaged, weighting every leaf by the share of
    samples that end up in it.

    Parameters
    ----------
    tree : Tree
        The tree to calculate the weighted average depth of.

    Returns
    -------
    float
        The weighted average depth value.

    Examples
    --------
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> from holisticai.explainability.metrics import weighted_average_depth
    >>> X, y = load_iris(return_X_y=True)
    >>> clf = DecisionTreeClassifier()
    >>> clf.fit(X, y)
    >>> weighted_average_depth(clf.tree_)

    References
    ----------
    Laber, E., Murtinho, L., & Oliveira, F. (2023). Shallow decision trees for
    explainable k-means clustering. Pattern Recognition, 137, 109239.
    """
    return WeightedAverageDepth()(tree)
class WeightedAverageExplainabilityScore:
    """
    Weighted Average Explainability Score metric for a decision tree.

    For every leaf, the number of distinct ``(feature, branch direction)``
    cuts on the path from the root is counted; those counts are averaged,
    each leaf being weighted by its share of samples.
    """

    reference: int = 0
    name: str = "Weighted Average Explainability Score"

    def __init__(self, ignore_repeated: bool = True):
        # Kept on the instance for interface compatibility; __call__ does not
        # consult it.
        self.ignore_repeated = ignore_repeated

    def __call__(self, tree):
        """
        Compute the weighted average number of distinct cuts per leaf.

        Parameters
        ----------
        tree : Tree
            The tree structure handed to ``get_cuts_counts`` (traversal
            starts at node 0 with an empty set of cuts).

        Returns
        -------
        float
            Sum over leaves of
            ``distinct cuts * (leaf samples / total leaf samples)``.
        """
        leaf_cuts, leaf_counts = get_cuts_counts(0, tree, [], [], set())
        total = sum(leaf_counts)
        # Turn raw leaf sample counts into weights that add up to one.
        leaf_weights = np.array(leaf_counts) / total
        weighted_cuts = np.array(leaf_cuts) * leaf_weights
        return weighted_cuts.sum()
def weighted_average_explainability_score(tree):
    """
    Compute the Weighted Average Explainability Score of a tree.

    For each leaf, the distinct ``(feature, branch direction)`` cuts on its
    root-to-leaf path are counted, and the counts are averaged with every
    leaf weighted by the share of samples that end up in it.

    Parameters
    ----------
    tree : Tree
        The tree to calculate the weighted average explainability score of.

    Returns
    -------
    float
        The weighted average explainability score value.

    Examples
    --------
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> from holisticai.explainability.metrics import (
    ...     weighted_average_explainability_score,
    ... )
    >>> X, y = load_iris(return_X_y=True)
    >>> clf = DecisionTreeClassifier()
    >>> clf.fit(X, y)
    >>> weighted_average_explainability_score(clf.tree_)

    References
    ----------
    Laber, E., Murtinho, L., & Oliveira, F. (2023). Shallow decision trees for
    explainable k-means clustering. Pattern Recognition, 137, 109239.
    """
    return WeightedAverageExplainabilityScore()(tree)
def is_leaf(node_index, tree):
    """
    Check if a node is a leaf.

    Parameters
    ----------
    node_index : int
        The index of the node to check.
    tree : Tree
        The tree to check the node in. Must expose ``children_left`` and
        ``children_right`` indexable by node index, with ``-1`` meaning
        "no child".

    Returns
    -------
    bool
        Whether the node has neither a left nor a right child.
    """
    return tree.children_left[node_index] == -1 and tree.children_right[node_index] == -1


def get_cuts_counts(node_index, tree, cuts, counts, cur_set):
    """
    Collect, per leaf, the number of distinct cuts on its path and its sample count.

    A cut is the pair ``(feature, direction)`` with direction ``-1`` for the
    left branch and ``1`` for the right branch, so repeating the same feature
    in the same direction along a path is counted once.

    Parameters
    ----------
    node_index : int
        The index of the node to start from.
    tree : Tree
        The tree to get the cuts and counts from.
    cuts : list
        Output list; one entry (number of distinct cuts) is appended per leaf.
    counts : list
        Output list; one entry (``n_node_samples`` of the leaf) is appended per leaf.
    cur_set : set
        Cuts already accumulated above ``node_index``. It is not modified.

    Returns
    -------
    list
        The list of cuts (same object as ``cuts``).
    list
        The list of counts (same object as ``counts``).
    """
    # An explicit stack replaces recursion so that very deep trees cannot hit
    # the interpreter recursion limit.
    pending = [(node_index, cur_set)]
    while pending:
        node, path_cuts = pending.pop()
        if is_leaf(node, tree):
            cuts.append(len(path_cuts))
            counts.append(tree.n_node_samples[node])
            continue
        feature = tree.feature[node]
        left = tree.children_left[node]
        right = tree.children_right[node]
        # Push right before left so the left subtree is popped (visited) first,
        # which keeps leaves in left-to-right order.
        if right != -1:
            pending.append((right, path_cuts | {(feature, 1)}))
        if left != -1:
            pending.append((left, path_cuts | {(feature, -1)}))
    return cuts, counts


def get_depths_counts(node_index, tree, depths, counts, h=0):
    """
    Collect, per leaf, its depth and its sample count.

    Parameters
    ----------
    node_index : int
        The index of the node to start from.
    tree : Tree
        The tree to get the depths and counts from.
    depths : list
        Output list; the depth of every leaf is appended.
    counts : list
        Output list; ``n_node_samples`` of every leaf is appended.
    h : int, default=0
        The depth assigned to ``node_index``.

    Returns
    -------
    list
        The list of depths (same object as ``depths``).
    list
        The list of counts (same object as ``counts``).
    """
    # An explicit stack replaces recursion so that very deep trees cannot hit
    # the interpreter recursion limit.
    pending = [(node_index, h)]
    while pending:
        node, depth = pending.pop()
        if is_leaf(node, tree):
            depths.append(depth)
            counts.append(tree.n_node_samples[node])
            continue
        left = tree.children_left[node]
        right = tree.children_right[node]
        # Push right before left so leaves come out in left-to-right order.
        if right != -1:
            pending.append((right, depth + 1))
        if left != -1:
            pending.append((left, depth + 1))
    return depths, counts


class WeightedTreeGini:
    """
    Represents the Weighted Gini Index metric.

    The impurity of every leaf is averaged, each leaf being weighted by
    ``n_node_samples[leaf] / n_node_samples[0]``.
    """

    reference: float = 0.0
    name: str = "Weighted Gini Index"

    @staticmethod
    def _gini_impurity(tree, node_index):
        """Return the Gini impurity of the class distribution stored at a node."""
        if tree.n_node_samples[node_index] == 0:
            return 0.0
        node_value = tree.value[node_index, 0, :]
        # Normalise by the row's own total rather than by n_node_samples: the
        # stored values may be raw counts, weighted counts or already
        # normalised fractions, and only the row total turns all of them into
        # probabilities that sum to one.
        mass = np.sum(node_value)
        if mass == 0:
            return 0.0
        p = node_value / mass
        return 1.0 - np.sum(p**2)

    @staticmethod
    def _variance_impurity(tree, node_index):
        """Return the spread of the values stored at a node, divided by its sample count."""
        node_samples = tree.n_node_samples[node_index]
        if node_samples == 0:
            return 0.0
        node_value = tree.value[node_index, 0, :]
        # NOTE(review): when a node stores a single value (as single-output
        # regression trees appear to), this is always 0 - confirm whether
        # tree.impurity was intended here.
        mean_value = np.mean(node_value)
        return np.sum((node_value - mean_value) ** 2) / node_samples

    def __call__(self, tree):
        """
        Calculates the weighted Gini index of a tree.

        Parameters
        ----------
        tree : Tree
            The tree to calculate the weighted Gini index of. Reads
            ``n_classes``, ``n_node_samples``, ``value``, ``children_left``
            and ``children_right``.

        Returns
        -------
        float
            The sample-weighted average leaf impurity. Gini impurity is used
            when ``tree.n_classes[0] > 1``; otherwise the variance-style
            impurity is used.
        """
        impurity_of = self._gini_impurity if tree.n_classes[0] > 1 else self._variance_impurity
        total_samples = tree.n_node_samples[0]

        weighted_impurity = 0.0
        # Iterative depth-first walk; leaves are accumulated left to right.
        pending = [0]
        while pending:
            node = pending.pop()
            if is_leaf(node, tree):
                node_samples = tree.n_node_samples[node]
                weighted_impurity += (node_samples / total_samples) * impurity_of(tree, node)
                continue
            left = tree.children_left[node]
            right = tree.children_right[node]
            # Skip missing children instead of indexing node -1.
            if right != -1:
                pending.append(right)
            if left != -1:
                pending.append(left)
        return weighted_impurity
def weighted_tree_gini(tree):
    """
    Compute the weighted Gini index for the tree (WGNI).

    Reference value: 0.0

    Parameters
    ----------
    tree : Tree
        The tree to compute the weighted Gini index of.

    Returns
    -------
    float
        The weighted Gini index of the tree.

    Examples
    --------
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> from holisticai.explainability.metrics import weighted_tree_gini
    >>> X, y = load_iris(return_X_y=True)
    >>> clf = DecisionTreeClassifier()
    >>> clf.fit(X, y)
    >>> weighted_tree_gini(clf.tree_)
    """
    return WeightedTreeGini()(tree)
class TreeDepthVariance:
    """
    Tree Depth Variance metric: how unevenly deep the leaves of a tree are.
    """

    reference: float = 0.0
    name: str = "Tree Depth Variance"

    def __call__(self, tree):
        """
        Compute the (population) variance of the leaf depths of ``tree``.

        Parameters
        ----------
        tree : Tree
            The tree structure handed to ``get_depths_counts`` (traversal
            starts at node 0).

        Returns
        -------
        float
            Mean squared deviation of the leaf depths from their mean.
        """
        leaf_depths = np.asarray(get_depths_counts(0, tree, [], [])[0])
        deviations = leaf_depths - np.mean(leaf_depths)
        return np.mean(deviations**2)
def tree_depth_variance(tree):
    """
    Compute the variance of the depths of the leaves in the tree (TDV).

    Reference value: 0.0

    Parameters
    ----------
    tree : Tree
        The tree to compute the depth variance of.

    Returns
    -------
    float
        The variance of the leaf depths.

    Examples
    --------
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> from holisticai.explainability.metrics import tree_depth_variance
    >>> X, y = load_iris(return_X_y=True)
    >>> clf = DecisionTreeClassifier()
    >>> clf.fit(X, y)
    >>> tree_depth_variance(clf.tree_)
    """
    return TreeDepthVariance()(tree)
class TreeNumberOfRules:
    """
    Number of Rules metric for a surrogate model.
    """

    reference: float = 1
    name: str = "Number of Rules"

    def __call__(self, surrogate):
        """
        Return the rule count reported by ``get_number_of_rules`` as an ``int``.

        Parameters
        ----------
        surrogate
            The surrogate model passed through to ``get_number_of_rules``.

        Returns
        -------
        int
            The number of rules.
        """
        n_rules = get_number_of_rules(surrogate)
        return int(n_rules)
def tree_number_of_rules(surrogate):
    """
    Calculate the number of rules in a decision tree surrogate model.

    Parameters
    ----------
    surrogate
        A surrogate model, typically a decision tree, for which the number of
        rules is to be calculated.

    Returns
    -------
    int
        The number of rules present in the surrogate model.

    Examples
    --------
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> from holisticai.explainability.metrics import tree_number_of_rules
    >>> X, y = load_iris(return_X_y=True)
    >>> clf = DecisionTreeClassifier()
    >>> clf.fit(X, y)
    >>> tree_number_of_rules(clf.tree_)
    """
    return TreeNumberOfRules()(surrogate)
class TreeNumberOfFeatures:
    """
    Number of Features metric for a surrogate model.
    """

    reference: float = 1
    name: str = "Number of Features"

    def __call__(self, surrogate):
        """
        Count the distinct non-negative feature indices of the surrogate.

        Parameters
        ----------
        surrogate
            The surrogate model passed through to ``get_features``.

        Returns
        -------
        int
            Number of unique entries ``>= 0`` in the array returned by
            ``get_features``.
        """
        node_features = get_features(surrogate)
        # Negative entries are discarded - presumably they mark nodes that do
        # not split on a feature; confirm against get_features.
        distinct_features = np.unique(node_features[node_features >= 0])
        return int(distinct_features.size)
def tree_number_of_features(surrogate):
    """
    Calculate the number of features used in a decision tree surrogate model.

    Parameters
    ----------
    surrogate
        A surrogate model, typically a decision tree, for which the number of
        features is to be calculated.

    Returns
    -------
    int
        The number of features used in the surrogate model.

    Examples
    --------
    >>> from sklearn.datasets import load_iris
    >>> from sklearn.tree import DecisionTreeClassifier
    >>> from holisticai.explainability.metrics import tree_number_of_features
    >>> X, y = load_iris(return_X_y=True)
    >>> clf = DecisionTreeClassifier()
    >>> clf.fit(X, y)
    >>> tree_number_of_features(clf.tree_)
    """
    return TreeNumberOfFeatures()(surrogate)