Source code for pyarxaas.models.risk_profile

import copy

try:
    # The ABC aliases were removed from ``collections`` in Python 3.10;
    # ``collections.abc`` is their canonical home.
    from collections.abc import Mapping
except ImportError:  # pragma: no cover - interpreters without collections.abc
    from collections import Mapping

from pandas import DataFrame


class RiskProfile:
    """
    Represents the re-identification risks associated with a Dataset

    Two profiles are equal when their re-identification risk measures and the
    first entry of their risk interval list match (values are compared by their
    string representation). The hash is derived from exactly the same data, so
    equal profiles always hash equally.
    """

    def __init__(self, metrics: Mapping):
        """
        Build a profile from a metrics mapping.

        :param metrics: mapping holding a "reIdentificationRisk" entry (with
            "measures", "attackerSuccessRate" -> "successRates",
            "quasiIdentifiers" and "populationModel") and a
            "distributionOfRisk" entry
        :raises KeyError: if one of the entries listed above is missing
        """
        re_identification = metrics["reIdentificationRisk"]
        # Containers are deep-copied so that later changes to the caller's
        # mapping cannot alter this profile.
        self._re_identification_of_risk = copy.deepcopy(re_identification["measures"])
        self._distribution_of_risk = copy.deepcopy(metrics["distributionOfRisk"])
        self._attacker_success_rate = copy.deepcopy(
            re_identification["attackerSuccessRate"]["successRates"]
        )
        self._quasi_identifiers = copy.deepcopy(re_identification["quasiIdentifiers"])
        self._population_model = re_identification["populationModel"]

    def __eq__(self, other):
        if not isinstance(other, self.__class__):
            return NotImplemented
        # Compare the data itself; comparing hashes would report distinct
        # profiles as equal whenever their hashes collide.
        return self._comparison_key() == other._comparison_key()

    def __hash__(self):
        return hash(self._comparison_key())

    def _comparison_key(self):
        """
        Hashable summary of the data that defines equality for a profile.

        :return: tuple with the key of the re-identification risk measures and
            the key of the first risk interval (empty when there is none)
        """
        intervals = self._distribution_of_risk["riskIntervalList"]
        first_interval = intervals[0] if intervals else {}
        return (
            self._metric_key(self._re_identification_of_risk),
            self._metric_key(first_interval),
        )

    @staticmethod
    def _metric_key(metric):
        """
        Order-independent, hashable view of a metric mapping.

        :param metric: mapping of metric name to value
        :return: frozenset of (name, str(value)) pairs
        """
        # Values are stringified so unhashable values (lists, dicts) can still
        # take part in the key.
        return frozenset((name, str(value)) for name, value in metric.items())

    def _metric_hash(self, metric):
        """
        Hash of a metric mapping, independent of item order.

        :param metric: mapping of metric name to value
        :return: int hash of the mapping's (name, str(value)) pairs
        """
        return hash(self._metric_key(metric))

    @property
    def re_identification_risk(self):
        """
        Re-identification risk metrics for a given Dataset

        :return: dict containing re-identification metrics
        """
        return copy.deepcopy(self._re_identification_of_risk)

    @property
    def distribution_of_risk(self):
        """
        Distribution of risk for a given Dataset

        :return: dict containing the distribution of risks in a given Dataset
        """
        return copy.deepcopy(self._distribution_of_risk)

    @property
    def attacker_success_rate(self):
        """
        Attacker success rates against re-identification for a given Dataset

        :return: dict containing the attacker success rate.
        """
        return copy.deepcopy(self._attacker_success_rate)

    @property
    def quasi_identifiers(self):
        """
        Quasi-identifiers for a given Dataset

        :return: copy of the collection of quasi-identifying attributes in a
            given Dataset
        """
        return copy.deepcopy(self._quasi_identifiers)

    @property
    def population_model(self):
        """
        Population model used to analyze a given Dataset

        :return: The Population model name used to analyze a given Dataset
        """
        return self._population_model

    def re_identification_risk_dataframe(self) -> DataFrame:
        """
        Re-identification risk as a pandas.DataFrame

        :return: pandas.DataFrame with a single row and one column per risk metric
        """
        return DataFrame([self._re_identification_of_risk])

    def distribution_of_risk_dataframe(self) -> DataFrame:
        """
        Distribution of risk as a pandas.DataFrame

        :return: pandas.DataFrame built from the "riskIntervalList" entries
        """
        return DataFrame.from_dict(self._distribution_of_risk["riskIntervalList"])