Rule

Predicate definition holder

Source code in cuallee/__init__.py

@dataclass
class Rule:
    """Predicate definition holder.

    Stores the configuration of a single check (method, column, value, data
    type, coverage threshold and options) together with the outcome fields
    filled in by :meth:`evaluate` (violations, pass rate and status).

    Raises:
        ValueError: on construction, when ``coverage`` is ``<= 0`` or ``> 1``,
            or when a list/tuple ``value`` of an ``AGNOSTIC`` rule mixes
            element types.
    """

    method: str
    column: Union[str, List[str], Tuple[str, str]]
    value: Optional[Any]
    data_type: CheckDataType
    # Minimum pass rate for the rule to be reported as "PASS".
    coverage: float = 1.0
    # Extra settings; __post_init__ also looks for a "name" key when this is a dict.
    options: Union[List[Tuple], None] = None
    status: Union[str, None] = None
    violations: int = 0
    pass_rate: float = 0.0
    ordinal: int = 0
    # Always overwritten in __post_init__ (options["name"] or the method name).
    name: Optional[str] = None

    @property
    def settings(self) -> dict:
        """Additional settings for the predicate execution, as a dict.

        Returns an empty dict when no options were supplied, instead of
        failing on ``dict(None)``.
        """
        return dict(self.options) if self.options else {}

    @property
    def key(self):
        """Upper-cased BLAKE2s hex digest of method, column, value, options and coverage."""
        fingerprint = (
            f"{self.method}{self.column}{self.value}{self.options}{self.coverage}"
        )
        return hashlib.blake2s(bytes(fingerprint, "utf-8")).hexdigest().upper()

    def __post_init__(self):
        """Validate the coverage, normalise lists to tuples and resolve the rule name."""
        if (self.coverage <= 0) or (self.coverage > 1):
            raise ValueError("Coverage should be between 0 and 1")

        # Lists are converted to tuples so that column/value are immutable.
        if isinstance(self.column, list):
            self.column = tuple(self.column)

        if isinstance(self.value, list):
            self.value = tuple(self.value)

        if isinstance(self.value, tuple) and (
            self.data_type == CheckDataType.AGNOSTIC
        ):
            # All values can only be of one data type in a rule
            if len(set(map(type, self.value))) > 1:
                raise ValueError("Data types in rule values are inconsistent")

        # A truthy "name" entry in a dict of options overrides the method name.
        if (
            self.options
            and isinstance(self.options, dict)
            and (rule_name := self.options.get("name"))
        ):
            self.name = rule_name
        else:
            self.name = self.method

    def __repr__(self):
        return f"Rule(method:{self.name}, column:{self.column}, value:{self.value}, data_type:{self.data_type}, coverage:{self.coverage}, ordinal:{self.ordinal})"

    def __rshift__(self, rule_dict: Dict[str, Any]) -> Dict[str, Any]:
        """Register this rule in ``rule_dict`` under its ``key`` and return the dict."""
        rule_dict[self.key] = self
        return rule_dict

    def evaluate_violations(self, result: Any, rows: int):
        """Calculates the row violations on the rule.

        ``result`` is interpreted by type:

        * ``str``: ``"false"`` gives ``rows``, ``"true"`` gives 0, anything
          else is parsed with ``int`` and its absolute value is used.
        * ``bool``: ``True`` gives 0, ``False`` gives ``rows``.
        * ``int``: 0 gives ``rows``; a negative number gives its absolute
          value; a positive number below ``rows`` gives ``rows - result``;
          a number equal to or above ``rows`` gives 0.
        * any other type gives 0.

        Raises:
            ValueError: if ``result`` is a string other than ``"true"`` or
                ``"false"`` that ``int`` cannot parse.
        """
        if isinstance(result, str):
            if result == "false":
                self.violations = rows
            elif result == "true":
                self.violations = 0
            else:
                self.violations = abs(int(result))
        elif isinstance(result, bool):
            # bool is tested before int because bool is a subclass of int.
            self.violations = 0 if result else rows
        elif isinstance(result, int):
            if result == 0:
                self.violations = rows
            elif result < 0:
                self.violations = abs(result)
            elif result < rows:
                self.violations = rows - result
            else:
                # Reset explicitly so a previous evaluation cannot leave a
                # stale violation count behind.
                self.violations = 0
        else:
            self.violations = 0

    def evaluate_pass_rate(self, rows: int):
        """Percentage of successful rows by this rule.

        Uses ``1 - violations / rows`` while violations do not exceed
        ``rows``, and ``rows / violations`` otherwise. A division by zero
        yields 1.0 in the first case and 0.0 in the second.
        """
        if self.violations <= rows:
            try:
                self.pass_rate = 1 - (self.violations / rows)
            except ZeroDivisionError:
                self.pass_rate = 1.0
        else:
            try:
                self.pass_rate = rows / self.violations
            except ZeroDivisionError:
                self.pass_rate = 0.0

    def evaluate_status(self):
        """Overall PASS/FAIL status of the rule, comparing pass rate with coverage."""
        self.status = "PASS" if self.pass_rate >= self.coverage else "FAIL"

    def evaluate(self, result: Any, rows: int):
        """Generic rule evaluation for checks.

        Updates ``violations``, then ``pass_rate``, then ``status``; each
        step reads the value written by the previous one.
        """
        self.evaluate_violations(result, rows)
        self.evaluate_pass_rate(rows)
        self.evaluate_status()

`key` `property`

BLAKE2s hash of the rule, built from its method, column, value, options and coverage

`settings` `property`

Holds the additional settings for the predicate execution

`evaluate(result, rows)`

Generic rule evaluation for checks

Source code in cuallee/__init__.py

def evaluate(self, result: Any, rows: int) -> None:
    """Run the full evaluation of the rule for one check result.

    Calls, in this order, ``evaluate_violations(result, rows)``,
    ``evaluate_pass_rate(rows)`` and ``evaluate_status()`` on the rule.
    """
    # Step 1: record the violations for this result.
    self.evaluate_violations(result, rows)
    # Step 2: compute the pass rate over the given number of rows.
    self.evaluate_pass_rate(rows)
    # Step 3: settle the final status.
    self.evaluate_status()

`evaluate_pass_rate(rows)`

Percentage of successful rows by this rule

Source code in cuallee/__init__.py

def evaluate_pass_rate(self, rows: int):
    """Compute the fraction of successful rows and store it in ``pass_rate``.

    While violations do not exceed ``rows`` the rate is
    ``1 - violations / rows``; otherwise it is ``rows / violations``.
    A division by zero yields 1.0 in the first case and 0.0 in the second.
    """
    within_rows = self.violations <= rows
    try:
        if within_rows:
            rate = 1 - (self.violations / rows)
        else:
            rate = rows / self.violations
    except ZeroDivisionError:
        # Fallback depends on which formula was being applied.
        rate = 1.0 if within_rows else 0.0
    self.pass_rate = rate

`evaluate_status()`

Overall PASS/FAIL status of the rule

Source code in cuallee/__init__.py

def evaluate_status(self):
    """Set ``status`` to "PASS" when ``pass_rate`` reaches ``coverage``, otherwise "FAIL"."""
    meets_coverage = self.pass_rate >= self.coverage
    self.status = "PASS" if meets_coverage else "FAIL"

`evaluate_violations(result, rows)`

Calculates the row violations on the rule

Source code in cuallee/__init__.py

def evaluate_violations(self, result: Any, rows: int):
    """Calculates the row violations on the rule.

    ``result`` is interpreted by type:

    * ``str``: ``"false"`` gives ``rows``, ``"true"`` gives 0, anything else
      is parsed with ``int`` and its absolute value is used.
    * ``bool``: ``True`` gives 0, ``False`` gives ``rows``.
    * ``int``: 0 gives ``rows``; a negative number gives its absolute value;
      a positive number below ``rows`` gives ``rows - result``; a number
      equal to or above ``rows`` gives 0.
    * any other type gives 0.

    Raises:
        ValueError: if ``result`` is a string other than ``"true"`` or
            ``"false"`` that ``int`` cannot parse.
    """
    if isinstance(result, str):
        if result == "false":
            self.violations = rows
        elif result == "true":
            self.violations = 0
        else:
            self.violations = abs(int(result))
    elif isinstance(result, bool):
        # bool is tested before int because bool is a subclass of int.
        self.violations = 0 if result else rows
    elif isinstance(result, int):
        if result == 0:
            self.violations = rows
        elif result < 0:
            self.violations = abs(result)
        elif result < rows:
            self.violations = rows - result
        else:
            # Reset explicitly so a previous evaluation cannot leave a stale
            # violation count behind.
            self.violations = 0
    else:
        self.violations = 0

Rule

key property

settings property

evaluate(result, rows)

evaluate_pass_rate(rows)

evaluate_status()

evaluate_violations(result, rows)

`key` `property`

`settings` `property`

`evaluate(result, rows)`

`evaluate_pass_rate(rows)`

`evaluate_status()`

`evaluate_violations(result, rows)`