Source code for causalpy.checks.mccrary

#   Copyright 2022 - 2026 The PyMC Labs Developers
#
#   Licensed under the Apache License, Version 2.0 (the "License");
#   you may not use this file except in compliance with the License.
#   You may obtain a copy of the License at
#
#       http://www.apache.org/licenses/LICENSE-2.0
#
#   Unless required by applicable law or agreed to in writing, software
#   distributed under the License is distributed on an "AS IS" BASIS,
#   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#   See the License for the specific language governing permissions and
#   limitations under the License.
"""
McCrary density test for Regression Discontinuity designs.

Tests for manipulation of the running variable at the threshold by
checking whether there is a discontinuity in the density.  A
significant density discontinuity suggests that units may have been
able to manipulate their value of the running variable to sort into
(or out of) treatment.

Uses a simple count-based approach: compares the number of
observations below the threshold with the number at or above it,
using a two-sided normal-approximation test of equal proportions.
"""

from __future__ import annotations

import numpy as np
import pandas as pd
from scipy.stats import norm

from causalpy.checks.base import CheckResult
from causalpy.experiments.base import BaseExperiment
from causalpy.experiments.regression_discontinuity import RegressionDiscontinuity
from causalpy.pipeline import PipelineContext


class McCraryDensityTest:
    """Test for manipulation of the running variable at the threshold.

    Compares how many observations lie below the treatment threshold with
    how many lie at or above it.  Under no manipulation the two shares are
    expected to be equal, so the share below the threshold is tested
    against 0.5 with a two-sided normal-approximation z-test.

    Parameters
    ----------
    n_bins : int, default 20
        Number of bins on each side of the threshold.  The value is stored
        on the instance; the current ``run`` implementation compares total
        counts on each side and does not read it.
    alpha : float, default 0.05
        Significance level for the test.  The check passes when the
        p-value is strictly greater than ``alpha``.

    Examples
    --------
    >>> import causalpy as cp  # doctest: +SKIP
    >>> check = cp.checks.McCraryDensityTest(n_bins=20)  # doctest: +SKIP
    """

    # Experiment classes this check declares itself applicable to.
    applicable_methods: set[type[BaseExperiment]] = {RegressionDiscontinuity}

    def __init__(self, n_bins: int = 20, alpha: float = 0.05) -> None:
        """Store the configuration of the check.

        Parameters
        ----------
        n_bins : int, default 20
            Number of bins on each side of the threshold (stored only).
        alpha : float, default 0.05
            Significance level used by ``run`` to decide pass/fail.
        """
        self.n_bins = n_bins
        self.alpha = alpha

    def validate(self, experiment: BaseExperiment) -> None:
        """Verify the experiment is a RegressionDiscontinuity instance.

        Raises
        ------
        TypeError
            If ``experiment`` is not a ``RegressionDiscontinuity``.
        """
        if not isinstance(experiment, RegressionDiscontinuity):
            raise TypeError(
                "McCraryDensityTest requires a RegressionDiscontinuity experiment."
            )

    def run(
        self,
        experiment: BaseExperiment,
        context: PipelineContext,
    ) -> CheckResult:
        """Test for manipulation of the running variable at the threshold.

        Parameters
        ----------
        experiment : BaseExperiment
            Experiment providing ``treatment_threshold``,
            ``running_variable_name`` and ``data``.
        context : PipelineContext
            Pipeline context.  Not used by this check.

        Returns
        -------
        CheckResult
            ``passed`` is ``None`` when no observation falls on either side
            of the threshold.  Otherwise ``passed`` is ``True`` when the
            p-value exceeds ``alpha``; the result also carries a one-row
            summary table, a human-readable message, and the z-statistic
            and p-value in ``metadata``.
        """
        rd = experiment
        threshold = rd.treatment_threshold  # type: ignore[attr-defined]
        running_var = rd.running_variable_name  # type: ignore[attr-defined]
        data = rd.data  # type: ignore[attr-defined]

        x = data[running_var].values
        # Observations exactly at the threshold count as "above".
        n_below = len(x[x < threshold])
        n_above = len(x[x >= threshold])
        n_total = n_below + n_above

        if n_total == 0:
            return CheckResult(
                check_name="McCraryDensityTest",
                passed=None,
                text="No observations found around the threshold.",
            )

        prop_below = n_below / n_total
        prop_above = n_above / n_total

        # Standard error based on the observed proportions.
        se = np.sqrt(prop_below * prop_above / n_total)
        if se == 0:
            # Every observation lies on one side, so the observed-proportion
            # standard error collapses to zero.  Treating that as z = 0
            # would report the most extreme imbalance as "no evidence of
            # manipulation"; use the standard error under the null
            # hypothesis (p = 0.5) instead so the imbalance is tested.
            se = np.sqrt(0.25 / n_total)
        z_stat = (prop_below - 0.5) / se
        p_value = 2 * (1 - norm.cdf(abs(z_stat)))

        passed = bool(p_value > self.alpha)

        table = pd.DataFrame(
            [
                {
                    "n_below": n_below,
                    "n_above": n_above,
                    "prop_below": prop_below,
                    "prop_above": prop_above,
                    "z_statistic": z_stat,
                    "p_value": p_value,
                    "threshold": threshold,
                }
            ]
        )

        verdict = (
            "no evidence of manipulation"
            if passed
            else "possible manipulation detected"
        )
        text = (
            f"McCrary density test: {verdict} at "
            f"threshold {threshold} (z={z_stat:.3f}, p={p_value:.3f}). "
            f"Observations below: {n_below}, above: {n_above}."
        )

        return CheckResult(
            check_name="McCraryDensityTest",
            passed=passed,
            table=table,
            text=text,
            metadata={"z_statistic": z_stat, "p_value": p_value},
        )