Source code for causalpy.checks.mccrary
# Copyright 2022 - 2026 The PyMC Labs Developers
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
McCrary density test for Regression Discontinuity designs.
Tests for manipulation of the running variable at the threshold by
checking whether there is a discontinuity in the density. A
significant density discontinuity suggests that units may have been
able to manipulate their value of the running variable to sort into
(or out of) treatment.
Uses a simple count-based approach: compares the share of observations
below the threshold with the share at or above it, using a two-sided
z-test against an even split.
"""
from __future__ import annotations
import numpy as np
import pandas as pd
from scipy.stats import norm
from causalpy.checks.base import CheckResult
from causalpy.experiments.base import BaseExperiment
from causalpy.experiments.regression_discontinuity import RegressionDiscontinuity
from causalpy.pipeline import PipelineContext
[docs]
class McCraryDensityTest:
    """Test for manipulation of the running variable at the threshold.

    Compares the number of observations strictly below the treatment
    threshold with the number at or above it. The departure of the
    below-threshold share from an even 50/50 split is assessed with a
    two-sided z-test; a small p-value is reported as possible
    manipulation.

    Parameters
    ----------
    n_bins : int, default 20
        Number of bins on each side of the threshold. Stored on the
        instance. NOTE(review): ``run`` does not read this value, so it
        currently has no effect on the result.
    alpha : float, default 0.05
        Significance level for the test. The check passes when the
        p-value is strictly greater than ``alpha``.

    Examples
    --------
    >>> import causalpy as cp  # doctest: +SKIP
    >>> check = cp.checks.McCraryDensityTest(n_bins=20)  # doctest: +SKIP
    """

    # Experiment types this check declares itself applicable to.
    applicable_methods: set[type[BaseExperiment]] = {RegressionDiscontinuity}

    def __init__(self, n_bins: int = 20, alpha: float = 0.05) -> None:
        """Store the bin count and significance level on the instance."""
        self.n_bins = n_bins
        self.alpha = alpha

    def validate(self, experiment: BaseExperiment) -> None:
        """Verify the experiment is a RegressionDiscontinuity instance.

        Raises
        ------
        TypeError
            If ``experiment`` is not a ``RegressionDiscontinuity``.
        """
        if not isinstance(experiment, RegressionDiscontinuity):
            raise TypeError(
                "McCraryDensityTest requires a RegressionDiscontinuity experiment."
            )

    def run(
        self,
        experiment: BaseExperiment,
        context: PipelineContext,
    ) -> CheckResult:
        """Test for manipulation of the running variable at the threshold.

        Parameters
        ----------
        experiment : BaseExperiment
            Experiment exposing ``treatment_threshold``,
            ``running_variable_name`` and ``data`` attributes.
        context : PipelineContext
            Accepted for interface compatibility; not read by this method.

        Returns
        -------
        CheckResult
            With ``passed=None`` when no observation falls on either side
            of the threshold. Otherwise ``passed`` is ``True`` when the
            two-sided p-value exceeds ``alpha``, and the result carries a
            one-row summary table, a text summary, and the z statistic
            and p-value in ``metadata``.
        """
        rd = experiment
        threshold = rd.treatment_threshold  # type: ignore[attr-defined]
        running_var = rd.running_variable_name  # type: ignore[attr-defined]
        data = rd.data  # type: ignore[attr-defined]
        x = data[running_var].values

        # Values that compare False on both sides (e.g. NaN) are counted
        # on neither side and therefore drop out of the test.
        n_below = len(x[x < threshold])
        n_above = len(x[x >= threshold])
        n_total = n_below + n_above

        if n_total == 0:
            return CheckResult(
                check_name="McCraryDensityTest",
                passed=None,
                text="No observations found around the threshold.",
            )

        prop_below = n_below / n_total
        prop_above = n_above / n_total

        se = np.sqrt(prop_below * prop_above / n_total)
        if not se > 0:
            # Every observation lies on one side, so the sample-based
            # standard error collapses to zero. Treating that as z = 0
            # would report the most lopsided split possible as perfectly
            # balanced; use the standard error under the null hypothesis
            # (p = 0.5) instead, which is positive whenever n_total > 0.
            se = np.sqrt(0.25 / n_total)
        z_stat = (prop_below - 0.5) / se

        # The survival function keeps precision in the far tail, where
        # 1 - cdf would round to exactly zero.
        p_value = 2 * norm.sf(abs(z_stat))
        passed = bool(p_value > self.alpha)

        table = pd.DataFrame(
            [
                {
                    "n_below": n_below,
                    "n_above": n_above,
                    "prop_below": prop_below,
                    "prop_above": prop_above,
                    "z_statistic": z_stat,
                    "p_value": p_value,
                    "threshold": threshold,
                }
            ]
        )

        if passed:
            text = (
                f"McCrary density test: no evidence of manipulation at "
                f"threshold {threshold} (z={z_stat:.3f}, p={p_value:.3f}). "
                f"Observations below: {n_below}, above: {n_above}."
            )
        else:
            text = (
                f"McCrary density test: possible manipulation detected at "
                f"threshold {threshold} (z={z_stat:.3f}, p={p_value:.3f}). "
                f"Observations below: {n_below}, above: {n_above}."
            )

        return CheckResult(
            check_name="McCraryDensityTest",
            passed=passed,
            table=table,
            text=text,
            metadata={"z_statistic": z_stat, "p_value": p_value},
        )