python-dte-adjustment/dte_adj/simple.py at b4c441cb8f348cb5bf7e05b6d0cceaffdf51453c · CyberAgentAILab/python-dte-adjustment · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
from __future__ import annotations

import numpy as np
from dte_adj.stratified import (
    SimpleStratifiedDistributionEstimator,
    AdjustedStratifiedDistributionEstimator,
)
from dte_adj.util import ArrayLike, _convert_to_ndarray


class SimpleDistributionEstimator(SimpleStratifiedDistributionEstimator):
    """
    Unadjusted estimator of empirical distribution functions and distributional
    treatment effects.

    Distribution Treatment Effects (DTE), Probability Treatment Effects (PTE) and
    Quantile Treatment Effects (QTE) are computed without any machine learning
    model for adjustment. It is intended as a baseline for settings where
    treatment assignment is random or covariate adjustment is not needed.

    The class builds on ``SimpleStratifiedDistributionEstimator``; ``fit`` labels
    every observation with stratum ``0``, so all data share one stratum.

    Example:
        .. code-block:: python

            import numpy as np
            from dte_adj import SimpleDistributionEstimator

            # Generate sample data
            X = np.random.randn(1000, 5)
            D = np.random.binomial(1, 0.5, 1000)  # Random treatment
            Y = X[:, 0] + 2 * D + np.random.randn(1000)

            # Fit simple estimator
            estimator = SimpleDistributionEstimator()
            estimator.fit(X, D, Y)

            # Compute treatment effects
            locations = np.linspace(Y.min(), Y.max(), 20)
            dte, lower, upper = estimator.predict_dte(1, 0, locations)
            pte, pte_lower, pte_upper = estimator.predict_pte(1, 0, locations)
    """

    def __init__(self):
        """Create the estimator; takes no arguments and defers to the parent initializer."""
        super().__init__()

    def fit(
        self, covariates: ArrayLike, treatment_arms: ArrayLike, outcomes: ArrayLike
    ) -> SimpleDistributionEstimator:
        """
        Validate the observed data and store it on the estimator.

        Args:
            covariates: Pre-treatment covariates.
            treatment_arms: The index of the treatment arm.
            outcomes: Scalar-valued observed outcome.

        Returns:
            SimpleDistributionEstimator: This estimator, with the data attached.

        Raises:
            ValueError: If ``treatment_arms`` or ``outcomes`` does not have the
                same first-dimension length as ``covariates``.
        """
        x = _convert_to_ndarray(covariates)
        d = _convert_to_ndarray(treatment_arms)
        y = _convert_to_ndarray(outcomes)

        # All three inputs must describe the same number of observations.
        n_obs = x.shape[0]
        if d.shape[0] != n_obs:
            raise ValueError("The shape of covariates and treatment_arm should be same")
        if y.shape[0] != n_obs:
            raise ValueError("The shape of covariates and outcome should be same")

        self.covariates, self.treatment_arms, self.outcomes = x, d, y
        # One stratum label (0) per observation: no actual stratification.
        self.strata = np.zeros(n_obs)

        return self


class AdjustedDistributionEstimator(AdjustedStratifiedDistributionEstimator):
    """
    Estimator of distribution treatment effects with machine learning adjustment.

    Cross-fitting with ML models is used to adjust for confounding when computing
    Distribution Treatment Effects (DTE), Probability Treatment Effects (PTE), and
    Quantile Treatment Effects (QTE). It yields more precise estimates when
    treatment assignment depends on observed covariates.

    The class builds on ``AdjustedStratifiedDistributionEstimator`` and inherits
    its constructor; ``fit`` labels every observation with stratum ``0``, so all
    data share one stratum.

    Example:
        .. code-block:: python

            import numpy as np
            from sklearn.ensemble import RandomForestClassifier
            from dte_adj import AdjustedDistributionEstimator

            # Generate confounded data
            X = np.random.randn(1000, 5)
            treatment_prob = 1 / (1 + np.exp(-(X[:, 0] + X[:, 1])))
            D = np.random.binomial(1, treatment_prob, 1000)
            Y = X.sum(axis=1) + 2 * D + np.random.randn(1000)

            # Fit adjusted estimator
            base_model = RandomForestClassifier(n_estimators=100)
            estimator = AdjustedDistributionEstimator(base_model, folds=3)
            estimator.fit(X, D, Y)

            # Compute adjusted treatment effects
            locations = np.linspace(Y.min(), Y.max(), 20)
            dte, lower, upper = estimator.predict_dte(1, 0, locations, variance_type="moment")
    """

    def fit(
        self, covariates: ArrayLike, treatment_arms: ArrayLike, outcomes: ArrayLike
    ) -> AdjustedDistributionEstimator:
        """
        Validate the observed data and store it on the estimator.

        Args:
            covariates: Pre-treatment covariates.
            treatment_arms: The index of the treatment arm.
            outcomes: Scalar-valued observed outcome.

        Returns:
            AdjustedDistributionEstimator: This estimator, with the data attached.

        Raises:
            ValueError: If ``treatment_arms`` or ``outcomes`` does not have the
                same first-dimension length as ``covariates``.
        """
        covariate_array = _convert_to_ndarray(covariates)
        arm_array = _convert_to_ndarray(treatment_arms)
        outcome_array = _convert_to_ndarray(outcomes)

        # The covariate row count is the reference the other inputs must match.
        expected_rows = covariate_array.shape[0]

        if arm_array.shape[0] != expected_rows:
            raise ValueError("The shape of covariates and treatment_arm should be same")

        if outcome_array.shape[0] != expected_rows:
            raise ValueError("The shape of covariates and outcome should be same")

        self.covariates = covariate_array
        self.treatment_arms = arm_array
        self.outcomes = outcome_array
        # One stratum label (0) per observation: no actual stratification.
        self.strata = np.zeros(expected_rows)

        return self