# Source code for transitionMatrix.estimators.simple_estimator

# -*- coding: utf-8 -*-

# (c) 2017-2026 Open Risk, all rights reserved
#
# TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
# in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
# third-party software included in this distribution. You may not use this file except in
# compliance with the License.
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function
import numpy as np

from transitionMatrix.estimators import BaseEstimator
import statsmodels.stats.proportion as st



[docs]
class SimpleEstimator(BaseEstimator):
    """
    Simple estimator suitable for single period transitions.

    Every observed transition is treated as part of one cohort: transitions
    are counted per (origin, destination) pair and each row of counts is
    divided by the number of observations that start in that origin state.

    This is useful for testing, getting a first feel about the transition landscape.

    Parameters
    ----------
    states : object, optional
        Description of the state space. ``fit`` reads its ``cardinality``
        attribute to size the count arrays.
    ci : dict, optional
        Confidence interval settings. Must provide the keys ``'method'``
        (one of ``'goodman'``, ``'sison-glaz'``, ``'binomial'``) and
        ``'alpha'`` (significance level). When omitted, ``fit`` computes no
        confidence intervals.

    """

    # Confidence interval method names accepted by the constructor.
    # NOTE(review): 'binomial' passes validation here, but ``fit`` forwards the
    # method name unchanged to statsmodels' multinomial_proportions_confint,
    # whose documented methods are 'goodman' and 'sison-glaz' - confirm that
    # 'binomial' is actually usable.
    _CI_METHODS = ('goodman', 'sison-glaz', 'binomial')

    def __init__(self, states=None, ci=None):
        BaseEstimator.__init__(self)

        if states is not None:
            self.states = states
        if ci is not None:
            method = ci['method']
            # Raised explicitly (instead of using ``assert``) so that the check
            # is not stripped under ``python -O``; the exception type is kept
            # for callers that catch AssertionError.
            if method not in self._CI_METHODS:
                raise AssertionError(
                    "ci['method'] must be one of {}, got {!r}".format(
                        list(self._CI_METHODS), method))
            self.ci_method = method
            self.ci_alpha = ci['alpha']

    def fit(self, data):
        """
        Estimate the transition matrix from observed single-period transitions.

        Parameters
        ----------
        data : pandas.DataFrame-like
            The data to use for the estimation. It is iterated with
            ``data.itertuples(index=False)``; in every row the value at
            position 2 is used as the origin state and the value at position 3
            as the destination state. Both are used directly as array indices
            and must therefore be integers.

        Returns
        -------
        matrix_set
            ``self.matrix_set`` after the estimated transition matrix has been
            appended to it.

        Notes
        ------

        * loop over data rows
        * calculate migrations count N^{ij} from state i to state j
        * calculate population count N^i per origin state i as the row sum
        * calculate transition matrix as ratio T^{ij} = N^{ij} / N^i
          (rows without any observation are left as zeros)

        Side effects: sets ``self.counts`` to the total number of observed
        transitions and, when a confidence interval method is configured,
        sets ``self.confint_lower`` and ``self.confint_upper`` (each of shape
        ``(state_count, state_count, 1)``).

        """

        # In the simple estimator all events are part of the same cohort
        state_count = self.states.cardinality

        # Count migrations per (origin, destination) pair
        tmn_count = np.zeros((state_count, state_count))
        for row in data.itertuples(index=False):
            tmn_count[row[2], row[3]] += 1

        # Population per origin state is the row sum of the migration counts
        tm_count = tmn_count.sum(axis=1)
        self.counts = int(tm_count.sum())

        # ``ci_method`` is only assigned by __init__ when ``ci`` was supplied,
        # so fall back to None instead of failing on a missing attribute.
        ci_method = getattr(self, 'ci_method', None)
        if ci_method:
            # Confidence intervals for multinomial proportions, computed row by
            # row on the raw counts (before normalization). See
            # http://www.statsmodels.org/devel/_modules/statsmodels/stats/proportion.html
            # The call returns one [lower, upper] pair per category.
            confint_lower = np.zeros((state_count, state_count, 1))
            confint_upper = np.zeros((state_count, state_count, 1))
            for s1 in range(state_count):
                intervals = np.asarray(st.multinomial_proportions_confint(
                    tmn_count[s1, :], alpha=self.ci_alpha, method=ci_method))
                confint_lower[s1, :, 0] = intervals[:, 0]
                confint_upper[s1, :, 0] = intervals[:, 1]
            self.confint_lower = confint_lower
            self.confint_upper = confint_upper

        # Normalization of counts to produce the probability matrix. Rows with
        # no observations are skipped and therefore stay at zero.
        row_totals = tm_count[:, np.newaxis]
        np.divide(tmn_count, row_totals, out=tmn_count, where=row_totals > 0)

        # We store and return the matrix in matrix set (but there is only one instance)
        self.matrix_set.append(tmn_count)

        return self.matrix_set