# -*- coding: utf-8 -*-
# (c) 2017-2026 Open Risk, all rights reserved
#
# TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
# in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
# third-party software included in this distribution. You may not use this file except in
# compliance with the License.
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
from transitionMatrix.estimators import BaseEstimator
import statsmodels.stats.proportion as st
# [docs] -- leftover documentation-link marker from the HTML source listing; not code
class SimpleEstimator(BaseEstimator):
    """
    Simple estimator suitable for single period transitions.

    Every observed transition is treated as belonging to one and the same
    cohort: transitions are counted per (origin, destination) pair and each
    row of counts is divided by the number of observations leaving the
    origin state.

    This is useful for testing, getting a first feel about the transition
    landscape.
    """

    def __init__(self, states=None, ci=None):
        """
        Parameters
        ----------
        states : object, optional
            Description of the state space. ``fit`` reads its ``cardinality``
            attribute to size the count arrays. When omitted, ``self.states``
            is left untouched (i.e. whatever ``BaseEstimator`` provides).
        ci : dict, optional
            Confidence interval settings with the keys ``'method'`` (one of
            ``'goodman'``, ``'sison-glaz'``, ``'binomial'``) and ``'alpha'``
            (significance level). When omitted, no confidence intervals are
            computed by ``fit``.

        Raises
        ------
        AssertionError
            If ``ci['method']`` is not one of the accepted names.
        KeyError
            If ``ci`` lacks the ``'method'`` or ``'alpha'`` key.
        """
        BaseEstimator.__init__(self)
        if states is not None:
            self.states = states
        if ci is not None:
            # NOTE(review): 'binomial' passes this check but is forwarded
            # unchanged to statsmodels' multinomial routine in ``fit``, whose
            # documented methods are 'goodman' and 'sison-glaz' -- confirm.
            assert (ci['method'] in ['goodman', 'sison-glaz', 'binomial'])
            self.ci_method = ci['method']
            self.ci_alpha = ci['alpha']
        else:
            # ``fit`` reads ``self.ci_method`` unconditionally, so make sure
            # the attributes exist. ``getattr`` keeps any default that the
            # base class may already have set instead of overwriting it.
            self.ci_method = getattr(self, 'ci_method', None)
            self.ci_alpha = getattr(self, 'ci_alpha', None)

    def fit(self, data):
        """
        Estimate a single transition matrix from observed transitions.

        Parameters
        ----------
        data : pandas.DataFrame-like
            Object exposing ``itertuples(index=False)``. Each row is read
            positionally: element 2 is used as the origin state and element 3
            as the destination state. Both values are used directly as array
            indices, so they must be integers in ``[0, cardinality)``.
            NOTE(review): presumably the layout is (id, time, from, to) --
            confirm against the callers.

        Returns
        -------
        list
            ``self.matrix_set`` with the newly estimated matrix appended
            (a ``(cardinality, cardinality)`` array of row-normalised
            transition frequencies).

        Notes
        -----
        * loop over data rows
        * calculate population count N^i_k per state i
        * calculate migrations count N^{ij}_{kl} from i to j
        * calculate transition matrix as ratio
          T^{ij}_{kl} = N^{ij}_{kl} / N^i_k
        * rows of states that were never observed as origin stay all-zero
        * the total number of observed transitions is stored in
          ``self.counts``
        * when a confidence interval method is configured, the bounds are
          stored in ``self.confint_lower`` / ``self.confint_upper`` with
          shape ``(cardinality, cardinality, 1)``
        """
        # In the simple estimator all events are part of the same cohort
        state_count = self.states.cardinality

        # Storage for per-origin counts and origin -> destination counts
        tm_count = np.zeros(state_count)
        tmn_count = np.zeros((state_count, state_count))

        for row in data.itertuples(index=False):
            state_in = row[2]
            state_out = row[3]
            tm_count[state_in] += 1
            tmn_count[state_in, state_out] += 1

        self.counts = int(tm_count.sum())

        if self.ci_method:
            # Confidence intervals for multinomial proportions, computed row
            # by row on the raw counts (before normalisation). See
            # http://www.statsmodels.org/devel/_modules/statsmodels/stats/proportion.html
            #   - 'goodman': based on a chi-squared approximation, valid if
            #     all values in the counts are greater or equal to 5
            #   - 'sison-glaz': less conservative than 'goodman', but only
            #     valid if there are 7 or more categories
            # statsmodels returns a 2-D array of [lower, upper] bounds per
            # category with overall coverage of (approximately) 1 - alpha.
            confint_lower = np.empty((state_count, state_count, 1))
            confint_upper = np.empty((state_count, state_count, 1))
            for s1 in range(state_count):
                intervals = np.asarray(
                    st.multinomial_proportions_confint(
                        tmn_count[s1, :],
                        alpha=self.ci_alpha,
                        method=self.ci_method,
                    )
                )
                confint_lower[s1, :, 0] = intervals[:, 0]
                confint_upper[s1, :, 0] = intervals[:, 1]
            self.confint_lower = confint_lower
            self.confint_upper = confint_upper

        # Normalization of counts to produce a probability matrix. Only rows
        # with at least one observation are divided, which avoids 0 / 0.
        populated = tm_count > 0
        tmn_count[populated] /= tm_count[populated][:, np.newaxis]

        # We store and return the matrix in matrix set (but there is only
        # one instance)
        self.matrix_set.append(tmn_count)
        return self.matrix_set