Source code for transitionMatrix.statespaces.statespace

# encoding: utf-8

# (c) 2017-2026 Open Risk (https://www.openriskmanagement.com)
#
# TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
# in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
# third-party software included in this distribution. You may not use this file except in
# compliance with the License.
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific language governing permissions and
# limitations under the License.

"""StateSpace holds information about the state space of a stochastic system."""



[docs]
class StateSpace(object):
    """Store a state space structure as a list of tuples.

    The first two elements of each tuple contain the index (base-0) and the label
    of the state respectively. Additional fields are reserved for further
    characterisation::

        [(index 1, label 1, optional, optional, ...),
         (index 2, label 2, optional, optional, ...)]

    .. Todo:: Implement Absorbing States

    .. Todo:: Implement in estimators

    """

    def __init__(self, definition=None, sticky=False, absorbing=None, originator=None, full_name=None, cqs_mapping=None,
                 transition_data=None):
        """Create a state space from an explicit definition or from transition data.

        :param definition: List of tuples describing the state space
        :param sticky: Sticky = True, measurement data may contain repeat measurements of unchanged states. The default is False, only state changes are recorded
        :param absorbing: List of states that are absorbing
        :param originator: Name of entity defining the State Space (e.g. A Credit Rating Agency)
        :param full_name: Full (formal) name for the State Space (e.g. Concrete Credit Rating Scale)
        :param cqs_mapping: For credit ratings that have a mapping to the simplified EU CQS Scale
        :param transition_data: pandas dataframe with transition data

        """
        # An explicitly defined (non-empty) state space overrules the one implicit
        # in the data; the data is only consulted when no definition was supplied.
        if not definition and transition_data is not None and not transition_data.empty:
            definition = self._infer(transition_data)

        # Always set the attribute, so that an empty state space is a usable
        # object instead of one that fails on first access to ``definition``.
        self.definition = definition if definition else []
        self.cardinality = len(self.definition)
        self.sticky = sticky
        self.absorbing = absorbing
        self.originator = originator
        self.full_name = full_name
        self.cqs_mapping = cqs_mapping

    def _infer(self, transition_data):
        """Infer the state space from the data.

        Uses the ``State`` column and sorts its unique values; each sorted value
        becomes the label of a state whose index is its position in that order.

        :param transition_data: pandas dataframe with a ``State`` column
        :return: Definition list of ``(index, label)`` tuples

        .. warning:: The order is the default sort order of the labels. If a
           different order is important for presentation purposes it needs to be
           adjusted manually (see examples/python/data_cleaning_example.py)
        """
        unique_states = sorted(transition_data['State'].unique())
        return list(enumerate(unique_states))

    def get_states(self):
        """Return a list with the index of every state (first tuple element)."""
        return [state[0] for state in self.definition]

    def get_state_labels(self):
        """Return a list with the label of every state (second tuple element)."""
        return [state[1] for state in self.definition]

    def generic(self, n=2):
        """Replace the definition with a generic state space of size n.

        Both the index and the label of each state are the string form of its
        position (``'0'``, ``'1'``, ...).

        :param n: number of states to create
        """
        self.cardinality = n
        self.definition = [(str(s), str(s)) for s in range(n)]

    def validate_dataset(self, dataset, labels=None):
        """Check that a dataset column is consistent with this state space.

        The following tests are implemented

        1: all the states in dataset exist in the state space description (error otherwise)
        2: all the states in state space exist in dataset (warning otherwise)
        3: TODO successive states for the same entity are different, unless the Sticky flag is True

        States are compared by their string representation on both sides.

        :param dataset: the dataset to test
        :param labels: optional mapping; ``labels['State']`` names the column holding the states (defaults to ``'State'``)

        :returns: a tuple ``(validation_message, validation_outcome)`` where the
            outcome is the set of offending states, or ``''`` when the dataset
            matches the state space

        """
        # Select the appropriate State label
        # This covers for case of relabeling or multiple columns with state data
        state_label = labels['State'] if labels is not None else 'State'

        # The unique states in the data set
        ds = {str(s) for s in dataset[state_label].unique().tolist()}
        # The expected states according to the state space; stringified as well,
        # otherwise non-string labels could never match the stringified data.
        es = {str(state[1]) for state in self.definition}

        surplus = ds.difference(es)
        missing = es.difference(ds)

        if surplus:
            validation_outcome = surplus
            print('Found ', ds)
            print('Expected', es)
            validation_message = "Dataset contains more states than expected. Check the following: " + str(
                validation_outcome)
        elif missing:
            validation_outcome = missing
            validation_message = "Dataset contains fewer states than expected. Check the following: " + str(
                validation_outcome)
        else:
            validation_outcome = ''
            validation_message = "Dataset contains the expected states."

        return validation_message, validation_outcome

    def describe(self):
        """
        Print the State Space description

        """
        print(80 * '=')
        if self.full_name:
            print('State Space: ', self.full_name)
        else:
            print('State Space')
        print(80 * '-')
        for state in self.definition:
            print("State Index and Label: ", state[0], ", ", state[1])
        print(80 * '-')

    def cqs_map(self, label):
        """
        Produce a CQS for a given input label (the cqs_mapping dictionary must exist)

        The mapping is looked up by the index of the state carrying the label.

        :param label: the state label to map
        :returns: the mapped value, or None (after printing an error) when no
            state carries the label
        """
        mapped = None
        for state in self.definition:
            if state[1] == label:
                mapped = self.cqs_mapping[state[0]]
        # Compare against None explicitly: a falsy mapped value such as 0 is a
        # valid result and must not be reported as a failure.
        if mapped is not None:
            return mapped
        print("ERROR: Mapping failed")
        return None