Source code for transitionMatrix.statespaces.statespace

# encoding: utf-8

# (c) 2017-2026 Open Risk (https://www.openriskmanagement.com)
#
# TransitionMatrix is licensed under the Apache 2.0 license a copy of which is included
# in the source distribution of TransitionMatrix. This is notwithstanding any licenses of
# third-party software included in this distribution. You may not use this file except in
# compliance with the License.
#
# Unless required by applicable law or agreed to in writing, software distributed under
# the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
# either express or implied. See the License for the specific language governing permissions and
# limitations under the License.

""" StateSpace holds information about the stochastic system state space


"""


[docs] class StateSpace(object): """ The StateSpace object stores a state space structure as a List of tuples The first two elements of each tuple contain the index (base-0) and label of the state space respectively. Additional fields reserved for further characterisation [(index 1, label 1, optional, optional, ...), (index 2, label 2, optional, optional, ...)] .. Todo:: Implement Absorbing States .. Todo:: Implement in estimators """ def __init__(self, definition=None, sticky=False, absorbing=None, originator=None, full_name=None, cqs_mapping=None, transition_data=None): """ :param definition: List of tuples describing the state space :param sticky: Sticky = True, measurement data may contain repeat measurements of unchanged states. The default is False, only state changes are recorded :param absorbing: List of states that are absorbing :param originator: Name of entity defining the State Space (e.g. A Credit Rating Agency) :param full_name: Full (formal) name for the State Space (e.g. Concrete Credit Rating Scale) :param cqs_mapping: For credit ratings that have a mapping to the simplified EU CQS Scale :param transition_data: pandas dataframe with transition data """ if transition_data is not None: if definition is None and transition_data.empty: definition = [] # allow empty state space elif not transition_data.empty and not definition: definition = self._infer(transition_data) # construct from data elif not transition_data.empty and definition: pass # explicitly defined state space overrules implicit from data if definition: self.definition = definition else: definition = [] self.cardinality = len(definition) self.sticky = sticky self.absorbing = absorbing self.originator = originator self.full_name = full_name self.cqs_mapping = cqs_mapping def _infer(self, transition_data): """ Infer the state space from the data. This uses the State column by default and does an automated sorting by default. :return: Definition list .. warning: If the state space include ascii characters the order will be arbitrary. If it is important for presentation purposes it needs to be adjusted manually (see examples/python/data_cleaning_example.py) """ unique_states = sorted(transition_data['State'].unique()) definition = [] i = 0 for s in unique_states: state = (i, s) definition.append(state) i += 1 return definition
[docs] def get_states(self): """ Return a list with the set of states """ states = [] for s in self.definition: states.append(s[0]) return states
[docs] def get_state_labels(self): """ Return a list of state descriptions """ states = [] for state in self.definition: states.append(state[1]) return states
[docs] def generic(self, n=2): """ Create a generic state space of size n """ self.cardinality = n description = [] for s in range(n): description.append((str(s), str(s))) self.definition = description
[docs] def validate_dataset(self, dataset, labels=None): """ Check that a dataset column is consistent with a given state space. The following tests are implemented 1: all the states in dataset exist in the state space description (error otherwise) 2: all the states in state space exist in dataset (warning otherwise) 3: TODO successive states for the same entity are different, unless the Sticky flag is True :param dataset: the dataset to test :param labels: the labels of the state space :returns: a list of validation messages """ # Select the appropriate State label # This covers for case of relabeling or multiple columns with state data if labels is not None: state_label = labels['State'] else: state_label = 'State' # The unique states in the data set dataset_states = dataset[state_label].unique() state_list = dataset_states.tolist() state_list_stringified = [str(s) for s in state_list] # print(state_list_stringified) ds = set(state_list_stringified) # The expected states according to the state space expected_states = [] for state in self.definition: expected_states.append(state[1]) es = set(expected_states) if ds.difference(es): validation_outcome = ds.difference(es) print('Found ', ds) print('Expected', es) validation_message = "Dataset contains more states than expected. Check the following: " + str( validation_outcome) elif es.difference(ds): validation_outcome = es.difference(ds) validation_message = "Dataset contains fewer states than expected. Check the following: " + str( validation_outcome) else: validation_outcome = '' validation_message = "Dataset contains the expected states." return validation_message, validation_outcome
[docs] def describe(self): """ Print the State Space description """ print(80 * '=') if self.full_name: print('State Space: ', self.full_name) else: print('State Space') print(80 * '-') for state in self.definition: print("State Index and Label: ", state[0], ", ", state[1]) print(80 * '-')
[docs] def cqs_map(self, label): """ Produce a CQS for a given input label (the cqs_mapping dictionary must exist) """ mapped = None for x in self.definition: if x[1] == label: mapped = self.cqs_mapping[x[0]] if mapped: return mapped else: print("ERROR: Mapping failed")