# Source code for swmmio.version_control.inp

from collections import OrderedDict

from swmmio.utils.text import get_inp_sections_details
from swmmio.version_control import utils as vc_utils
from swmmio.utils.dataframes import dataframe_from_bi, dataframe_from_inp
import swmmio
import pandas as pd
import os
from copy import deepcopy
# widen pandas' column display so long cell values are not truncated when printed
pd.set_option('display.max_colwidth', 200)

# Sections that BuildInstructions.build, INPDiff and merge_models do not diff.
# NOTE(review): these names carry square brackets, whereas the local list used in
# create_inp_build_instructions is unbracketed -- confirm which form matches the
# keys returned by get_inp_sections_details.
problem_sections = [
    '[CURVES]',
    '[TIMESERIES]',
    '[RDII]',
    '[HYDROGRAPHS]',
]



# [docs]
class BuildInstructions(object):
    """
    Similar to the INPSectionDiff object, this object contains the information
    used to generate an inp from a 'serialized' (though human readable,
    inp-esque) build instructions file. It is meant to neatly encapsulate the
    per-section changes together with their metadata.

    Attributes:

    - **instructions**: dictionary keyed by section header, holding one
      INPSectionDiff per section that has changes, i.e. the build
      instructions w.r.t. the baseline model. Empty when no build
      instructions file is given.
    - **metadata**: dictionary read from the build instructions file with
      ``vc_utils.read_meta_data``. Empty when no file is given.

    Instances can be combined with ``+`` (and therefore with ``sum()``),
    written to disk with ``save`` and applied to a baseline model with ``build``.
    """

    def __init__(self, build_instr_file=None):

        # create a change object for each section that is different from baseline
        self.instructions = {}
        self.metadata = {}
        if build_instr_file:
            # read the instructions and create a dictionary of Change objects
            allheaders = get_inp_sections_details(build_instr_file)
            instructions = {}
            for section, _ in allheaders.items():
                change = INPSectionDiff(build_instr_file=build_instr_file, section=section)
                instructions.update({section: change})

            self.instructions = instructions

            # read the meta data
            self.metadata = vc_utils.read_meta_data(build_instr_file)

    def __add__(self, other):
        bi = BuildInstructions()
        for section, change_obj in self.instructions.items():
            if section in other.instructions:
                new_change = change_obj + other.instructions[section]
                bi.instructions[section] = new_change
            else:
                # section doesn't exist in other, maintain current instructions
                bi.instructions[section] = change_obj

        for section, change_obj in other.instructions.items():
            if section not in self.instructions:
                bi.instructions[section] = change_obj

        # combine the metadata
        # deepcopy so child structures aren't linked to original
        bi.metadata = deepcopy(self.metadata)
        otherbaseline = other.metadata['Parent Models']['Baseline']
        otheralternatives = other.metadata['Parent Models']['Alternatives']
        bi.metadata['Parent Models']['Baseline'].update(otherbaseline)
        bi.metadata['Parent Models']['Alternatives'].update(otheralternatives)
        bi.metadata['Log'].update(other.metadata['Log'])

        return bi

    def __radd__(self, other):
        # this is so we can call sum() on a list of build_instructions
        if other == 0:
            return self
        else:
            return self.__add__(other)


# [docs]
    def save(self, dir, filename):
        """
        Write this BuildInstructions instance to a human-readable file.

        Parameters
        ----------
        dir : str
            Directory that will hold the file; it is created when it does not exist yet.
        filename : str
            Name of the file written inside ``dir``.

        Notes
        -----
        The metadata is written first. Then, for every section in
        ``self.instructions``, the removed, altered and added tables are
        concatenated (in that order) and written as one section.
        """
        if not os.path.exists(dir):
            os.makedirs(dir)

        with open(os.path.join(dir, filename), 'w') as f:
            vc_utils.write_meta_data(f, self.metadata)
            for section in self.instructions:
                diff = self.instructions[section]
                frames = [diff.removed, diff.altered, diff.added]
                vc_utils.write_inp_section(
                    f,
                    allheaders=None,
                    sectionheader=section,
                    section_data=pd.concat(frames),
                    pad_top=False,
                    na_fill='NaN',
                )



# [docs]
    def build(self, baseline_dir, target_path):
        """
        Write a complete INP file: the baseline model with these build
        instructions committed to it.

        :param baseline_dir: passed to ``swmmio.Model`` to load the baseline model
        :param target_path: path of the INP file to write
        """
        basemodel = swmmio.Model(baseline_dir)
        baseline_inp = basemodel.inp.path
        allheaders = get_inp_sections_details(baseline_inp)

        with open(target_path, 'w') as f:
            for section in allheaders:
                # changes are only committed to sections that are not listed as
                # problematic, are not a 'blob', and have instructions
                apply_changes = (
                    section not in problem_sections
                    and allheaders[section]['columns'] != ['blob']
                    and section in self.instructions
                )

                if not apply_changes:
                    # section is not well understood or is problematic, just blindly copy
                    copied = dataframe_from_bi(baseline_inp, section=section)
                    copied[';'] = ';'
                    vc_utils.write_inp_section(f, allheaders, section, copied)
                    continue

                # baseline version of the section
                basedf = dataframe_from_bi(baseline_inp, section)
                basedf[';'] = ';'

                changes = self.instructions[section]

                # drop every baseline element that is altered or tagged for removal
                stale_ids = changes.removed.index.union(changes.altered.index)
                kept = basedf.drop(stale_ids)

                # re-add the altered elements and the new ones, omitting empty tables
                pieces = [df for df in (kept, changes.altered, changes.added) if not df.empty]
                if pieces:
                    vc_utils.write_inp_section(f, allheaders, section, pd.concat(pieces))





# [docs]
class INPSectionDiff(object):
    """
    This object represents the 'changes' of a given section of a INP file
    with respect to another INP. Three main dataframes are attributes:
    - **added**: includes elements that are new in model2 (compare to model1)
    - **removed**: elements that do not exist in model2, that were found to model1
    - **altered**: elements whose attributes have changes from model1 to model2

    All three are empty dataframes when the object is created without models
    and without a build instructions file.

    :param model1: base model for diff
    :param model2: target model for diff
    :param section: section of the inp used for comparison
    :param build_instr_file: optionally instantiate an INPSectionDiff from an existing Build Instructions file

    >>> from swmmio.examples import jersey, jerzey
    >>> mydiff = INPSectionDiff(jersey, jerzey, section='JUNCTIONS')
    >>> print(mydiff)
    <BLANKLINE>
          InvertElev  MaxDepth  InitDepth  SurchargeDepth  PondedArea  ;  Comment                     Origin
    Name                                                                                                    
    1           17.0         0          0               0           0  ;  Removed  model_full_features_b.inp
    2           17.0         0          0               0           0  ;  Removed  model_full_features_b.inp
    3           16.5         0          0               0           0  ;  Removed  model_full_features_b.inp
    4           16.0         0          0               0           0  ;  Removed  model_full_features_b.inp
    5           15.0         0          0               0           0  ;  Removed  model_full_features_b.inp
    """

    def __init__(self, model1=None, model2=None, section='JUNCTIONS', build_instr_file=None):
        self.model1 = model1 if model1 else ""
        self.model2 = model2 if model2 else ""

        # always define the change tables, so that an instance created without
        # arguments can still be added to another diff or printed
        self.added = pd.DataFrame()
        self.removed = pd.DataFrame()
        self.altered = pd.DataFrame()

        if model1 and model2:
            df1 = dataframe_from_inp(model1.inp.path, section)
            df2 = dataframe_from_inp(model2.inp.path, section)

            # a section that doesn't exist in one of the models is treated as empty
            if df1 is None:
                df1 = pd.DataFrame()
            if df2 is None:
                df2 = pd.DataFrame()

            df1[';'] = ';'
            df2[';'] = ';'
            m2_origin_string = os.path.basename(model2.inp.path).replace(' ', '-')

            added_ids = df2.index.difference(df1.index)
            removed_ids = df1.index.difference(df2.index)

            # find where elements were changed (but kept with same ID)
            common_ids = df1.index.difference(removed_ids)  # original - removed = in common
            # both dfs concatenated, with matched indices for each element
            full_set = pd.concat([df1.loc[common_ids], df2.loc[common_ids]], sort=False)
            # remove whitespace
            full_set = full_set.apply(lambda x: x.astype(str).str.strip() if x.dtype == "object" else x)
            # drop dupes on the set, all things that did not change should have 1 row
            changes_with_dupes = full_set.drop_duplicates()
            # duplicate indices are rows that have changes, isolate these
            changed_ids = changes_with_dupes.index[changes_with_dupes.index.duplicated()].unique()

            added = df2.loc[added_ids].copy()
            added['Comment'] = 'Added'
            added['Origin'] = m2_origin_string

            altered = df2.loc[changed_ids].copy()
            altered['Comment'] = 'Altered'
            altered['Origin'] = m2_origin_string

            removed = df1.loc[removed_ids].copy()
            removed['Comment'] = 'Removed'
            removed['Origin'] = m2_origin_string

            self.old = df1
            self.new = df2
            self.added = added
            self.removed = removed
            self.altered = altered

        if build_instr_file:
            # if generating from a build instructions file, do this (more efficient)
            # the 'Comment' column stored in the file tells the three tables apart
            df = dataframe_from_bi(build_instr_file, section=section)
            self.added = df.loc[df['Comment'] == 'Added']
            self.removed = df.loc[df['Comment'] == 'Removed']
            self.altered = df.loc[df['Comment'] == 'Altered']

    def __add__(self, other):
        """
        Stack the added, removed and altered tables of two diffs into a new
        INPSectionDiff. Conflicting entries are not detected.
        """
        # this should be made more robust to catch conflicts
        change = INPSectionDiff()
        change.added = pd.concat([self.added, other.added], axis=0)
        change.removed = pd.concat([self.removed, other.removed], axis=0)
        change.altered = pd.concat([self.altered, other.altered], axis=0)

        return change

    def __str__(self):
        # preview: first rows of removed + added + altered, preceded by a newline
        diff = pd.concat([self.removed, self.added, self.altered])
        return '\n{}'.format(diff.head().to_string())




# [docs]
class INPDiff(object):
    """
    Section-by-section diff of the INP files of two models.

    :param model1: base model for diff; a string is passed to ``swmmio.Model``,
        any other object is used as given
    :param model2: target model for diff; handled the same way as model1

    >>> from swmmio.tests.data import MODEL_FULL_FEATURES_XY, MODEL_FULL_FEATURES_XY_B
    >>> mydiff = INPDiff(MODEL_FULL_FEATURES_XY, MODEL_FULL_FEATURES_XY_B)
    >>> print(mydiff.diffs['XSECTIONS'])
    <BLANKLINE>
             Shape  Geom1  Geom2  Geom3  Geom4  Barrels  XX  ;  Comment                     Origin
    Link                                                                                          
    1:4   CIRCULAR      1      0      0      0      1.0 NaN  ;  Removed  model_full_features_b.inp
    2:5   CIRCULAR      1      0      0      0      1.0 NaN  ;  Removed  model_full_features_b.inp
    3:4   CIRCULAR      1      0      0      0      1.0 NaN  ;  Removed  model_full_features_b.inp
    4:5   CIRCULAR      1      0      0      0      1.0 NaN  ;  Removed  model_full_features_b.inp
    5:J1  CIRCULAR      1      0      0      0      1.0 NaN  ;  Removed  model_full_features_b.inp
    """
    def __init__(self, model1=None, model2=None):
        # strings are turned into Model objects first
        base = swmmio.Model(model1) if isinstance(model1, str) else model1
        target = swmmio.Model(model2) if isinstance(model2, str) else model2
        self.m1 = base
        self.m2 = target
        self.diffs = OrderedDict()

        base_sections = get_inp_sections_details(base.inp.path)
        target_sections = get_inp_sections_details(target.inp.path)

        # union of the section names of both models, first occurrence keeps its place
        combined_names = list(base_sections.keys()) + list(target_sections.keys())
        self.all_sections = list(OrderedDict.fromkeys(combined_names))

        # section details of both models; model2's entry wins for shared sections
        self.all_inp_objects = OrderedDict(base_sections)
        self.all_inp_objects.update(target_sections)

        for name in self.all_sections:
            if name in problem_sections:
                continue
            # calculate the changes in the current section
            self.diffs[name] = INPSectionDiff(base, target, name)

    def __str__(self):
        header = '--- {}\n+++ {}\n\n'.format(self.m1.inp.path, self.m2.inp.path)
        per_section = []
        for name, section_diff in self.diffs.items():
            per_section.append('{}\n{}'.format(name, str(section_diff)))
        return header + '\n\n'.join(per_section)




# [docs]
def create_inp_build_instructions(inpA, inpB, path, filename, comments=''):
    """
    Take two inp file paths and write a build instructions text file holding
    the differences found in each INP section of inpA. These differences
    should then be used whenever this model needs to be rebuilt from the
    baseline reference model.

    :param inpA: path of the baseline inp file
    :param inpB: path of the alternative inp file
    :param path: directory for the build instructions file (created if missing)
    :param filename: file name without extension; '.txt' is appended
    :param comments: text stored in the metadata 'Log' under ``filename``
    :return: BuildInstructions loaded from the file that was written

    Note: this should be split into a func that creates a overall model "diff"
    that can then be written as a BI file or used programmatically
    """

    allsections_a = get_inp_sections_details(inpA)
    modela = swmmio.Model(inpA)
    modelb = swmmio.Model(inpB)

    # create build instructions folder
    if not os.path.exists(path):
        os.makedirs(path)
    filepath = os.path.join(path, filename) + '.txt'

    # sections that are never diffed here
    skipped_sections = ['TITLE', 'CURVES', 'TIMESERIES', 'RDII', 'HYDROGRAPHS']
    with open(filepath, 'w') as newf:

        # write meta data
        parent_models = {
            'Baseline': {inpA: vc_utils.modification_date(inpA)},
            'Alternatives': {inpB: vc_utils.modification_date(inpB)}
        }
        metadata = {'Parent Models': parent_models, 'Log': {filename: comments}}
        vc_utils.write_meta_data(newf, metadata)

        for section in allsections_a:
            if section in skipped_sections:
                continue

            # calculate the changes in the current section
            changes = INPSectionDiff(modela, modelb, section)

            # keep only the tables that actually hold changes
            candidates = (changes.removed, changes.added, changes.altered)
            non_empty = [table for table in candidates if not table.empty]

            # if no changes, don't write the section
            if not non_empty:
                continue

            data = pd.concat(non_empty, axis=0, sort=False)
            # na fill fixes SNOWPACK blanks spaces issue
            vc_utils.write_inp_section(newf, allsections_a, section, data, pad_top=False,
                                       na_fill='NaN')

    return BuildInstructions(filepath)




# [docs]
def merge_models(inp1, inp2, target='merged_model.inp'):
    """
    Merge two separate swmm models into one model. This creates a diff of the
    two models and, for every diffed section, writes the inp1 section with the
    altered elements replaced by their inp2 version and the elements that only
    exist in inp2 appended. Elements that only exist in inp1 (the "removed"
    part of the diff) are kept. Sections that are not diffed are copied from
    inp1 unchanged.

    :param inp1: first model; passed to ``swmmio.Model`` and to ``INPDiff``
    :param inp2: second model; passed to ``INPDiff``
    :param target: path of new model
    :return: path to target
    """
    # model object the merged sections are based on
    m3 = swmmio.Model(inp1)

    inp_diff = INPDiff(inp1, inp2)
    with open(target, 'w') as newf:
        for section, details in inp_diff.all_inp_objects.items():
            # only commit changes to sections that are not listed as problematic,
            # are not a 'blob', and were actually diffed
            if (section not in problem_sections
                    and details['columns'] != ['blob']
                    and section in inp_diff.diffs):

                # df of baseline model section
                basedf = dataframe_from_inp(m3.inp.path, section)
                basedf[';'] = ';'
                col_order = basedf.columns
                changes = inp_diff.diffs[section]

                # remove elements that have alterations, keep ones tagged for removal
                # (unchanged, but not present in m2)
                new_section = basedf.drop(changes.altered.index)

                # add the altered and the new elements
                new_section = pd.concat([new_section, changes.altered, changes.added], axis=0, sort=False)

                # back to the baseline column layout (drops the diff-only columns)
                new_section = new_section[col_order].copy()
                new_section[';'] = ';'
            else:
                # section is not well understood or is problematic, just blindly copy.
                # No column selection here: selecting an empty column list would
                # discard all of the section's data.
                new_section = dataframe_from_inp(m3.inp.path, section)
                new_section[';'] = ';'

            # write the section
            vc_utils.write_inp_section(newf, inp_diff.all_inp_objects, section, new_section, pad_top=True)

    return target