Source code for slitflow.tbl.table

import numpy as np
import pandas as pd

from ..data import Data
from .. import setindex



[docs]
class Table(Data):
    """Table Data class using pandas.DataFrame saved as CSV files.

    See also :class:`~slitflow.data.Data` for properties and methods.
    Concrete subclasses are mainly in :mod:`slitflow.tbl`,
    :mod:`slitflow.trj` and :mod:`slitflow.loc`.

    """
    # Extension of the CSV files this class reads and writes
    # (presumably consumed by the Data base class when building file
    # names -- confirm against slitflow.data.Data).
    EXT = ".csv"

    def __init__(self, info_path=None):
        """Create the table object by delegating to the parent constructor.

        Args:
            info_path: Passed unchanged to the parent ``__init__``.
                Defaults to None.
        """
        super().__init__(info_path)


[docs]
    def load_data(self, path):
        """Read one CSV file into a :class:`pandas.DataFrame`.

        Args:
            path: Location of the CSV file, handed to
                :func:`pandas.read_csv`.

        Returns:
            pandas.DataFrame: Table parsed with the column dtypes returned
            by ``self.info.get_column_type()``.
        """
        column_types = self.info.get_column_type()
        table = pd.read_csv(path, dtype=column_types)
        return table



[docs]
    def save_data(self, df, path):
        """Write a :class:`pandas.DataFrame` to a CSV file.

        The column labels are replaced by the names returned from
        ``self.info.get_column_name("all")`` before writing, and the row
        index is not written.

        Args:
            df (pandas.DataFrame): Table to save.
            path: Destination of the CSV file.
        """
        header = self.info.get_column_name("all")
        df.set_axis(header, axis=1).to_csv(path, index=False)



[docs]
    def split_data(self):
        """Split data table according to info.index.

        ``self.data`` (a list of data frames, possibly containing ``None``)
        is concatenated, joined with the ``_dest`` column of
        ``self.info.index`` and regrouped into one list element per
        distinct ``_dest`` value:

        * rows whose ``_dest`` is 0 are discarded,
        * groups are ordered by the absolute value of ``_dest`` and then by
          the index columns,
        * a group with a negative ``_dest`` is replaced by ``None``.

        If ``self.data`` holds no data frame at all, it becomes a list of
        ``None`` with one entry per distinct non-zero ``abs(_dest)``, and
        rows of ``self.info.index`` whose ``_file``, ``_split`` and
        ``_keep`` are all NaN are removed.

        The result is stored in ``self.data``; nothing is returned.
        """
        df_index = self.info.index
        internal_cols = ("_file", "_split", "_dest", "_keep", "_load")
        index_cols = [col for col in df_index.columns
                      if col not in internal_cols]
        self.data = [df for df in self.data if df is not None]

        if not self.data:
            # Nothing to split: emit one None placeholder per distinct
            # non-zero destination. np.unique is enough to count them.
            dest_abs = df_index["_dest"].abs().values
            dest_abs = dest_abs[dest_abs != 0]
            self.data = [None] * len(np.unique(dest_abs))
            # Delete temporary rows, i.e. rows in which "_file", "_split"
            # and "_keep" are all NaN.
            df_index = df_index.dropna(
                subset=["_file", "_split", "_keep"], how="all")
            self.info.index = df_index.reset_index(drop=True)
            return

        df_data = pd.concat(self.data)
        if not index_cols:
            # No index column to join on: pair every distinct _dest with
            # every data row through a constant helper key. ``assign``
            # returns new frames, so no slice is written to in place.
            df_dest = df_index[["_dest"]].drop_duplicates().assign(_key=1)
            df = pd.merge(df_dest, df_data.assign(_key=1),
                          on="_key").drop(columns="_key")
        else:
            df = pd.merge(df_index[index_cols + ["_dest"]],
                          df_data, on=index_cols, how="left")

        df = df[df["_dest"] != 0]
        # Temporary sort key so that +k and -k destinations sort together.
        df = df.assign(_dest_abs=df["_dest"].abs())
        df = df.sort_values(
            by=["_dest_abs"] + index_cols).reset_index(drop=True)
        df = df.drop(columns=["_dest_abs"])
        # sort=False keeps the group order established by the sort above.
        self.data = [None if dest < 0 else group.drop(columns=["_dest"])
                     for dest, group in df.groupby("_dest", sort=False)]



[docs]
    def set_index(self):
        """Define how ``info.index`` is obtained for this class.

        The default for Table delegates to
        :func:`slitflow.setindex.from_data`, passing this object. What that
        helper reads and writes is not visible in this module.

        """
        setindex.from_data(self)





[docs]
def merge_different_index(self, req_no):
    """Merge the index table to the split result data.

    This function is used in :meth:`~slitflow.data.Data.post_run`
    to append index information into the result data table.
    For example, if :meth:`process` does not return any ``img_no`` because
    the required data is a :class:`numpy.ndarray` that does not have
    ``img_no`` information, we have to add ``img_no`` from
    :attr:`~slitflow.info.Info.index`.

    For every ``("_file", "_split")`` group of the required data's file
    index, the de-duplicated index rows (without the internal ``_file``,
    ``_split``, ``_dest`` and ``_keep`` columns) are placed side by side
    with the result table at the same list position. Gaps are
    forward-filled and the columns are cast to
    ``self.info.get_column_type()``.

    Args:
        self: Data object providing ``reqs``, ``data`` and ``info``.
        req_no (int): Position in ``self.reqs`` of the required data whose
            file index is merged.

    Returns:
        None. ``self.data`` is replaced by the list of merged tables. It is
        left untouched when the file index has no rows.
    """
    df_index = self.reqs[req_no].info.file_index()
    if len(df_index) == 0:
        return
    column_types = self.info.get_column_type()
    internal_cols = ["_file", "_split", "_dest", "_keep"]
    dfs = []
    for i, (_, rows) in enumerate(df_index.groupby(["_file", "_split"])):
        row_index = rows.drop_duplicates().reset_index(drop=True)
        # Not every internal column is necessarily present.
        row_index = row_index.drop(columns=internal_cols, errors="ignore")
        df_mrg = pd.concat([row_index, self.data[i]], axis=1)
        # The index part is usually shorter than the data, so propagate its
        # values downwards. ``ffill`` replaces the deprecated
        # ``fillna(method="ffill")``.
        dfs.append(df_mrg.ffill().astype(column_types))
    self.data = dfs




[docs]
def merge_overlap_index(self, req_no, on_col_name):
    """Merge the index table to the split result data.

    This function is used in :meth:`~slitflow.data.Data.post_run`
    to append index information into the result data table.
    It handles index tables that have columns overlapping with the result
    data: for every ``("_file", "_split")`` group of the required data's
    file index, the de-duplicated index rows (without ``_file`` and
    ``_split``) are merged with the result table at the same list
    position on ``on_col_name``. Gaps are forward-filled and the columns
    are cast to ``self.info.get_column_type()``.

    Args:
        self: Data object providing ``reqs``, ``data`` and ``info``.
        req_no (int): Position in ``self.reqs`` of the required data whose
            file index is merged.
        on_col_name (str or list of str): Column name(s) shared by the
            index and the result table, passed as ``on`` to
            :meth:`pandas.DataFrame.merge`.

    Returns:
        None. ``self.data`` is replaced by the list of merged tables.
    """
    df_index = self.reqs[req_no].info.file_index()
    column_types = self.info.get_column_type()
    dfs = []
    for i, (_, rows) in enumerate(df_index.groupby(["_file", "_split"])):
        row_index = (rows.drop_duplicates()
                     .drop(columns=["_file", "_split"])
                     .reset_index(drop=True))
        df_mrg = row_index.merge(self.data[i], on=on_col_name)
        # ``ffill`` replaces the deprecated ``fillna(method="ffill")``.
        df_mrg = df_mrg.ffill().astype(column_types)
        dfs.append(df_mrg.reset_index(drop=True))
    self.data = dfs