Source code for slitflow.tbl.table

import numpy as np
import pandas as pd

from ..data import Data
from .. import setindex


class Table(Data):
    """Table Data class using pandas.DataFrame saved as CSV files.

    See also :class:`~slitflow.data.Data` for properties and methods.
    Concrete subclass is mainly in :mod:`slitflow.tbl`, :mod:`slitflow.trj`
    and :mod:`slitflow.loc`.

    """
    # File extension of the files this class reads and writes.
    EXT = ".csv"

    def __init__(self, info_path=None):
        """Initialize through the parent :class:`~slitflow.data.Data`.

        Args:
            info_path (optional): Forwarded unchanged to the parent
                constructor. Defaults to None.

        """
        super().__init__(info_path)

    def load_data(self, path):
        """Load CSV file as :class:`pandas.DataFrame`.

        Column dtypes are taken from ``self.info.get_column_type()``.

        Args:
            path: Location of the CSV file, passed to
                :func:`pandas.read_csv`.

        Returns:
            pandas.DataFrame: Loaded table.

        """
        return pd.read_csv(path, dtype=self.info.get_column_type())

    def save_data(self, df, path):
        """Save :class:`pandas.DataFrame` data into CSV file.

        The columns are relabeled with ``self.info.get_column_name("all")``
        before writing, and the row index is not written.

        Args:
            df (pandas.DataFrame): Table to save.
            path: Destination of the CSV file.

        """
        df = df.set_axis(self.info.get_column_name("all"), axis=1)
        df.to_csv(path, index=False)

    def split_data(self):
        """Split data table according to info.index.

        ``self.data`` is replaced by a list with one item per distinct
        non-zero ``_dest`` value of ``self.info.index``, ordered by the
        absolute value of ``_dest``. Groups whose ``_dest`` is negative are
        represented by ``None``; rows whose ``_dest`` is zero are discarded.

        """
        df_index = self.info.index
        # Columns of the index table other than the bookkeeping columns.
        index_cols = [col for col in df_index.columns
                      if col not in ["_file", "_split", "_dest", "_keep",
                                     "_load"]]
        self.data = [df for df in self.data if df is not None]

        if len(self.data) == 0:
            # No data at all: make a list of None, one item per distinct
            # non-zero absolute destination number.
            dest_abs = df_index["_dest"].abs().values
            dest_abs = dest_abs[dest_abs != 0]
            self.data = [None] * len(np.unique(dest_abs))
            # Delete temporary rows, i.e. rows in which "_file", "_split"
            # and "_keep" are all NaN.
            df_index = df_index.dropna(
                subset=["_file", "_split", "_keep"], how="all")
            self.info.index = df_index.reset_index(drop=True)
            return

        df_data = pd.concat(self.data)
        if len(index_cols) == 0:
            # No shared index column to join on: pair every distinct
            # destination with every data row through a constant key.
            df_dest = df_index[["_dest"]].drop_duplicates()
            df_dest['_key'] = 1
            df_data['_key'] = 1
            df = pd.merge(
                df_dest, df_data, on='_key').drop('_key', axis=1)
        else:
            df = pd.merge(df_index[index_cols + ["_dest"]], df_data,
                          on=index_cols, how="left")

        # Copy the filtered frame so that adding the helper column below
        # operates on an independent object (no SettingWithCopyWarning).
        df = df[df["_dest"] != 0].copy()
        df['_dest_abs'] = df['_dest'].abs()
        df = df.sort_values(
            by=['_dest_abs'] + index_cols).reset_index(drop=True)
        df.drop(columns=['_dest_abs'], inplace=True)
        # sort=False keeps the groups in the row order established above.
        self.data = [
            None if dest < 0 else group.drop(columns=["_dest"])
            for dest, group in df.groupby("_dest", sort=False)]

    def set_index(self):
        """How to get info.index.

        Default function for Table is :func:`slitflow.setindex.from_data`.

        """
        setindex.from_data(self)

    def merge_different_index(self, req_no):
        """Merge the index table to the split result data.

        This function is used in :meth:`~slitflow.data.Data.post_run` to
        append index information into the result data table. For example,
        if :meth:`process` does not return any ``img_no`` because required
        data is :class:`numpy.ndarray` that do not have ``img_no``
        information, we have to add ``img_no`` from
        :attr:`~slitflow.info.Info.index`.

        The index rows of each ``(_file, _split)`` group are placed side by
        side with ``self.data[i]``, missing values are forward filled and
        the result is cast to ``self.info.get_column_type()``. If the file
        index of the required data is empty, ``self.data`` is left
        untouched.

        Args:
            req_no (int): Position in ``self.reqs`` of the required data
                whose file index is merged.

        """
        df_index = self.reqs[req_no].info.file_index()
        if len(df_index) == 0:
            return
        dfs = []
        for i, (_, row) in enumerate(df_index.groupby(["_file", "_split"])):
            row_index = row.drop_duplicates().reset_index(drop=True)
            # Remove whichever bookkeeping columns are present.
            row_index = row_index.drop(
                columns=["_file", "_split", "_dest", "_keep"],
                errors="ignore")
            df = self.data[i]
            df_mrg = pd.concat([row_index, df], axis=1)
            # DataFrame.ffill() replaces the deprecated
            # fillna(method="ffill").
            dfs.append(df_mrg.ffill().astype(self.info.get_column_type()))
        self.data = dfs

    def merge_overlap_index(self, req_no, on_col_name):
        """Merge the index table to the split result data.

        This function is used in :meth:`~slitflow.data.Data.post_run` to
        append index information into the result data table. This function
        merge index tables that have overlapping columns.

        Args:
            req_no (int): Position in ``self.reqs`` of the required data
                whose file index is merged.
            on_col_name: Column name(s) to join on, passed as ``on`` to
                :meth:`pandas.DataFrame.merge`.

        """
        df_index = self.reqs[req_no].info.file_index()
        dfs = []
        for i, (_, row) in enumerate(df_index.groupby(["_file", "_split"])):
            row_index = row.drop_duplicates()\
                .drop(["_file", "_split"], axis=1).reset_index(drop=True)
            df = self.data[i]
            df_mrg = row_index.merge(df, on=on_col_name)
            # DataFrame.ffill() replaces the deprecated
            # fillna(method="ffill").
            dfs.append(df_mrg.ffill().astype(
                self.info.get_column_type()).reset_index(drop=True))
        self.data = dfs