"""
This process module includes classes that change table structure such as
rows and columns.
"""
import numpy as np
import pandas as pd
from .table import Table
class MaskFromParam(Table):
    """Create a mask column based on explicit param values.

    Args:
        reqs[0] (Table): The table to select from.
        param["index"] (list of tuple): A list of tuples that contains the
            indices to select. The tuple should be (index of depth=1, index
            of depth=2, ...). If an element is None, all indices of that
            depth are selected.
        param["mask_col"] (str, optional): The name of the mask column.
            Defaults to "mask".
        param["split_depth"] (int): The file split depth number.

    Returns:
        Table: A table containing the mask column.
    """

    def set_info(self, param=None):
        """Copy info from reqs[0] and add param.

        Args:
            param (dict, optional): Parameter dictionary. "split_depth" is
                required; "mask_col" defaults to "mask" and "index" is
                stored as given (None when absent).

        Raises:
            KeyError: If ``param`` has no "split_depth" entry.
        """
        param = {} if param is None else param
        mask_col = param.get("mask_col", "mask")
        self.info.copy_req(0)
        self.info.add_column(0, mask_col, "int", "num", "Mask")
        self.info.add_param(
            "index_cols", self.info.get_column_name("index"),
            "list", "Index column names")
        self.info.add_param(
            "mask_col", mask_col, "str", "Mask column name")
        self.info.add_param(
            "index", param.get("index"), "list", "Indices to select")
        self.info.set_split_depth(param["split_depth"])

    @staticmethod
    def process(reqs, param):
        """Create a mask column based on explicit param values.

        Rows whose index columns match at least one entry of
        ``param["index"]`` get 1 in the mask column; all other rows get 0.

        Args:
            reqs[0] (pandas.DataFrame): The table to select from.
            param["index"] (list of tuple): A list of tuples that contains
                the indices to select. The tuple should be (index of
                depth=1, index of depth=2, ...). Each element is an
                integer, a list (or tuple) of integers, or None to select
                all indices of that depth. A bare integer entry is treated
                as a one-element tuple. A missing or None value selects
                nothing.
            param["index_cols"] (list of str): The index column names.
            param["mask_col"] (str, optional): The name of the mask column.
                Defaults to "mask".

        Returns:
            pandas.DataFrame: A table containing the mask column.

        Raises:
            TypeError: If an element of an index tuple is not an integer,
                a list, a tuple or None.
        """
        df = reqs[0].copy()
        index_cols = param.get("index_cols")
        mask_col = param.get("mask_col", "mask")
        # set_info() stores None when "index" is not given, so None has to
        # behave like the missing-key default (an empty selection).
        indexes_list = param.get("index") or []
        idx_ary = df[index_cols].to_numpy()

        # Seed with an empty int array so that stacking works even when
        # nothing is selected.
        selected = [np.empty((0, len(index_cols)), int)]
        for indexes in indexes_list:
            if isinstance(indexes, (int, np.integer)):
                # Allow ``1`` as shorthand for ``(1,)``.
                indexes = (indexes,)
            keep = np.ones(idx_ary.shape[0], dtype=bool)
            for depth, idx in enumerate(indexes):
                if idx is None:
                    continue
                if isinstance(idx, (int, np.integer)):
                    keep &= idx_ary[:, depth] == idx
                elif isinstance(idx, (list, tuple)):
                    # range is not supported for exporting params as json
                    keep &= np.isin(idx_ary[:, depth], idx)
                else:
                    raise TypeError(
                        "Unsupported index type at depth "
                        f"{depth + 1}: {type(idx).__name__}")
            selected.append(idx_ary[keep])

        mask = pd.DataFrame(np.vstack(selected), columns=index_cols)
        mask = mask.drop_duplicates()
        mask[mask_col] = 1
        # Left merge keeps every row of df; unmatched rows get NaN, which
        # is turned into 0 below.
        df = pd.merge(df, mask, on=index_cols, how="left")
        df[mask_col] = df[mask_col].fillna(0).astype("int")
        return df
class SelectParam(MaskFromParam):
    """Select rows using explicit param values.

    This class creates a mask column based on explicit param values using
    :class:`slitflow.tbl.filter.MaskFromParam` and selects rows using the
    mask column.

    Args:
        reqs[0] (Table): Table for selection.
        param["index"] (list of tuple): List of tuples of index numbers to
            select. Each tuple should be (index of depth=1, index of
            depth=2, ...). If an element is None, all indices of that depth
            are selected.
        param["mask_col"] (str, optional): Column name of the mask column.
            Defaults to "mask".
        param["split_depth"] (int): File split depth number.

    Returns:
        Table: Selected Table.
    """

    # Index-plus-mask tables collected by process(). Kept on the class
    # because process() is a static method and has no access to the
    # instance.
    _temp_index = []

    def __init__(self, info_path=None):
        super().__init__(info_path)
        # The buffer is class-wide, so creating an instance clears it for
        # every SelectParam object.
        SelectParam._temp_index = []

    def set_info(self, param=None):
        """Copy info from reqs[0] and add param.

        The mask column registered by :class:`MaskFromParam` is deleted
        from the info again.

        Args:
            param (dict, optional): Parameter dictionary passed on to
                ``MaskFromParam.set_info``.
        """
        param = {} if param is None else param
        MaskFromParam.set_info(self, param)
        self.info.delete_column(self.info.get_param_value("mask_col"))

    @staticmethod
    def process(reqs, param):
        """Select rows using explicit param values.

        Args:
            reqs[0] (pandas.DataFrame): Table for selection.
            param["index"] (list of tuple): List of tuples of index numbers
                to select. Each tuple should be (index of depth=1, index of
                depth=2, ...). If an element is None, all indices of that
                depth are selected.
            param["mask_col"] (str, optional): Column name of the mask
                column. Defaults to "mask".
            param["index_cols"] (list of str): The index column names.

        Returns:
            pandas.DataFrame: Selected rows without the mask column.
        """
        mask_col = param.get("mask_col", "mask")
        df_mask = MaskFromParam.process(reqs, param)
        # Save the index (multi-process is not available)
        SelectParam._temp_index.append(
            df_mask[list(param["index_cols"]) + [mask_col]])
        df_sel = df_mask[df_mask[mask_col] == 1].reset_index(drop=True)
        return df_sel.drop(columns=mask_col)

    def post_run(self):
        """Remove empty data."""
        self.data = [data for data in self.data if data.shape[0] > 0]

    def set_index(self):
        """Set the index based on the saved temporary index.

        File numbers are added to the saved index before the selection is
        applied, so that numbers belonging to unselected entries are not
        skipped during saving.
        """
        self.info.index = pd.concat(SelectParam._temp_index)
        self.info.set_index_file_no()
        mask_col = self.info.get_param_value("mask_col")
        self.info.index = self.info.index[self.info.index[mask_col] > 0]
        self.info.index = self.info.index.drop(
            columns=mask_col).drop_duplicates()