from __future__ import annotations

from typing import Dict

import polars as pl

from tools.base import BaseTool


class UniqueTool(BaseTool):
    """Split a frame into first occurrences and repeats of a key.

    The key columns are read from the ``name`` attribute of every
    ``UniqueFields/Field`` element found under ``self.config``.
    NOTE(review): ``self.config`` is presumably an ElementTree-style element
    supplied by ``BaseTool`` -- confirm against the base class.
    """

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Route each input row to the ``"Unique"`` or ``"Duplicate"`` output.

        The first row seen for every distinct key combination goes to
        ``"Unique"``; every later row with the same key goes to
        ``"Duplicate"``. Both outputs keep the input's row order and columns.

        Args:
            inputs: Mapping of anchor name to frame. Only ``"Input"`` is read;
                a missing entry is treated as an empty frame.

        Returns:
            A dict with the keys ``"Unique"`` and ``"Duplicate"``. When there
            is no configuration or the input has no rows, the input is passed
            through as ``"Unique"`` and ``"Duplicate"`` is an empty frame
            with the same schema as the input.
        """
        df = inputs.get("Input", pl.DataFrame())
        if self.config is None or df.is_empty():
            # clear() keeps the schema, so both outputs expose the same
            # columns here as they do on the normal path below.
            return {"Unique": df, "Duplicate": df.clear()}

        available = set(df.columns)
        configured = (
            field.get("name")
            for field in self.config.findall("UniqueFields/Field")
        )
        # .get() yields None for a Field without a name, which is skipped the
        # same way as a name that is not a column. dict.fromkeys collapses
        # repeated names (they add nothing to the key) and preserves order.
        key_fields = list(
            dict.fromkeys(name for name in configured if name in available)
        )
        if not key_fields:
            # No usable key configured: compare whole rows.
            key_fields = df.columns

        # Pick a helper column name that cannot clash with an input column.
        idx_col = "__row_idx__"
        while idx_col in available:
            idx_col += "_"

        indexed = df.with_row_index(idx_col)
        # The smallest row index in a group is its first occurrence. The
        # order of the groups is irrelevant because the result is only used
        # for membership tests, so maintain_order is not needed.
        first_rows = (
            indexed.group_by(key_fields)
            .agg(pl.col(idx_col).min())
            .get_column(idx_col)
            .to_list()
        )
        is_first = pl.col(idx_col).is_in(first_rows)

        return {
            "Unique": indexed.filter(is_first).drop(idx_col),
            "Duplicate": indexed.filter(~is_first).drop(idx_col),
        }