from __future__ import annotations

from typing import Dict

import polars as pl

from tools.base import BaseTool

# Name of the temporary row-number column used to identify first occurrences.
_ROW_INDEX = "__row_idx__"


class UniqueTool(BaseTool):
    """Split a frame into first-occurrence rows and repeated rows.

    The key columns are read from the ``field`` attribute of the
    ``UniqueFields/Field`` elements of ``self.config``.
    NOTE(review): ``self.config`` is presumably an ElementTree-style element
    set up by ``BaseTool`` (it is only used via ``findall``/``attrib`` here) --
    confirm against the base class.
    """

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Partition the ``"Input"`` frame by uniqueness of the key columns.

        Args:
            inputs: Mapping of input names to frames. Only ``"Input"`` is
                read; a missing entry is treated as an empty frame.

        Returns:
            A dict with two entries:

            * ``"Unique"`` -- the first row seen for every distinct key
              combination.
            * ``"Duplicates"`` -- every later row whose key combination was
              already seen.

            Both frames are sorted by the key columns; rows with equal keys
            keep their input order. When there is no config or the input is
            empty, the input is returned unchanged as ``"Unique"`` together
            with an empty ``"Duplicates"`` frame that has the same schema.
        """
        df = inputs.get("Input", pl.DataFrame())
        if self.config is None or df.is_empty():
            # Same key name as the main path, and the input schema is kept so
            # consumers see identical columns on both outputs.
            return {"Unique": df, "Duplicates": df.clear()}

        configured = (
            field.attrib.get("field", "")
            for field in self.config.findall("UniqueFields/Field")
        )
        # Ignore names that are not columns of the input; dict.fromkeys drops
        # repeated names while preserving the configured order.
        key_fields = list(
            dict.fromkeys(name for name in configured if name in df.columns)
        )
        if not key_fields:
            # No usable key configured: compare entire rows.
            key_fields = df.columns

        indexed = df.with_row_index(_ROW_INDEX)
        # A row is the first occurrence of its key when its own row number
        # equals the smallest (first) row number within its key group.
        is_first = pl.col(_ROW_INDEX) == pl.col(_ROW_INDEX).first().over(key_fields)

        unique_df = (
            indexed.filter(is_first)
            .drop(_ROW_INDEX)
            .sort(key_fields, maintain_order=True)
        )
        dup_df = (
            indexed.filter(~is_first)
            .drop(_ROW_INDEX)
            .sort(key_fields, maintain_order=True)
        )
        return {"Unique": unique_df, "Duplicates": dup_df}