from __future__ import annotations

from typing import Dict

import polars as pl

from tools.base import BaseTool


class TextToColumnsTool(BaseTool):
    """Split a delimited text column into extra columns or extra rows.

    Settings are read through ``self._cfg`` / ``self._cfg_attr`` (not defined
    in this class; presumably provided by ``BaseTool`` -- confirm there):

    * ``Field`` -- name of the column to split.
    * ``Delimiter`` -- separator text; an empty/missing value falls back
      to ``","``.
    * ``NumCols`` (attribute ``value``) -- number of output columns in
      column mode; an empty/missing value falls back to ``2``.
    * ``RootName`` -- prefix of the generated column names; an empty/missing
      value falls back to ``"<Field>_"``.
    * ``SplitToRows`` (attribute ``value``) -- ``"true"`` (any letter case)
      selects row mode, anything else selects column mode.
    """

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Run the split on ``inputs["Input"]`` and return ``{"Output": frame}``.

        The input frame is returned untouched when there is no configuration,
        the frame is empty, or the configured field is blank or not a column
        of the frame.

        Raises:
            ValueError: if the configured ``NumCols`` value is not an integer
                literal (raised by ``int``).
        """
        df = inputs.get("Input", pl.DataFrame())
        if self.config is None or df.is_empty():
            return {"Output": df}

        field = self._cfg("Field", "") or ""
        delimiter = self._cfg("Delimiter", ",") or ","
        num_cols = int(self._cfg_attr("NumCols", "value", "2") or "2")
        root_name = self._cfg("RootName", f"{field}_") or f"{field}_"
        split_flag = self._cfg_attr("SplitToRows", "value", "False") or "False"
        split_to_rows = split_flag.lower() == "true"

        if not field or field not in df.columns:
            return {"Output": df}

        if split_to_rows:
            result = self._split_to_rows(df, field, delimiter)
        else:
            result = self._split_to_columns(
                df, field, delimiter, num_cols, root_name
            )
        return {"Output": result}

    @staticmethod
    def _split_to_rows(
        df: pl.DataFrame, field: str, delimiter: str
    ) -> pl.DataFrame:
        """Return ``df`` with one row per delimiter-separated token of ``field``.

        ``field`` becomes a string column holding the whitespace-stripped
        token; every other column is repeated as-is, so the remaining columns
        keep their original dtypes and order.
        """
        tokens = (
            pl.col(field)
            .cast(pl.String)
            # A null value still yields exactly one (empty-string) output row.
            .fill_null("")
            .str.split(delimiter)
        )
        return (
            df.with_columns(tokens)
            .explode(field)
            .with_columns(pl.col(field).str.strip_chars())
        )

    @staticmethod
    def _split_to_columns(
        df: pl.DataFrame,
        field: str,
        delimiter: str,
        num_cols: int,
        root_name: str,
    ) -> pl.DataFrame:
        """Return ``df`` plus ``num_cols`` string columns split from ``field``.

        The new columns are named ``f"{root_name}{k}"`` for ``k`` in
        ``1..num_cols``. The value is split at most ``num_cols - 1`` times
        (``str.splitn``), so the last column keeps any remaining text.
        """
        if num_cols < 1:
            # No output columns were requested: nothing would be added, so
            # hand the frame back without asking Polars for a 0-way split.
            return df

        parts = df[field].cast(pl.String).str.splitn(delimiter, num_cols)
        parts_df = parts.struct.unnest()

        new_columns = []
        for index in range(num_cols):
            # Polars names the struct fields field_0, field_1, ...
            part_name = f"field_{index}"
            out_name = f"{root_name}{index + 1}"
            if part_name in parts_df.columns:
                new_columns.append(parts_df[part_name].alias(out_name))
            else:
                # Defensive fallback: emit an all-null string column.
                new_columns.append(
                    pl.lit(None).cast(pl.String).alias(out_name)
                )
        # Add every generated column in one pass instead of one frame
        # rebuild per column.
        return df.with_columns(new_columns)