from __future__ import annotations

import re
from typing import Dict

import polars as pl

from tools.base import BaseTool

# The capturing group makes ``re.split`` keep the digit runs in its output,
# so the result alternates text part / digit run / text part / ...
_DIGIT_RUN = re.compile(r"(\d+)")


def _natural_key(text: str) -> tuple:
    """Return a case-insensitive natural-order comparison key for *text*.

    Digit runs are compared as integers and everything else as lower-cased
    text, e.g. ``"Item10"`` -> ``("item", 10, "")``.
    """
    # Odd positions are always the captured digit runs. Deciding by position
    # (rather than ``str.isdigit``) guarantees every tuple slot has a fixed
    # type (str, int, str, ...), so any two keys are comparable, and avoids
    # ``int()`` failing on characters such as "²" that ``isdigit`` accepts
    # but ``\d`` does not match.
    return tuple(
        int(part) if idx % 2 else part.lower()
        for idx, part in enumerate(_DIGIT_RUN.split(text))
    )


class SortTool(BaseTool):
    """Sort the ``"Input"`` frame as described by the ``SortInfo`` config element.

    The configuration is read through ``find`` / ``findall`` / ``attrib``
    (presumably an ``xml.etree`` element supplied by ``BaseTool`` -- confirm
    against the base class).
    """

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Sort ``inputs["Input"]`` and return it under the ``"Output"`` key.

        Each ``<Field>`` child of ``SortInfo`` contributes one sort column via
        its ``field`` attribute; ``order="Descending"`` reverses that column
        and any other value (or none) means ascending. Fields that are
        missing the attribute or name a column absent from the frame are
        ignored. When ``SortInfo`` has ``locale="1033"`` string columns are
        ordered naturally (digit runs compared as numbers).

        The frame is returned unchanged when there is no config, no
        ``SortInfo``, no usable field, or the frame is empty.
        """
        df = inputs.get("Input", pl.DataFrame())
        if self.config is None or df.is_empty():
            return {"Output": df}

        sort_info = self.config.find("SortInfo")
        if sort_info is None:
            return {"Output": df}

        # Build both lists in one pass so they can never get out of step.
        by: list[str] = []
        descending: list[bool] = []
        for field in sort_info.findall("Field"):
            name = field.attrib.get("field")
            if name in df.columns:
                by.append(name)
                descending.append(
                    field.attrib.get("order", "Ascending") == "Descending"
                )
        if not by:
            return {"Output": df}

        if sort_info.attrib.get("locale", "0") == "1033":
            # Natural sort: numeric strings sorted as numbers
            df = self._natural_sort(df, by, descending)
        else:
            df = df.sort(by=by, descending=descending, maintain_order=True)
        return {"Output": df}

    def _natural_sort(
        self,
        df: pl.DataFrame,
        by: list[str],
        descending: list[bool],
    ) -> pl.DataFrame:
        """Sort *df* by *by*, ordering string columns naturally.

        Polars cannot store the tuple keys natural ordering needs, so every
        string column in *by* is replaced, for sorting only, by a temporary
        integer column holding the dense rank of each value's natural key.
        Values whose keys are equal (``"A1"`` / ``"a1"``) share a rank, so
        ties fall through to the next sort column and then to the original
        row order. Non-string columns are sorted by Polars directly, and
        nulls stay null so they are placed exactly as in a plain
        ``DataFrame.sort``.

        Args:
            df: Frame to sort.
            by: Names of the sort columns, highest priority first.
            descending: One flag per entry of *by*; ``True`` reverses it.

        Returns:
            A sorted frame with the same columns as *df*.
        """
        sort_cols: list[str] = []
        temp_cols: list[str] = []

        for i, col_name in enumerate(by):
            column = df.get_column(col_name)
            if column.dtype != pl.Utf8:
                # Native ordering is already correct for non-text columns.
                sort_cols.append(col_name)
                continue

            values = column.to_list()
            keys = {v: _natural_key(v) for v in set(values) if v is not None}
            rank_of = {
                key: rank for rank, key in enumerate(sorted(set(keys.values())))
            }
            ranks = [None if v is None else rank_of[keys[v]] for v in values]

            key_col = f"__nat_key_{i}__"
            while key_col in df.columns:
                # Never clobber a real column that happens to share the name.
                key_col += "_"
            df = df.with_columns(pl.Series(key_col, ranks, dtype=pl.Int64))
            temp_cols.append(key_col)
            sort_cols.append(key_col)

        result = df.sort(by=sort_cols, descending=descending, maintain_order=True)
        return result.drop(temp_cols) if temp_cols else result