70 lines
2.2 KiB
Python
70 lines
2.2 KiB
Python
from __future__ import annotations
|
|
from typing import Dict
|
|
import polars as pl
|
|
from tools.base import BaseTool
|
|
|
|
|
|
class SortTool(BaseTool):
    """Sort the ``"Input"`` frame by the fields listed under ``SortInfo``.

    ``self.config`` is read through ``find`` / ``findall`` / ``attrib``, so it
    is presumably an ``xml.etree.ElementTree.Element`` (or compatible) supplied
    by ``BaseTool`` -- confirm against the base class.

    Each ``<Field>`` child of ``<SortInfo>`` contributes one sort key via its
    ``field`` attribute (column name) and optional ``order`` attribute
    (``"Descending"`` sorts descending; anything else sorts ascending).
    When ``SortInfo`` carries ``locale="1033"`` string columns are ordered
    naturally, i.e. digit runs compare as numbers and letters compare
    case-insensitively.
    """

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Return ``{"Output": sorted_frame}`` for the frame under ``"Input"``.

        The input frame is passed through unchanged when there is no config,
        the frame is empty, ``SortInfo`` is absent, or none of the configured
        fields name an existing column.

        Args:
            inputs: Mapping of connection name to frame; only ``"Input"`` is
                read. A missing ``"Input"`` is treated as an empty frame.

        Returns:
            A single-entry dict with the (possibly sorted) frame under
            ``"Output"``.
        """
        df = inputs.get("Input", pl.DataFrame())
        if self.config is None or df.is_empty():
            return {"Output": df}

        sort_info = self.config.find("SortInfo")
        if sort_info is None:
            return {"Output": df}

        # Build both lists in one pass so names and directions cannot drift
        # out of alignment. Fields without a ``field`` attribute, or naming a
        # column the frame does not have, are skipped instead of raising.
        available = set(df.columns)
        by: list[str] = []
        descending: list[bool] = []
        for field in sort_info.findall("Field"):
            name = field.attrib.get("field")
            if name not in available:
                continue
            by.append(name)
            descending.append(field.attrib.get("order", "Ascending") == "Descending")

        if not by:
            return {"Output": df}

        if sort_info.attrib.get("locale", "0") == "1033":
            # Natural sort: numeric strings sorted as numbers
            df = self._natural_sort(df, by, descending)
        else:
            df = df.sort(by=by, descending=descending, maintain_order=True)

        return {"Output": df}

    def _natural_sort(
        self,
        df: pl.DataFrame,
        by: list[str],
        descending: list[bool],
    ) -> pl.DataFrame:
        """Stable multi-key sort in which string columns use natural order.

        For every string column in ``by`` a temporary integer column holding
        the dense rank of each value's natural key is added; Polars then sorts
        on those ranks. Non-string columns are sorted on their own values, and
        nulls stay null in the rank column, so null placement follows the
        regular Polars sort.

        Args:
            df: Frame to sort; every name in ``by`` must be one of its columns.
            by: Column names, most significant key first.
            descending: Per-key direction flags, parallel to ``by``.

        Returns:
            The sorted frame with the original columns only.
        """
        import re

        split_digits = re.compile(r"(\d+)").split

        def natural_key(text: str) -> tuple:
            # ``re.split`` with a capturing group always yields
            # text, digits, text, ... so odd positions are exactly the digit
            # runs. Using the position (not ``str.isdigit``) avoids calling
            # ``int`` on characters such as "²" that are "digits" but not
            # decimal, and keeps every tuple position type-consistent.
            return tuple(
                int(part) if idx % 2 else part.lower()
                for idx, part in enumerate(split_digits(text))
            )

        taken = set(df.columns)
        sort_cols: list[str] = []
        temp_cols: list[str] = []

        for i, col_name in enumerate(by):
            column = df.get_column(col_name)
            if column.dtype != pl.Utf8:
                # Numbers, dates, etc. already order correctly natively.
                sort_cols.append(col_name)
                continue

            keys = [
                None if value is None else natural_key(value)
                for value in column.to_list()
            ]
            # Polars can't store tuples, so replace each key by its dense
            # rank; equal keys (e.g. "A1" / "a1") share a rank, which lets
            # ``maintain_order`` preserve their original relative order.
            distinct = sorted({key for key in keys if key is not None})
            rank_of = {key: rank for rank, key in enumerate(distinct)}

            key_col = f"__nat_key_{i}__"
            while key_col in taken:  # never clobber a real column
                key_col += "_"
            taken.add(key_col)
            temp_cols.append(key_col)
            sort_cols.append(key_col)

            df = df.with_columns(
                pl.Series(
                    key_col,
                    [None if key is None else rank_of[key] for key in keys],
                    dtype=pl.Int64,
                )
            )

        if not temp_cols:
            return df.sort(by=by, descending=descending, maintain_order=True)

        ordered = df.sort(by=sort_cols, descending=descending, maintain_order=True)
        return ordered.drop(temp_cols)
|