56 lines
2.1 KiB
Python
56 lines
2.1 KiB
Python
from __future__ import annotations
|
|
from typing import Dict
|
|
import polars as pl
|
|
from tools.base import BaseTool
|
|
|
|
|
|
class MultiFieldFormulaTool(BaseTool):
    """Apply a single expression to multiple selected fields.

    Configuration values read by this tool:

    * ``Expression`` -- the formula text. Every ``[_CurrentField_]``
      placeholder is replaced with ``[<column name>]`` for each target column
      before the expression is handed to ``ctx.transpiler.eval_series``.
    * ``OutputType`` -- optional type name passed to ``ctx.type_mapper.map``.
      When it is blank (or maps to ``None``), the column's current dtype is
      passed to the transpiler as the target dtype instead.
    * ``Fields/Field`` -- explicit field list; an entry is used when its
      ``selected`` attribute is ``"True"`` (the default when absent) and its
      ``name`` is a column of the input frame.
    * ``Fields/@type`` -- fallback filter (``"Number"`` or ``"String"``),
      consulted only when the explicit list yields no columns.

    NOTE(review): the ``OutputSize`` setting is not applied by this tool.
    """

    # Polars dtypes matched by each ``Fields/@type`` filter value.
    # NOTE(review): Int8 and the unsigned integer dtypes are not listed, so
    # such columns are not picked up by the "Number" filter -- confirm intent.
    _FILTER_DTYPES = {
        "Number": (pl.Float32, pl.Float64, pl.Int16, pl.Int32, pl.Int64),
        "String": (pl.String, pl.Utf8),
    }

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Evaluate the configured expression once per target column.

        Args:
            inputs: Mapping of anchor name to frame; only ``"Input"`` is read.

        Returns:
            ``{"Output": frame}``. The input frame is returned unchanged when
            there is no configuration, the frame is empty, or no target
            column could be resolved.

        Raises:
            RuntimeError: If evaluating or assigning the expression fails for
                a column; the original exception is chained as the cause.
        """
        df = inputs.get("Input", pl.DataFrame())
        if self.config is None or df.is_empty():
            return {"Output": df}

        expr_text = self._cfg("Expression", "") or ""
        alteryx_type = self._cfg("OutputType", "") or ""
        dtype = self.ctx.type_mapper.map(alteryx_type) if alteryx_type else None

        selected_fields = self._resolve_target_fields(df)
        if not selected_fields:
            return {"Output": df}

        for field in selected_fields:
            # Replace [_CurrentField_] placeholder with actual column name
            actual_expr = expr_text.replace("[_CurrentField_]", f"[{field}]")
            # Explicit None check: fall back to the column's present dtype
            # only when no output type was configured/mapped.
            target_dtype = dtype if dtype is not None else df.schema[field]
            try:
                series = self.ctx.transpiler.eval_series(
                    df, actual_expr, field, target_dtype
                )
                # Columns are updated one at a time, so an expression for a
                # later field is evaluated against the already-updated frame.
                df = df.with_columns(series.alias(field))
            except Exception as e:
                raise RuntimeError(
                    f"MultiFieldFormula field {field!r}: {e}"
                ) from e

        return {"Output": df}

    def _resolve_target_fields(self, df: pl.DataFrame) -> list:
        """Return the names of the columns of *df* the expression applies to."""
        available = set(df.columns)
        explicit = []
        for node in self.config.findall("Fields/Field"):
            if node.attrib.get("selected", "True") != "True":
                continue
            # A <Field> without a name attribute is skipped rather than
            # aborting the whole tool with a bare KeyError.
            name = node.attrib.get("name")
            if name in available:
                explicit.append(name)
        if explicit:
            return explicit

        # If no explicit field list, apply to all fields of matching type
        field_filter_type = self._cfg("Fields/@type") or ""
        if not field_filter_type:
            return []
        target_types = self._FILTER_DTYPES.get(field_filter_type, ())
        return [
            name
            for name, col_dtype in df.schema.items()
            if any(col_dtype == t for t in target_types)
        ]
|