# Pyteryx/alteryx_runner/tools/preparation/multi_field_formula.py
#
# 56 lines
# 2.1 KiB
# Python

from __future__ import annotations
from typing import Dict
import polars as pl
from tools.base import BaseTool
class MultiFieldFormulaTool(BaseTool):
    """Apply a single expression to multiple selected fields.

    The configured expression is evaluated once per target column. Every
    ``[_CurrentField_]`` placeholder in the expression is replaced by
    ``[<column name>]`` and the resulting series overwrites that column.
    """

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Evaluate the expression against each target field of ``"Input"``.

        Args:
            inputs: Mapping of anchor name to DataFrame. Only the ``"Input"``
                entry is read; a missing entry is treated as an empty frame.

        Returns:
            A dict with a single ``"Output"`` entry. The input frame is
            returned unchanged when the tool has no config, the frame is
            empty, or no target field could be resolved.

        Raises:
            RuntimeError: If evaluating the expression (or writing the result
                back) fails for a field; the original exception is chained.
        """
        df = inputs.get("Input", pl.DataFrame())
        if self.config is None or df.is_empty():
            return {"Output": df}

        expr_text = self._cfg("Expression", "") or ""
        alteryx_type = self._cfg("OutputType", "") or ""
        # NOTE(review): the "OutputSize" setting is not applied anywhere in
        # this tool (the original read it into an unused local) - confirm
        # whether size handling is expected here.
        dtype = self.ctx.type_mapper.map(alteryx_type) if alteryx_type else None

        selected_fields = self._resolve_target_fields(df)
        if not selected_fields:
            return {"Output": df}

        for field in selected_fields:
            # Replace [_CurrentField_] placeholder with actual column name
            actual_expr = expr_text.replace("[_CurrentField_]", f"[{field}]")
            # Without an explicit output type, keep the column's current dtype.
            # Compare against None explicitly rather than relying on the
            # truthiness of the mapped dtype object.
            target_dtype = dtype if dtype is not None else df[field].dtype
            try:
                series = self.ctx.transpiler.eval_series(
                    df, actual_expr, field, target_dtype
                )
                df = df.with_columns(series.alias(field))
            except Exception as e:
                raise RuntimeError(
                    f"MultiFieldFormula field {field!r}: {e}"
                ) from e
        return {"Output": df}

    def _resolve_target_fields(self, df: pl.DataFrame) -> list[str]:
        """Return the column names the expression should be applied to.

        Explicitly listed ``Fields/Field`` elements win: each element whose
        ``selected`` attribute is ``"True"`` (the default when absent) and
        whose ``name`` is a column of ``df`` is kept, in config order.
        Elements without a ``name`` attribute are skipped. If that yields
        nothing and ``Fields/@type`` is set, every column whose dtype belongs
        to that type group is used instead, in frame order.
        """
        field_filter_type = self._cfg("Fields/@type") or ""
        available = set(df.columns)  # built once for O(1) membership tests

        selected_fields = []
        for node in self.config.findall("Fields/Field"):
            if node.attrib.get("selected", "True") != "True":
                continue
            name = node.attrib.get("name")
            if name in available:
                selected_fields.append(name)

        # If no explicit field list, apply to all fields of matching type
        if not selected_fields and field_filter_type:
            # NOTE(review): only these dtypes count as "Number"; other integer
            # widths / unsigned types are not matched - confirm against the
            # type mapper if such columns can occur.
            type_map_rev = {
                "Number": [pl.Float32, pl.Float64, pl.Int16, pl.Int32, pl.Int64],
                "String": [pl.String, pl.Utf8],
            }
            target_types = type_map_rev.get(field_filter_type, [])
            selected_fields = [
                c for c in df.columns
                if any(df[c].dtype == t for t in target_types)
            ]
        return selected_fields