Pyteryx/alteryx_runner/tools/join/find_replace.py

42 lines
1.6 KiB
Python

from __future__ import annotations
from typing import Dict
import polars as pl
from tools.base import BaseTool
class FindReplaceTool(BaseTool):
    """Look up values in a Find/Replace table and replace matching fields.

    The tool takes a target frame and a lookup frame. Every occurrence of a
    value from the lookup's "find" column inside the configured target column
    is replaced with the value from the lookup's "replace" column on the same
    row.
    """

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Apply the find/replace lookup to the configured target column.

        Args:
            inputs: Frames keyed by anchor name. The target frame is read from
                ``"Target"`` (falling back to ``"Input"``); the lookup frame
                is read from ``"Find"`` (falling back to ``"Replace"``).

        Returns:
            ``{"Output": frame}``. The frame is the target with the configured
            column cast to ``pl.String`` and rewritten. The target is returned
            unchanged when either frame is empty, ``self.config`` is ``None``,
            or any of ``Field`` / ``FindField`` / ``ReplaceField`` is unset.

        Notes:
            * Replacements are applied one lookup row after another, so text
              produced by an earlier row can be matched by a later row.
            * Lookup rows whose find value is null or empty, or whose replace
              value is null, are skipped: they cannot be applied meaningfully
              (an empty find string would match between every character).
            * With ``WholeWord`` enabled, matches are delimited by the regex
              ``\\b`` word boundary.
        """
        # Function-scope import so the block does not depend on a file-level
        # ``import re`` being present.
        import re

        target = inputs.get("Target", inputs.get("Input", pl.DataFrame()))
        find_df = inputs.get("Find", inputs.get("Replace", pl.DataFrame()))
        if target.is_empty() or find_df.is_empty() or self.config is None:
            return {"Output": target}

        # NOTE(review): ``_cfg`` is defined outside this block; presumably it
        # reads a setting from ``self.config`` with a default -- confirm.
        field = self._cfg("Field", "") or ""
        find_field = self._cfg("FindField", "") or ""
        replace_field = self._cfg("ReplaceField", "") or ""
        whole_word = (self._cfg("WholeWord", "False") or "False").lower() == "true"
        if not (field and find_field and replace_field):
            return {"Output": target}

        find_values = find_df[find_field].cast(pl.String).to_list()
        replace_values = find_df[replace_field].cast(pl.String).to_list()
        # Drop unusable rows up front: a null on either side would raise
        # TypeError in str.replace / re.escape, and an empty find string would
        # insert the replacement between every character.
        lookup = {
            find: rep
            for find, rep in zip(find_values, replace_values)
            if find and rep is not None
        }

        # Compile the whole-word patterns once instead of once per cell.
        # Backslashes in the replacement are escaped so that re.sub treats it
        # as literal text rather than as a template (e.g. "\1", "\g<0>", "\n").
        patterns = []
        if whole_word:
            patterns = [
                (
                    re.compile(r"\b" + re.escape(find) + r"\b"),
                    rep.replace("\\", r"\\"),
                )
                for find, rep in lookup.items()
            ]

        def _replace_fn(val: str | None) -> str | None:
            """Return *val* with every lookup replacement applied in order."""
            if val is None:
                return None
            if whole_word:
                for pattern, template in patterns:
                    val = pattern.sub(template, val)
            else:
                for find, rep in lookup.items():
                    val = val.replace(find, rep)
            return val

        series = target[field].cast(pl.String).map_elements(
            _replace_fn, return_dtype=pl.String
        )
        return {"Output": target.with_columns(series.alias(field))}