42 lines
1.6 KiB
Python
42 lines
1.6 KiB
Python
from __future__ import annotations
|
|
from typing import Dict
|
|
import polars as pl
|
|
from tools.base import BaseTool
|
|
|
|
|
|
class FindReplaceTool(BaseTool):
    """Look up values in a Find/Replace table and replace matching fields.

    The tool reads two inputs: the frame to modify ("Target", falling back
    to "Input") and the lookup table ("Find", falling back to "Replace").
    Four configuration keys are read through ``self._cfg``:

    * ``Field``        - column of the target frame to rewrite.
    * ``FindField``    - lookup-table column holding the text to search for.
    * ``ReplaceField`` - lookup-table column holding the replacement text.
    * ``WholeWord``    - "True" to match only on ``\\b`` word boundaries.

    NOTE(review): ``_cfg`` is defined outside this block; it is presumably a
    "read config key with default" helper - confirm against ``BaseTool``.
    """

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Apply every find/replace pair to the configured target column.

        Args:
            inputs: Frames keyed by connection name. "Target"/"Input" supplies
                the data to rewrite, "Find"/"Replace" supplies the lookup table.

        Returns:
            ``{"Output": frame}``. The target frame is returned unchanged when
            either input is empty, ``self.config`` is ``None``, or any of the
            three field names is not configured. Otherwise the configured
            column is cast to ``pl.String`` and rewritten.

        Raises:
            polars' column-not-found error if a configured column does not
            exist in the corresponding frame (no guard is applied here).

        Notes:
            * Pairs are applied one after another in lookup-table order, so a
              later pair can rewrite text produced by an earlier one.
            * When a find value occurs more than once, the last replacement
              wins (it keeps the position of the first occurrence).
            * Lookup rows whose find value is null or empty are ignored: a
              null cannot be searched for, and an empty search string would
              match between every character.
            * A null replacement value is treated as an empty string.
            * Replacement text is always inserted literally, including in
              whole-word mode (backslashes are not interpreted as regex
              template escapes).
        """
        # Function-scope import keeps this block self-contained; it now runs
        # once per call instead of once per row per lookup entry.
        import re

        target = inputs.get("Target", inputs.get("Input", pl.DataFrame()))
        find_df = inputs.get("Find", inputs.get("Replace", pl.DataFrame()))

        if target.is_empty() or find_df.is_empty() or self.config is None:
            return {"Output": target}

        field = self._cfg("Field", "") or ""
        find_field = self._cfg("FindField", "") or ""
        replace_field = self._cfg("ReplaceField", "") or ""
        # str() tolerates a config value that is already a bool.
        whole_word = str(self._cfg("WholeWord", "False") or "False").lower() == "true"

        if not (field and find_field and replace_field):
            return {"Output": target}

        find_values = find_df[find_field].cast(pl.String).to_list()
        replace_values = find_df[replace_field].cast(pl.String).to_list()

        # Build the lookup once, dropping unusable rows up front so the
        # per-value function never sees a null/empty search term.
        lookup: dict[str, str] = {}
        for find, rep in zip(find_values, replace_values):
            if not find:
                continue
            lookup[find] = "" if rep is None else rep

        if whole_word:
            # Compile each pattern once. The replacement is wrapped in a
            # callable so re.sub inserts it verbatim instead of parsing it as
            # a template (where "\1", "\g<0>", "\n" ... have special meaning).
            compiled = [
                (
                    re.compile(r"\b" + re.escape(find) + r"\b"),
                    (lambda _match, _rep=rep: _rep),
                )
                for find, rep in lookup.items()
            ]

            def _replace_fn(val: str | None) -> str | None:
                if val is None:
                    return None
                for pattern, literal_rep in compiled:
                    val = pattern.sub(literal_rep, val)
                return val

        else:
            pairs = list(lookup.items())

            def _replace_fn(val: str | None) -> str | None:
                if val is None:
                    return None
                for find, rep in pairs:
                    val = val.replace(find, rep)
                return val

        series = target[field].cast(pl.String).map_elements(_replace_fn, return_dtype=pl.String)
        return {"Output": target.with_columns(series.alias(field))}
|