Pyteryx/alteryx_runner/expression/functions.py

153 lines
5.0 KiB
Python

"""Mapping of Alteryx built-in functions to DuckDB SQL equivalents."""
from __future__ import annotations

import re
# Alteryx function name → DuckDB SQL template. Placeholders {0}, {1}, ... stand
# for the already-rendered SQL of each positional argument. A value of None
# marks a function that has no template (rendered separately or unsupported).
FUNCTION_MAP: dict[str, str | None] = {
    # String
    "Uppercase": "UPPER({0})",
    "Lowercase": "LOWER({0})",
    "Trim": "TRIM({0})",
    "LTrim": "LTRIM({0})",
    "RTrim": "RTRIM({0})",
    "Length": "LENGTH({0})",
    "Left": "LEFT({0}, {1})",
    "Right": "RIGHT({0}, {1})",
    # Alteryx start positions are 0-based; DuckDB SUBSTR is 1-based.
    "Substring": "SUBSTR({0}, ({1}) + 1, {2})",
    # Alteryx returns a 0-based index and -1 when absent; DuckDB INSTR returns
    # a 1-based index and 0 when absent, so subtracting 1 maps both cases.
    "FindString": "(INSTR({0}, {1}) - 1)",
    "ReplaceChar": "REPLACE({0}, {1}, {2})",
    "StringToDate": "STRPTIME({0}, {1})",
    "ToString": "PRINTF('%.' || {1} || 'f', {0})",
    "Contains": "CONTAINS({0}, {1})",
    "StartsWith": "STARTS_WITH({0}, {1})",
    "EndsWith": "ENDS_WITH({0}, {1})",
    "REGEX_Match": "REGEXP_MATCHES({0}, {1})",
    "REGEX_Replace": "REGEXP_REPLACE({0}, {1}, {2})",
    "PadLeft": "LPAD({0}, {1}, {2})",
    "PadRight": "RPAD({0}, {1}, {2})",
    # DuckDB does not process backslash escapes in plain '...' literals, so the
    # SQL text must contain a single backslash for the regex \s+ (whitespace).
    "GetWord": "list_extract(str_split_regex({0}, '\\s+'), {1} + 1)",
    "CountWords": "array_length(str_split_regex(TRIM({0}), '\\s+'))",
    "CharFromInt": "chr({0}::INTEGER)",
    "IntFromChar": "ascii({0})",
    "ConvertFromCodePage": "{0}",
    "ReverseString": "reverse({0})",
    "DecomposeUnicodeForMatch": "strip_accents(UPPER({0}))",
    # Math
    "ABS": "ABS({0})",
    "Abs": "ABS({0})",
    "CEIL": "CEIL({0})",
    "Ceil": "CEIL({0})",
    "FLOOR": "FLOOR({0})",
    "Floor": "FLOOR({0})",
    # Alteryx Round(x, mult) rounds to the nearest multiple of mult, whereas
    # DuckDB ROUND(x, n) rounds to n decimal places.
    "ROUND": "(ROUND(({0}) / ({1})) * ({1}))",
    "Round": "(ROUND(({0}) / ({1})) * ({1}))",
    "SQRT": "SQRT({0})",
    "Sqrt": "SQRT({0})",
    "POW": "POWER({0}, {1})",
    "Pow": "POWER({0}, {1})",
    "LOG": "LN({0})",
    "Log": "LN({0})",
    "LOG10": "LOG10({0})",
    "Log10": "LOG10({0})",
    "MOD": "({0} % {1})",
    "Mod": "({0} % {1})",
    "MIN": "LEAST({0}, {1})",
    "Max": "GREATEST({0}, {1})",
    "MAX": "GREATEST({0}, {1})",
    "Min": "LEAST({0}, {1})",
    "RandInt": "FLOOR(RANDOM() * {0})::BIGINT",
    "Random": "RANDOM()",
    "PI": "PI()",
    "SIN": "SIN({0})",
    "COS": "COS({0})",
    "TAN": "TAN({0})",
    "ASIN": "ASIN({0})",
    "ACOS": "ACOS({0})",
    "ATAN": "ATAN({0})",
    "ATAN2": "ATAN2({0}, {1})",
    "EXP": "EXP({0})",
    "Sign": "SIGN({0})",
    # Null handling
    "IsNull": "({0} IS NULL)",
    "IsEmpty": "({0} IS NULL OR {0} = '')",
    "NullConvert": "NULLIF({0}, '')",
    "Null": "NULL",
    # Type conversion
    "ToNumber": "TRY_CAST({0} AS DOUBLE)",
    "ToString_num": "CAST({0} AS VARCHAR)",
    "TOBOOL": "CAST({0} AS BOOLEAN)",
    # Date/Time
    "DateTimeNow": "NOW()",
    "DateTimeToday": "CURRENT_DATE",
    "DateTimeAdd": "({0} + INTERVAL ({1}) {2})",
    "DateTimeDiff": "DATEDIFF({2}, {1}, {0})",
    "DateTimeFormat": "STRFTIME({0}, {1})",
    "ToDate": "CAST({0} AS DATE)",
    "DateTimeYear": "YEAR({0})",
    "DateTimeMonth": "MONTH({0})",
    "DateTimeDay": "DAY({0})",
    "DateTimeHour": "HOUR({0})",
    "DateTimeMinute": "MINUTE({0})",
    "DateTimeSecond": "SECOND({0})",
    "DateTimeFirstOfMonth": "DATE_TRUNC('month', {0})",
    "DateTimeLastOfMonth": "(DATE_TRUNC('month', {0}) + INTERVAL '1 month' - INTERVAL '1 day')::DATE",
    "DateTimeFirstOfYear": "DATE_TRUNC('year', {0})",
    "DateTimeQuarter": "QUARTER({0})",
    "DateTimeTrim": "DATE_TRUNC({1}, {0})",
    # Conditional
    "IIF": "(CASE WHEN {0} THEN {1} ELSE {2} END)",
    "Switch": None,  # handled separately
    # Misc
    "TOPN": None,  # not a scalar function
}
def get_function_sql(name: str, args: list[str]) -> str:
    """Render a function call to DuckDB SQL given evaluated argument SQL strings.

    Args:
        name: Function name as written in the expression; matched against
            ``FUNCTION_MAP`` exactly first, then case-insensitively.
        args: Already-rendered SQL text for each positional argument.

    Returns:
        The SQL text for the call. ``Switch`` (any casing) is rendered by
        ``_render_switch``. Names that are missing from ``FUNCTION_MAP`` or
        mapped to ``None`` are passed through unchanged as ``name(arg, ...)``.
        Placeholders with no corresponding argument are left in the output
        as-is.
    """
    # Switch is stored as None in FUNCTION_MAP, so it has to be dispatched
    # before the lookup; otherwise it would fall into the pass-through branch.
    if name.lower() == "switch":
        return _render_switch(args)

    template = FUNCTION_MAP.get(name)
    if template is None:
        canon = name.lower()
        template = next(
            (sql for key, sql in FUNCTION_MAP.items() if key.lower() == canon),
            None,
        )
    if template is None:
        # Unknown function — pass through as-is (may work in DuckDB natively)
        return f"{name}({', '.join(args)})"

    def _fill(placeholder: re.Match) -> str:
        index = int(placeholder.group(1))
        return args[index] if index < len(args) else placeholder.group(0)

    # One pass over the template: argument text that itself happens to
    # contain "{n}" is never re-scanned and clobbered by a later argument.
    return re.sub(r"\{(\d+)\}", _fill, template)
def _render_switch(args: list[str]) -> str:
"""Switch(val, default, v1, r1, v2, r2, ...) → CASE val WHEN v1 THEN r1 ... ELSE default END"""
if len(args) < 2:
return "NULL"
val = args[0]
default = args[1]
pairs = args[2:]
cases = []
for i in range(0, len(pairs) - 1, 2):
cases.append(f"WHEN {pairs[i]} THEN {pairs[i+1]}")
cases_sql = " ".join(cases)
return f"CASE {val} {cases_sql} ELSE {default} END"
def titlecase_sql(col: str) -> str:
    """Build a DuckDB expression approximating Titlecase for ``col``.

    The value is split on single spaces, each piece gets its first character
    upper-cased and the remainder lower-cased, and the pieces are re-joined
    with single spaces.
    """
    words = f"str_split({col}, ' ')"
    capitalise = "x -> UPPER(LEFT(x,1)) || LOWER(SUBSTR(x,2))"
    return f"array_to_string(list_transform({words}, {capitalise}), ' ')"