153 lines
5.0 KiB
Python
153 lines
5.0 KiB
Python
"""Mapping of Alteryx built-in functions to DuckDB SQL equivalents."""
|
|
from __future__ import annotations

import re
|
|
|
|
# Function translations: Alteryx function name → DuckDB SQL template.
# {0}, {1}, ... are positional placeholders for the rendered argument SQL.
# A value of None means the function has no template (see the inline notes).
#
# Alteryx string positions are 0-based while DuckDB's are 1-based, so
# templates that take or return a position adjust by one.
FUNCTION_MAP: dict[str, str | None] = {
    # String
    "Uppercase": "UPPER({0})",
    "Lowercase": "LOWER({0})",
    "Trim": "TRIM({0})",
    "LTrim": "LTRIM({0})",
    "RTrim": "RTRIM({0})",
    "Length": "LENGTH({0})",
    "Left": "LEFT({0}, {1})",
    "Right": "RIGHT({0}, {1})",
    # Alteryx start index is 0-based; SUBSTR is 1-based.
    "Substring": "SUBSTR({0}, {1} + 1, {2})",
    # INSTR is 1-based and returns 0 when absent; subtracting one yields the
    # 0-based position, and -1 when the target is not found.
    "FindString": "(INSTR({0}, {1}) - 1)",
    "ReplaceChar": "REPLACE({0}, {1}, {2})",
    "StringToDate": "STRPTIME({0}, {1})",
    "ToString": "PRINTF('%.' || {1} || 'f', {0})",
    "Contains": "CONTAINS({0}, {1})",
    "StartsWith": "STARTS_WITH({0}, {1})",
    "EndsWith": "ENDS_WITH({0}, {1})",
    "REGEX_Match": "REGEXP_MATCHES({0}, {1})",
    # 'g' replaces every match; without it only the first match is replaced.
    "REGEX_Replace": "REGEXP_REPLACE({0}, {1}, {2}, 'g')",
    "PadLeft": "LPAD({0}, {1}, {2})",
    "PadRight": "RPAD({0}, {1}, {2})",
    # The SQL literal must contain a single backslash (\s+): plain SQL string
    # literals do not process backslash escapes, so a doubled backslash would
    # match a literal "\" followed by "s" characters instead of whitespace.
    "GetWord": "list_extract(str_split_regex({0}, '\\s+'), {1} + 1)",
    "CountWords": "array_length(str_split_regex(TRIM({0}), '\\s+'))",
    "CharFromInt": "chr({0}::INTEGER)",
    "IntFromChar": "ascii({0})",
    "ConvertFromCodePage": "{0}",
    "ReverseString": "reverse({0})",
    "DecomposeUnicodeForMatch": "strip_accents(UPPER({0}))",
    # Math
    "ABS": "ABS({0})",
    "Abs": "ABS({0})",
    "CEIL": "CEIL({0})",
    "Ceil": "CEIL({0})",
    "FLOOR": "FLOOR({0})",
    "Floor": "FLOOR({0})",
    "ROUND": "ROUND({0}, {1})",
    "Round": "ROUND({0}, {1})",
    "SQRT": "SQRT({0})",
    "Sqrt": "SQRT({0})",
    "POW": "POWER({0}, {1})",
    "Pow": "POWER({0}, {1})",
    "LOG": "LN({0})",
    "Log": "LN({0})",
    "LOG10": "LOG10({0})",
    "Log10": "LOG10({0})",
    "MOD": "({0} % {1})",
    "Mod": "({0} % {1})",
    "MIN": "LEAST({0}, {1})",
    "Max": "GREATEST({0}, {1})",
    "MAX": "GREATEST({0}, {1})",
    "Min": "LEAST({0}, {1})",
    "RandInt": "FLOOR(RANDOM() * {0})::BIGINT",
    "Random": "RANDOM()",
    "PI": "PI()",
    "SIN": "SIN({0})",
    "COS": "COS({0})",
    "TAN": "TAN({0})",
    "ASIN": "ASIN({0})",
    "ACOS": "ACOS({0})",
    "ATAN": "ATAN({0})",
    "ATAN2": "ATAN2({0}, {1})",
    "EXP": "EXP({0})",
    "Sign": "SIGN({0})",
    # Null handling
    "IsNull": "({0} IS NULL)",
    "IsEmpty": "({0} IS NULL OR {0} = '')",
    "NullConvert": "NULLIF({0}, '')",
    "Null": "NULL",
    # Type conversion
    "ToNumber": "TRY_CAST({0} AS DOUBLE)",
    "ToString_num": "CAST({0} AS VARCHAR)",
    "TOBOOL": "CAST({0} AS BOOLEAN)",
    # Date/Time
    "DateTimeNow": "NOW()",
    "DateTimeToday": "CURRENT_DATE",
    "DateTimeAdd": "({0} + INTERVAL ({1}) {2})",
    "DateTimeDiff": "DATEDIFF({2}, {1}, {0})",
    "DateTimeFormat": "STRFTIME({0}, {1})",
    "ToDate": "CAST({0} AS DATE)",
    "DateTimeYear": "YEAR({0})",
    "DateTimeMonth": "MONTH({0})",
    "DateTimeDay": "DAY({0})",
    "DateTimeHour": "HOUR({0})",
    "DateTimeMinute": "MINUTE({0})",
    "DateTimeSecond": "SECOND({0})",
    "DateTimeFirstOfMonth": "DATE_TRUNC('month', {0})",
    "DateTimeLastOfMonth": "(DATE_TRUNC('month', {0}) + INTERVAL '1 month' - INTERVAL '1 day')::DATE",
    "DateTimeFirstOfYear": "DATE_TRUNC('year', {0})",
    "DateTimeQuarter": "QUARTER({0})",
    "DateTimeTrim": "DATE_TRUNC({1}, {0})",
    # Conditional
    "IIF": "(CASE WHEN {0} THEN {1} ELSE {2} END)",
    "Switch": None,  # handled separately
    # Misc
    "TOPN": None,  # not a scalar function
}
|
|
|
|
|
|
def get_function_sql(name: str, args: list[str]) -> str:
    """Render a function call to DuckDB SQL given evaluated argument SQL strings.

    Args:
        name: Function name. It is looked up in ``FUNCTION_MAP`` by exact
            match first, then case-insensitively.
        args: Already-rendered SQL text for each argument, in call order.

    Returns:
        The rendered SQL expression. ``Switch`` (any casing) is rendered by
        ``_render_switch``. A name with no template is passed through
        unchanged as ``name(arg, ...)`` (it may work in DuckDB natively).
        Placeholders with no corresponding argument are left in the output
        untouched.
    """
    # Switch has no template (its FUNCTION_MAP entry is None), so it has to be
    # dispatched before the "no template" pass-through below.
    if name.lower() == "switch":
        return _render_switch(args)

    template = FUNCTION_MAP.get(name)
    if template is None:
        # Case-insensitive fallback: first key that matches ignoring case.
        canon = name.lower()
        template = next(
            (sql for key, sql in FUNCTION_MAP.items() if key.lower() == canon),
            None,
        )

    if template is None:
        # Unknown function — pass through as-is (may work in DuckDB natively)
        return f"{name}({', '.join(args)})"

    def _fill(match: re.Match[str]) -> str:
        index = int(match.group(1))
        return args[index] if index < len(args) else match.group(0)

    # Substitute in a single pass so that argument text which itself contains
    # something like "{1}" (e.g. inside a string literal) is never rewritten.
    return re.sub(r"\{(\d+)\}", _fill, template)
|
|
|
|
|
|
def _render_switch(args: list[str]) -> str:
|
|
"""Switch(val, default, v1, r1, v2, r2, ...) → CASE val WHEN v1 THEN r1 ... ELSE default END"""
|
|
if len(args) < 2:
|
|
return "NULL"
|
|
val = args[0]
|
|
default = args[1]
|
|
pairs = args[2:]
|
|
cases = []
|
|
for i in range(0, len(pairs) - 1, 2):
|
|
cases.append(f"WHEN {pairs[i]} THEN {pairs[i+1]}")
|
|
cases_sql = " ".join(cases)
|
|
return f"CASE {val} {cases_sql} ELSE {default} END"
|
|
|
|
|
|
def titlecase_sql(col: str) -> str:
    """Build a DuckDB expression approximating Titlecase for ``col``.

    The expression splits the value on single spaces, upper-cases the first
    character of each piece, lower-cases the remainder, and joins the pieces
    back together with single spaces.
    """
    words = f"str_split({col}, ' ')"
    capitalise = "x -> UPPER(LEFT(x,1)) || LOWER(SUBSTR(x,2))"
    return f"array_to_string(list_transform({words}, {capitalise}), ' ')"
|