From e10e5da9f2072006ab79bd548cb2c06f0c0bb577 Mon Sep 17 00:00:00 2001 From: casey Date: Sat, 13 Jun 2026 09:55:00 +1000 Subject: [PATCH] added sum functions and fixed rounding --- .../Output/expected/ProductCostMinMax.csv | 6 + .../Output/expected/ProductCountMinMax.csv | 6 + .../Output/expected/ProductCountSumCost.csv | 6 + .../Output/expected/ProductMostExpensive.csv | 6 + .../TransformTesting/SumToolTesting.bak | 422 ++++++++++++++++++ .../TransformTesting/SumToolTesting.yxmd | 422 ++++++++++++++++++ alteryx_runner/engine/type_mapper.py | 4 +- alteryx_runner/expression/functions.py | 29 +- .../tools/transform/summarize_tool.py | 14 + 9 files changed, 911 insertions(+), 4 deletions(-) create mode 100644 Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductCostMinMax.csv create mode 100644 Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductCountMinMax.csv create mode 100644 Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductCountSumCost.csv create mode 100644 Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductMostExpensive.csv create mode 100644 Alteryx_TestWorkflows/TransformTesting/SumToolTesting.bak create mode 100644 Alteryx_TestWorkflows/TransformTesting/SumToolTesting.yxmd diff --git a/Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductCostMinMax.csv b/Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductCostMinMax.csv new file mode 100644 index 0000000..087722d --- /dev/null +++ b/Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductCostMinMax.csv @@ -0,0 +1,6 @@ +Product_Category,Min_Product_Cost,Max_Product_Cost +Art & Crafts,1.99,20.99 +Electronics,6.99,20.99 +Games,2.99,17.99 +Sports & Outdoors,6.99,14.99 +Toys,3.99,34.99 diff --git a/Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductCountMinMax.csv b/Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductCountMinMax.csv new file mode 100644 index 0000000..087722d --- /dev/null +++ b/Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductCountMinMax.csv @@ -0,0 +1,6 @@ +Product_Category,Min_Product_Cost,Max_Product_Cost +Art & Crafts,1.99,20.99 +Electronics,6.99,20.99 +Games,2.99,17.99 +Sports & Outdoors,6.99,14.99 +Toys,3.99,34.99 diff --git a/Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductCountSumCost.csv b/Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductCountSumCost.csv new file mode 100644 index 0000000..2c9de05 --- /dev/null +++ b/Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductCountSumCost.csv @@ -0,0 +1,6 @@ +Product_Category,Count,Sum_Product_Cost +Art & Crafts,8,71.92 +Electronics,3,42.97 +Games,8,66.92 +Sports & Outdoors,7,71.93 +Toys,9,104.91 diff --git a/Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductMostExpensive.csv b/Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductMostExpensive.csv new file mode 100644 index 0000000..c9d318a --- /dev/null +++ b/Alteryx_TestWorkflows/TransformTesting/Output/expected/ProductMostExpensive.csv @@ -0,0 +1,6 @@ +Product_Category,Last_Product_Cost,Last_Product_Name +Art & Crafts,20.99,PlayDoh Playset +Electronics,20.99,Toy Robot +Games,17.99,Rubik's Cube +Sports & Outdoors,14.99,Nerf Gun +Toys,34.99,Lego Bricks diff --git a/Alteryx_TestWorkflows/TransformTesting/SumToolTesting.bak b/Alteryx_TestWorkflows/TransformTesting/SumToolTesting.bak new file mode 100644 index 0000000..2efc779 --- /dev/null +++ b/Alteryx_TestWorkflows/TransformTesting/SumToolTesting.bak @@ -0,0 +1,422 @@ + + + + + + + + + + + Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv + + True + False + False + 1 + 254 + False + DoubleQuotes + , + False + 28591 + + + + + products.csv + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Z:\Pyteryx\Alteryx_TestWorkflows\TransformTesting\Output\expected\ProductCountSumCost.csv + + False + + CRLF + , + False + True + 28591 + True + + + + + + ProductCountSumCost.csv + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Z:\Pyteryx\Alteryx_TestWorkflows\TransformTesting\Output\expected\ProductCountMinMax.csv + + False + + CRLF + , + False + True + 28591 + True + + + + + + ProductCountMinMax.csv + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Product_Category - Ascending +Product_Cost - Ascending + + + + + + + + + + + + + + + + + + + Product_Cost = ReplaceChar([Product_Cost], "$", "") +Product_Price = ReplaceChar(... + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Z:\Pyteryx\Alteryx_TestWorkflows\TransformTesting\Output\expected\ProductCostMinMax.csv + + False + + CRLF + , + False + True + 28591 + True + + + + + + ProductCostMinMax.csv + + + + + + + + + + + + + + + Z:\Pyteryx\Alteryx_TestWorkflows\TransformTesting\Output\expected\ProductMostExpensive.csv + + False + + CRLF + , + False + True + 28591 + True + + + + + + ProductMostExpensive.csv + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Horizontal + + + SumToolTesting + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/Alteryx_TestWorkflows/TransformTesting/SumToolTesting.yxmd b/Alteryx_TestWorkflows/TransformTesting/SumToolTesting.yxmd new file mode 100644 index 0000000..4a8f241 --- /dev/null +++ b/Alteryx_TestWorkflows/TransformTesting/SumToolTesting.yxmd @@ -0,0 +1,422 @@ + + + + + + + + + + + ..\Maven Toys Data\products.csv + + True + False + False + 1 + 254 + False + DoubleQuotes + , + False + 28591 + + + + + products.csv + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .\Output\expected\ProductCountSumCost.csv + + False + + CRLF + , + False + True + 28591 + True + + + + + + ProductCountSumCost.csv + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .\Output\expected\ProductCountMinMax.csv + + False + + CRLF + , + False + True + 28591 + True + + + + + + ProductCountMinMax.csv + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Product_Category - Ascending +Product_Cost - Ascending + + + + + + + + + + + + + + + + + + + Product_Cost = ReplaceChar([Product_Cost], "$", "") +Product_Price = ReplaceChar(... + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + .\Output\expected\ProductCostMinMax.csv + + False + + CRLF + , + False + True + 28591 + True + + + + + + ProductCostMinMax.csv + + + + + + + + + + + + + + + .\Output\expected\ProductMostExpensive.csv + + False + + CRLF + , + False + True + 28591 + True + + + + + + ProductMostExpensive.csv + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Horizontal + + + SumToolTesting + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/alteryx_runner/engine/type_mapper.py b/alteryx_runner/engine/type_mapper.py index 4a4afb8..90a28af 100644 --- a/alteryx_runner/engine/type_mapper.py +++ b/alteryx_runner/engine/type_mapper.py @@ -12,7 +12,7 @@ class TypeMapper: "Int16": pl.Int16, "Int32": pl.Int32, "Int64": pl.Int64, - "Float": pl.Float32, + "Float": pl.Float64, "Double": pl.Float64, "String": pl.String, "V_String": pl.String, @@ -31,7 +31,7 @@ class TypeMapper: "Int16": "SMALLINT", "Int32": "INTEGER", "Int64": "BIGINT", - "Float": "FLOAT", + "Float": "DOUBLE", "Double": "DOUBLE", "String": "VARCHAR", "V_String": "VARCHAR", diff --git a/alteryx_runner/expression/functions.py b/alteryx_runner/expression/functions.py index 5e6597a..53ca86c 100644 --- a/alteryx_runner/expression/functions.py +++ b/alteryx_runner/expression/functions.py @@ -38,8 +38,8 @@ FUNCTION_MAP: dict[str, str] = { "Ceil": "CEIL({0})", "FLOOR": "FLOOR({0})", "Floor": "FLOOR({0})", - "ROUND": "ROUND({0}, {1})", - "Round": "ROUND({0}, {1})", + "ROUND": None, # handled in get_function_sql + "Round": None, # handled in get_function_sql "SQRT": "SQRT({0})", "Sqrt": "SQRT({0})", "POW": "POWER({0}, {1})", @@ -113,6 +113,12 @@ def get_function_sql(name: str, args: list[str]) -> str: break if template is None: + # Check for specially-handled functions + upper = name.upper() + if upper == "ROUND": + return _render_round(args) + if upper == "SWITCH": + return _render_switch(args) # Unknown function — pass through as-is (may work in DuckDB natively) args_joined = ", ".join(args) return f"{name}({args_joined})" @@ -130,6 +136,25 @@ def get_function_sql(name: str, args: list[str]) -> str: return f"{name}({args_joined})" +def _render_round(args: list[str]) -> str: + """Alteryx Round(value, multiple). + + Round(x, 0.01) → round to 2 decimal places + Round(x, 1) → round to 0 decimal places + Round(x, 100) → round to nearest 100 + Round(x) → round to 0 decimal places + """ + if len(args) == 0: + return "NULL" + if len(args) == 1: + return f"ROUND({args[0]}, 0)" + # Second arg is the rounding multiple as a decimal literal. + # Convert to number of decimal places: n = -log10(multiple) + # e.g. 0.01 → 2, 1 → 0, 100 → -2 + # Use CAST to INTEGER because DuckDB ROUND requires an int precision. + return f"ROUND({args[0]}, CAST(-LOG10({args[1]}) AS INTEGER))" + + def _render_switch(args: list[str]) -> str: """Switch(val, default, v1, r1, v2, r2, ...) → CASE val WHEN v1 THEN r1 ... ELSE default END""" if len(args) < 2: diff --git a/alteryx_runner/tools/transform/summarize_tool.py b/alteryx_runner/tools/transform/summarize_tool.py index 8c633f9..2e7927a 100644 --- a/alteryx_runner/tools/transform/summarize_tool.py +++ b/alteryx_runner/tools/transform/summarize_tool.py @@ -37,12 +37,26 @@ class SummarizeTool(BaseTool): return {"Output": df.select(group_fields).unique(maintain_order=True)} return {"Output": pl.DataFrame()} + # Upcast Float32 columns to Float64 before aggregation to avoid + # floating-point precision noise (matches Alteryx behaviour). + float32_cols = [ + c for c in df.columns + if df[c].dtype == pl.Float32 + ] + if float32_cols: + df = df.with_columns( + [pl.col(c).cast(pl.Float64) for c in float32_cols] + ) + if group_fields: result = df.group_by(group_fields, maintain_order=True).agg(agg_exprs) # Restore group column order all_cols = group_fields + [e.meta.output_name() for e in agg_exprs] existing = [c for c in all_cols if c in result.columns] result = result.select(existing) + # Sort by group columns for deterministic output + # (Alteryx Summarize sorts groups alphabetically) + result = result.sort(group_fields) else: result = df.select(agg_exprs)