Pyteryx/alteryx_runner/tests/test_tools.py

267 lines
10 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters!

This file contains ambiguous Unicode characters that may be confused with others in your current locale. If your use case is intentional and legitimate, you can safely ignore this warning. Use the Escape button to highlight these characters.

"""Integration-level tool tests using in-memory DataFrames (no .yxmd required)."""
from __future__ import annotations
import sys
from pathlib import Path
import xml.etree.ElementTree as ET
import pytest
import polars as pl
# Put the package root (alteryx_runner/) on sys.path so that the `engine` and
# `tools` packages imported by this module resolve regardless of the directory
# the tests are launched from.
PKG = Path(__file__).parent.parent  # alteryx_runner/
_pkg_dir = str(PKG)
if _pkg_dir not in sys.path:
    sys.path.insert(0, _pkg_dir)
from engine.graph import NodeDef
from engine.context import RunContext
def make_node(tool_id: int, plugin: str, config_xml: str) -> NodeDef:
    """Build a ``NodeDef`` whose configuration is parsed from an XML string.

    Args:
        tool_id: Passed through as the node's ``tool_id``.
        plugin: Passed through as the node's ``plugin``.
        config_xml: XML document text; its parsed root element becomes the
            node's ``config``.

    Returns:
        The ``NodeDef`` constructed from the three values.

    Raises:
        xml.etree.ElementTree.ParseError: If ``config_xml`` is not well-formed.
    """
    root = ET.fromstring(config_xml)
    return NodeDef(tool_id=tool_id, plugin=plugin, config=root)
@pytest.fixture
def ctx(tmp_path):
    """Provide a non-verbose ``RunContext`` rooted at pytest's ``tmp_path``."""
    workflow_dir = str(tmp_path)
    return RunContext(workflow_dir=workflow_dir, verbose=False)
# ---------------------------------------------------------------------------
# TextInput
# ---------------------------------------------------------------------------
class TestTextInput:
    """TextInput tool: rows embedded in the configuration become a DataFrame."""

    def test_basic(self, ctx):
        """Two fields and two rows load as strings; an empty cell reads as None."""
        from tools.inout.text_input import TextInputTool

        config_xml = (
            "<Configuration>\n"
            '<Fields><Field name="A"/><Field name="B"/></Fields>\n'
            "<Data>\n"
            "<r><c>1</c><c>hello</c></r>\n"
            "<r><c>2</c><c></c></r>\n"
            "</Data>\n"
            "</Configuration>"
        )
        tool = TextInputTool(make_node(1, "", config_xml), ctx)
        table = tool.execute({})["Output"]

        assert table.shape == (2, 2)
        assert table["A"].to_list() == ["1", "2"]
        # The second row's B cell is empty in the XML and must surface as NULL.
        assert table["B"][1] is None
# ---------------------------------------------------------------------------
# Filter
# ---------------------------------------------------------------------------
class TestFilter:
    """Filter tool: rows are split between the "True" and "False" outputs."""

    def _df(self) -> pl.DataFrame:
        """Build the four-row ID/Region frame shared by the filter tests."""
        ids = [1, 2, 3, 4]
        regions = ["North", "South", "North", "South"]
        return pl.DataFrame({"ID": ids, "Region": regions})

    def test_simple_gt(self, ctx):
        """Simple mode with ``ID > 2`` sends two rows to each output."""
        from tools.preparation.filter_tool import FilterTool

        config_xml = (
            "<Configuration>\n"
            "<Mode>Simple</Mode>\n"
            "<Simple>\n"
            "<Operator>&gt;</Operator>\n"
            "<Field>ID</Field>\n"
            "<Operands><Operand>2</Operand><DateType>fixed</DateType></Operands>\n"
            "</Simple>\n"
            "<Expression>[ID] &gt; 2</Expression>\n"
            "</Configuration>"
        )
        tool = FilterTool(make_node(1, "", config_xml), ctx)
        outputs = tool.execute({"Input": self._df()})

        assert len(outputs["True"]) == 2
        assert len(outputs["False"]) == 2

    def test_custom_expr(self, ctx):
        """Custom mode keeps only the rows whose Region equals "South"."""
        from tools.preparation.filter_tool import FilterTool

        config_xml = (
            "<Configuration>\n"
            "<Mode>Custom</Mode>\n"
            '<Expression>[Region] == "South"</Expression>\n'
            "</Configuration>"
        )
        tool = FilterTool(make_node(1, "", config_xml), ctx)
        outputs = tool.execute({"Input": self._df()})

        assert len(outputs["True"]) == 2
        matched_regions = outputs["True"]["Region"].to_list()
        assert all(region == "South" for region in matched_regions)
# ---------------------------------------------------------------------------
# Select
# ---------------------------------------------------------------------------
class TestSelect:
    """Select tool: drop, rename and pass through columns."""

    def test_drop_and_rename(self, ctx):
        """A is renamed to Alpha, B is dropped, and unlisted C is kept."""
        from tools.preparation.select_tool import SelectTool

        source = pl.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
        config_xml = (
            '<Configuration OrderChanged="False">\n'
            "<SelectFields>\n"
            '<SelectField field="A" selected="True" rename="Alpha"/>\n'
            '<SelectField field="B" selected="False"/>\n'
            '<SelectField field="*Unknown" selected="True"/>\n'
            "</SelectFields>\n"
            "</Configuration>"
        )
        tool = SelectTool(make_node(1, "", config_xml), ctx)
        selected = tool.execute({"Input": source})["Output"]

        assert "Alpha" in selected.columns
        assert "B" not in selected.columns
        # C is not listed explicitly; the *Unknown entry lets it pass through.
        assert "C" in selected.columns
# ---------------------------------------------------------------------------
# Sort
# ---------------------------------------------------------------------------
class TestSort:
    """Sort tool: order rows by a single field in either direction."""

    def test_ascending(self, ctx):
        """Sorting Name ascending yields alphabetical order."""
        from tools.preparation.sort_tool import SortTool

        source = pl.DataFrame({"Name": ["Charlie", "Alice", "Bob"], "Score": [3, 1, 2]})
        config_xml = (
            "<Configuration>\n"
            '<SortInfo locale="0">\n'
            '<Field field="Name" order="Ascending"/>\n'
            "</SortInfo>\n"
            "</Configuration>"
        )
        tool = SortTool(make_node(1, "", config_xml), ctx)
        ordered = tool.execute({"Input": source})["Output"]

        assert ordered["Name"].to_list() == ["Alice", "Bob", "Charlie"]

    def test_descending(self, ctx):
        """Sorting Score descending yields the largest value first."""
        from tools.preparation.sort_tool import SortTool

        source = pl.DataFrame({"Score": [3, 1, 2]})
        config_xml = (
            "<Configuration>\n"
            '<SortInfo locale="0">\n'
            '<Field field="Score" order="Descending"/>\n'
            "</SortInfo>\n"
            "</Configuration>"
        )
        tool = SortTool(make_node(1, "", config_xml), ctx)
        ordered = tool.execute({"Input": source})["Output"]

        assert ordered["Score"].to_list() == [3, 2, 1]
# ---------------------------------------------------------------------------
# Unique
# ---------------------------------------------------------------------------
class TestUnique:
    """Unique tool: split rows between the "Unique" and "Duplicate" outputs."""

    def test_unique_and_duplicate(self, ctx):
        """Five names with three distinct values give 3 unique and 2 duplicate rows."""
        from tools.preparation.unique_tool import UniqueTool

        names = ["Alice", "Bob", "Alice", "Charlie", "Bob"]
        source = pl.DataFrame({"Name": names})
        config_xml = (
            "<Configuration>\n"
            '<UniqueFields><Field name="Name"/></UniqueFields>\n'
            "</Configuration>"
        )
        tool = UniqueTool(make_node(1, "", config_xml), ctx)
        outputs = tool.execute({"Input": source})

        assert len(outputs["Unique"]) == 3
        assert len(outputs["Duplicate"]) == 2
# ---------------------------------------------------------------------------
# Sample
# ---------------------------------------------------------------------------
class TestSample:
    """Sample tool: keep the first or the last N rows of the input."""

    def _df(self) -> pl.DataFrame:
        """Build a single-column frame holding N = 0..9."""
        values = list(range(10))
        return pl.DataFrame({"N": values})

    def test_first(self, ctx):
        """Mode First with N=3 keeps rows 0, 1 and 2."""
        from tools.preparation.sample_tool import SampleTool

        config_xml = "<Configuration><Mode>First</Mode><N>3</N><GroupFields/></Configuration>"
        tool = SampleTool(make_node(1, "", config_xml), ctx)
        sampled = tool.execute({"Input": self._df()})["Output"]

        assert len(sampled) == 3
        assert sampled["N"].to_list() == [0, 1, 2]

    def test_last(self, ctx):
        """Mode Last with N=2 keeps rows 8 and 9."""
        from tools.preparation.sample_tool import SampleTool

        config_xml = "<Configuration><Mode>Last</Mode><N>2</N><GroupFields/></Configuration>"
        tool = SampleTool(make_node(1, "", config_xml), ctx)
        sampled = tool.execute({"Input": self._df()})["Output"]

        assert sampled["N"].to_list() == [8, 9]
# ---------------------------------------------------------------------------
# Union
# ---------------------------------------------------------------------------
class TestUnion:
    """Union tool: stack two inputs, matching columns by name or by position."""

    def test_by_name(self, ctx):
        """Auto mode is expected to match columns by name, not by position."""
        from tools.join.union_tool import UnionTool

        # The second frame lists its columns in the opposite order, so a
        # positional stack would put 4 under "A" and 3 under "B".
        df1 = pl.DataFrame({"A": [1], "B": [2]})
        df2 = pl.DataFrame({"B": [4], "A": [3]})
        xml = "<Configuration><Mode>Auto</Mode></Configuration>"
        node = make_node(1, "", xml)
        result = UnionTool(node, ctx).execute({"Input1": df1, "Input2": df2})["Output"]

        assert len(result) == 2
        # A row count alone cannot tell a by-name union from a positional one.
        # Compare (A, B) pairs as a set so the check is independent of row order.
        rows = set(zip(result["A"].to_list(), result["B"].to_list()))
        assert rows == {(1, 2), (3, 4)}

    def test_by_position(self, ctx):
        """ByPosition mode stacks rows under the first input's column names."""
        from tools.join.union_tool import UnionTool

        df1 = pl.DataFrame({"X": [1], "Y": [2]})
        df2 = pl.DataFrame({"P": [3], "Q": [4]})
        xml = "<Configuration><Mode>ByPosition</Mode></Configuration>"
        node = make_node(1, "", xml)
        result = UnionTool(node, ctx).execute({"Input1": df1, "Input2": df2})["Output"]

        assert result.columns == ["X", "Y"]
        assert len(result) == 2
# ---------------------------------------------------------------------------
# Summarize
# ---------------------------------------------------------------------------
class TestSummarize:
    """Summarize tool: group rows and aggregate a field."""

    def test_group_sum(self, ctx):
        """Grouping by Region and summing Sales gives N=250 and S=450."""
        from tools.transform.summarize_tool import SummarizeTool

        source = pl.DataFrame(
            {
                "Region": ["N", "S", "N", "S"],
                "Sales": [100, 200, 150, 250],
            }
        )
        config_xml = (
            "<Configuration>\n"
            "<SummarizeFields>\n"
            '<SummarizeField field="Region" action="GroupBy" rename="Region"/>\n'
            '<SummarizeField field="Sales" action="Sum" rename="Total"/>\n'
            "</SummarizeFields>\n"
            "</Configuration>"
        )
        tool = SummarizeTool(make_node(1, "", config_xml), ctx)
        summary = tool.execute({"Input": source})["Output"]

        assert set(summary.columns) == {"Region", "Total"}
        # Key the totals by region so the check does not depend on group order.
        regions = summary["Region"].to_list()
        sums = summary["Total"].to_list()
        totals = dict(zip(regions, sums))
        assert totals["N"] == 250
        assert totals["S"] == 450
# ---------------------------------------------------------------------------
# Transpose
# ---------------------------------------------------------------------------
class TestTranspose:
    """Transpose tool: unpivot data fields into Name/Value rows."""

    def test_unpivot(self, ctx):
        """Two rows with two data fields unpivot into four Name/Value rows."""
        from tools.transform.transpose_tool import TransposeTool

        source = pl.DataFrame({"ID": [1, 2], "Visits": [5, 10], "Spend": [100, 200]})
        config_xml = (
            "<Configuration>\n"
            '<KeyFields><Field name="ID"/></KeyFields>\n'
            "<DataFields>\n"
            '<Field name="Visits"/>\n'
            '<Field name="Spend"/>\n'
            "</DataFields>\n"
            "</Configuration>"
        )
        tool = TransposeTool(make_node(1, "", config_xml), ctx)
        unpivoted = tool.execute({"Input": source})["Output"]

        assert "Name" in unpivoted.columns
        assert "Value" in unpivoted.columns
        # 2 input rows x 2 data columns
        assert len(unpivoted) == 4
# ---------------------------------------------------------------------------
# RecordID
# ---------------------------------------------------------------------------
class TestRecordID:
    """RecordID tool: add a sequential identifier column."""

    def test_starts_at_one(self, ctx):
        """With StartValue 1 the ID column reads 1, 2, 3 and is the first column."""
        from tools.preparation.record_id import RecordIDTool

        source = pl.DataFrame({"Name": ["A", "B", "C"]})
        config_xml = (
            "<Configuration>\n"
            "<Field>ID</Field>\n"
            "<StartValue>1</StartValue>\n"
            "<FieldType>Int32</FieldType>\n"
            "</Configuration>"
        )
        tool = RecordIDTool(make_node(1, "", config_xml), ctx)
        numbered = tool.execute({"Input": source})["Output"]

        assert numbered["ID"].to_list() == [1, 2, 3]
        assert numbered.columns[0] == "ID"