# Listing metadata (apparently from the file viewer, not part of the module): 267 lines, 10 KiB, Python.
"""Integration-level tool tests using in-memory DataFrames (no .yxmd required)."""
|
||
from __future__ import annotations
|
||
import sys
|
||
from pathlib import Path
|
||
import xml.etree.ElementTree as ET
|
||
import pytest
|
||
import polars as pl
|
||
|
||
# Put the package root (alteryx_runner/) on sys.path, once, ahead of existing
# entries. Presumably this is what lets the `engine` / `tools` imports used by
# these tests resolve — confirm against the repository layout.
PKG = Path(__file__).parent.parent
_pkg_path = str(PKG)
if _pkg_path not in sys.path:
    sys.path.insert(0, _pkg_path)
|
||
|
||
from engine.graph import NodeDef
|
||
from engine.context import RunContext
|
||
|
||
|
||
def make_node(tool_id: int, plugin: str, config_xml: str) -> NodeDef:
    """Build a ``NodeDef`` whose ``config`` is the element parsed from *config_xml*.

    Args:
        tool_id: Forwarded unchanged as ``NodeDef(tool_id=...)``.
        plugin: Forwarded unchanged as ``NodeDef(plugin=...)``.
        config_xml: XML text handed to ``ElementTree.fromstring``; the
            resulting root element becomes the node's ``config``.

    Returns:
        The newly constructed ``NodeDef``.
    """
    return NodeDef(
        tool_id=tool_id,
        plugin=plugin,
        config=ET.fromstring(config_xml),
    )
|
||
|
||
|
||
@pytest.fixture
def ctx(tmp_path):
    """Provide a non-verbose ``RunContext`` whose workflow dir is ``tmp_path``."""
    workflow_dir = str(tmp_path)
    return RunContext(workflow_dir=workflow_dir, verbose=False)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# TextInput
# ---------------------------------------------------------------------------
|
||
|
||
class TestTextInput:
    """Runs ``TextInputTool`` on an inline two-field, two-row configuration."""

    def test_basic(self, ctx):
        """Expect a 2x2 frame, string values in ``A``, and ``None`` for the empty cell."""
        from tools.inout.text_input import TextInputTool

        config_xml = """<Configuration>
          <Fields><Field name="A"/><Field name="B"/></Fields>
          <Data>
            <r><c>1</c><c>hello</c></r>
            <r><c>2</c><c></c></r>
          </Data>
        </Configuration>"""
        tool = TextInputTool(make_node(1, "", config_xml), ctx)
        outputs = tool.execute({})
        frame = outputs["Output"]

        assert frame.shape == (2, 2)
        assert frame["A"].to_list() == ["1", "2"]
        # The second row's <c></c> is empty and must surface as NULL.
        assert frame["B"][1] is None
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Filter
# ---------------------------------------------------------------------------
|
||
|
||
class TestFilter:
    """Runs ``FilterTool`` in Simple and Custom modes on a shared four-row frame."""

    def _df(self) -> pl.DataFrame:
        """Return the input frame: ``ID`` 1-4 with alternating North/South ``Region``."""
        ids = [1, 2, 3, 4]
        regions = ["North", "South", "North", "South"]
        return pl.DataFrame({"ID": ids, "Region": regions})

    def test_simple_gt(self, ctx):
        """Simple-mode ``ID > 2`` must put two rows on each of "True" and "False"."""
        from tools.preparation.filter_tool import FilterTool

        config_xml = """<Configuration>
          <Mode>Simple</Mode>
          <Simple>
            <Operator>></Operator>
            <Field>ID</Field>
            <Operands><Operand>2</Operand><DateType>fixed</DateType></Operands>
          </Simple>
          <Expression>[ID] > 2</Expression>
        </Configuration>"""
        tool = FilterTool(make_node(1, "", config_xml), ctx)
        outputs = tool.execute({"Input": self._df()})

        # Checked in the same order as before: "True" first, then "False".
        for anchor in ("True", "False"):
            assert len(outputs[anchor]) == 2

    def test_custom_expr(self, ctx):
        """Custom-mode ``[Region] == "South"`` must keep exactly the two South rows."""
        from tools.preparation.filter_tool import FilterTool

        config_xml = """<Configuration>
          <Mode>Custom</Mode>
          <Expression>[Region] == "South"</Expression>
        </Configuration>"""
        tool = FilterTool(make_node(1, "", config_xml), ctx)
        outputs = tool.execute({"Input": self._df()})

        assert len(outputs["True"]) == 2
        matched_regions = outputs["True"]["Region"].to_list()
        assert all(region == "South" for region in matched_regions)
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Select
# ---------------------------------------------------------------------------
|
||
|
||
class TestSelect:
    """Runs ``SelectTool`` with a rename, a deselected field, and ``*Unknown``."""

    def test_drop_and_rename(self, ctx):
        """``A`` is renamed to ``Alpha``, ``B`` is dropped, and unlisted ``C`` remains."""
        from tools.preparation.select_tool import SelectTool

        source = pl.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
        config_xml = """<Configuration OrderChanged="False">
          <SelectFields>
            <SelectField field="A" selected="True" rename="Alpha"/>
            <SelectField field="B" selected="False"/>
            <SelectField field="*Unknown" selected="True"/>
          </SelectFields>
        </Configuration>"""
        tool = SelectTool(make_node(1, "", config_xml), ctx)
        selected = tool.execute({"Input": source})["Output"]

        assert "Alpha" in selected.columns
        assert "B" not in selected.columns
        # "C" is not listed explicitly; the *Unknown entry lets it pass through.
        assert "C" in selected.columns
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Sort
# ---------------------------------------------------------------------------
|
||
|
||
class TestSort:
    """Runs ``SortTool`` with one ascending and one descending sort field."""

    @staticmethod
    def _sorted_output(ctx, frame, config_xml):
        """Run ``SortTool`` with *config_xml* on *frame* and return its "Output"."""
        from tools.preparation.sort_tool import SortTool

        node = make_node(1, "", config_xml)
        return SortTool(node, ctx).execute({"Input": frame})["Output"]

    def test_ascending(self, ctx):
        """Ascending sort on ``Name`` yields Alice, Bob, Charlie."""
        frame = pl.DataFrame({"Name": ["Charlie", "Alice", "Bob"], "Score": [3, 1, 2]})
        config_xml = """<Configuration>
          <SortInfo locale="0">
            <Field field="Name" order="Ascending"/>
          </SortInfo>
        </Configuration>"""

        ordered = self._sorted_output(ctx, frame, config_xml)

        assert ordered["Name"].to_list() == ["Alice", "Bob", "Charlie"]

    def test_descending(self, ctx):
        """Descending sort on ``Score`` yields 3, 2, 1."""
        frame = pl.DataFrame({"Score": [3, 1, 2]})
        config_xml = """<Configuration>
          <SortInfo locale="0">
            <Field field="Score" order="Descending"/>
          </SortInfo>
        </Configuration>"""

        ordered = self._sorted_output(ctx, frame, config_xml)

        assert ordered["Score"].to_list() == [3, 2, 1]
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Unique
# ---------------------------------------------------------------------------
|
||
|
||
class TestUnique:
    """Runs ``UniqueTool`` keyed on ``Name`` over a frame with repeated names."""

    def test_unique_and_duplicate(self, ctx):
        """Five rows with three distinct names split into 3 "Unique" and 2 "Duplicate"."""
        from tools.preparation.unique_tool import UniqueTool

        names = ["Alice", "Bob", "Alice", "Charlie", "Bob"]
        frame = pl.DataFrame({"Name": names})
        config_xml = """<Configuration>
          <UniqueFields><Field name="Name"/></UniqueFields>
        </Configuration>"""
        tool = UniqueTool(make_node(1, "", config_xml), ctx)
        outputs = tool.execute({"Input": frame})

        assert len(outputs["Unique"]) == 3
        assert len(outputs["Duplicate"]) == 2
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Sample
# ---------------------------------------------------------------------------
|
||
|
||
class TestSample:
    """Runs ``SampleTool`` in First and Last modes on a ten-row frame."""

    def _df(self) -> pl.DataFrame:
        """Return a one-column frame ``N`` holding the integers 0 through 9."""
        values = list(range(10))
        return pl.DataFrame({"N": values})

    def test_first(self, ctx):
        """``Mode=First`` with ``N=3`` returns rows 0, 1, 2."""
        from tools.preparation.sample_tool import SampleTool

        config_xml = "<Configuration><Mode>First</Mode><N>3</N><GroupFields/></Configuration>"
        tool = SampleTool(make_node(1, "", config_xml), ctx)
        sampled = tool.execute({"Input": self._df()})["Output"]

        assert len(sampled) == 3
        assert sampled["N"].to_list() == [0, 1, 2]

    def test_last(self, ctx):
        """``Mode=Last`` with ``N=2`` returns rows 8, 9."""
        from tools.preparation.sample_tool import SampleTool

        config_xml = "<Configuration><Mode>Last</Mode><N>2</N><GroupFields/></Configuration>"
        tool = SampleTool(make_node(1, "", config_xml), ctx)
        sampled = tool.execute({"Input": self._df()})["Output"]

        assert sampled["N"].to_list() == [8, 9]
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Union
# ---------------------------------------------------------------------------
|
||
|
||
class TestUnion:
    """Runs ``UnionTool`` in Auto and ByPosition modes on two one-row frames."""

    def test_by_name(self, ctx):
        """``Mode=Auto`` on frames sharing columns A/B (in different order) gives 2 rows."""
        from tools.join.union_tool import UnionTool

        first = pl.DataFrame({"A": [1], "B": [2]})
        second = pl.DataFrame({"B": [4], "A": [3]})
        config_xml = "<Configuration><Mode>Auto</Mode></Configuration>"
        tool = UnionTool(make_node(1, "", config_xml), ctx)
        inputs = {"Input1": first, "Input2": second}
        combined = tool.execute(inputs)["Output"]

        assert len(combined) == 2

    def test_by_position(self, ctx):
        """``Mode=ByPosition`` keeps the first input's column names and gives 2 rows."""
        from tools.join.union_tool import UnionTool

        first = pl.DataFrame({"X": [1], "Y": [2]})
        second = pl.DataFrame({"P": [3], "Q": [4]})
        config_xml = "<Configuration><Mode>ByPosition</Mode></Configuration>"
        tool = UnionTool(make_node(1, "", config_xml), ctx)
        inputs = {"Input1": first, "Input2": second}
        combined = tool.execute(inputs)["Output"]

        assert combined.columns == ["X", "Y"]
        assert len(combined) == 2
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Summarize
# ---------------------------------------------------------------------------
|
||
|
||
class TestSummarize:
    """Runs ``SummarizeTool`` with a GroupBy on ``Region`` and a Sum of ``Sales``."""

    def test_group_sum(self, ctx):
        """Output has columns Region/Total, with N totalling 250 and S totalling 450."""
        from tools.transform.summarize_tool import SummarizeTool

        frame = pl.DataFrame(
            {
                "Region": ["N", "S", "N", "S"],
                "Sales": [100, 200, 150, 250],
            }
        )
        config_xml = """<Configuration>
          <SummarizeFields>
            <SummarizeField field="Region" action="GroupBy" rename="Region"/>
            <SummarizeField field="Sales" action="Sum" rename="Total"/>
          </SummarizeFields>
        </Configuration>"""
        tool = SummarizeTool(make_node(1, "", config_xml), ctx)
        summary = tool.execute({"Input": frame})["Output"]

        assert set(summary.columns) == {"Region", "Total"}

        # Index totals by region so the checks do not depend on output row order.
        regions = summary["Region"].to_list()
        sums = summary["Total"].to_list()
        totals = {region: total for region, total in zip(regions, sums)}
        assert totals["N"] == 250
        assert totals["S"] == 450
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# Transpose
# ---------------------------------------------------------------------------
|
||
|
||
class TestTranspose:
    """Runs ``TransposeTool`` with ``ID`` as key and two data fields."""

    def test_unpivot(self, ctx):
        """Output exposes ``Name`` and ``Value`` columns and has four rows."""
        from tools.transform.transpose_tool import TransposeTool

        frame = pl.DataFrame({"ID": [1, 2], "Visits": [5, 10], "Spend": [100, 200]})
        config_xml = """<Configuration>
          <KeyFields><Field name="ID"/></KeyFields>
          <DataFields>
            <Field name="Visits"/>
            <Field name="Spend"/>
          </DataFields>
        </Configuration>"""
        tool = TransposeTool(make_node(1, "", config_xml), ctx)
        unpivoted = tool.execute({"Input": frame})["Output"]

        for expected_column in ("Name", "Value"):
            assert expected_column in unpivoted.columns
        # 2 input rows x 2 data columns.
        assert len(unpivoted) == 4
|
||
|
||
|
||
# ---------------------------------------------------------------------------
# RecordID
# ---------------------------------------------------------------------------
|
||
|
||
class TestRecordID:
    """Runs ``RecordIDTool`` configured with field ``ID`` and start value 1."""

    def test_starts_at_one(self, ctx):
        """Three input rows get IDs 1, 2, 3 and ``ID`` is the first output column."""
        from tools.preparation.record_id import RecordIDTool

        frame = pl.DataFrame({"Name": ["A", "B", "C"]})
        config_xml = """<Configuration>
          <Field>ID</Field>
          <StartValue>1</StartValue>
          <FieldType>Int32</FieldType>
        </Configuration>"""
        tool = RecordIDTool(make_node(1, "", config_xml), ctx)
        numbered = tool.execute({"Input": frame})["Output"]

        assert numbered["ID"].to_list() == [1, 2, 3]
        assert numbered.columns[0] == "ID"
|