"""Integration-level tool tests using in-memory DataFrames (no .yxmd required)."""
from __future__ import annotations
import sys
from pathlib import Path
import xml.etree.ElementTree as ET
import pytest
import polars as pl
# Put the parent of this file's directory at the front of sys.path (once);
# presumably this is what lets the engine.* / tools.* imports resolve.
PKG = Path(__file__).parent.parent  # alteryx_runner/
_pkg_entry = str(PKG)
if _pkg_entry not in sys.path:
    sys.path.insert(0, _pkg_entry)
from engine.graph import NodeDef
from engine.context import RunContext
def make_node(tool_id: int, plugin: str, config_xml: str) -> NodeDef:
    """Build a NodeDef whose config is the element parsed from *config_xml*.

    Args:
        tool_id: Passed through as the node's ``tool_id``.
        plugin: Passed through as the node's ``plugin``.
        config_xml: XML text handed to ``ET.fromstring``.

    Returns:
        The constructed ``NodeDef``.
    """
    return NodeDef(
        tool_id=tool_id,
        plugin=plugin,
        config=ET.fromstring(config_xml),
    )
@pytest.fixture
def ctx(tmp_path):
    """Provide a non-verbose RunContext whose workflow_dir is the tmp_path string."""
    workflow_dir = str(tmp_path)
    return RunContext(workflow_dir=workflow_dir, verbose=False)
# ---------------------------------------------------------------------------
# TextInput
# ---------------------------------------------------------------------------
class TestTextInput:
    """Checks the frame TextInputTool emits on its "Output" port."""

    # NOTE(review): as it appears here the config payload is bare text with no
    # XML elements, which ET.fromstring (called by make_node) would reject —
    # confirm the markup against the original test file.

    def test_basic(self, ctx):
        """Expect a 2x2 frame, A == ["1", "2"], and None in B's second row."""
        from tools.inout.text_input import TextInputTool

        config_xml = """
1hello
2
"""
        tool = TextInputTool(make_node(1, "", config_xml), ctx)
        outputs = tool.execute({})
        frame = outputs["Output"]

        assert frame.shape == (2, 2)
        assert frame["A"].to_list() == ["1", "2"]
        assert frame["B"][1] is None  # empty → NULL
# ---------------------------------------------------------------------------
# Filter
# ---------------------------------------------------------------------------
class TestFilter:
    """Checks how FilterTool splits a four-row frame into "True"/"False" outputs."""

    # NOTE(review): as they appear here the config payloads are bare text with
    # no XML elements, which ET.fromstring (called by make_node) would reject —
    # confirm the markup against the original test file.

    def _df(self) -> pl.DataFrame:
        """Build the shared input: IDs 1-4 with alternating North/South regions."""
        columns = {
            "ID": [1, 2, 3, 4],
            "Region": ["North", "South", "North", "South"],
        }
        return pl.DataFrame(columns)

    def test_simple_gt(self, ctx):
        """Both the "True" and the "False" output should hold two rows."""
        from tools.preparation.filter_tool import FilterTool

        config_xml = """
Simple
>
ID
2fixed
[ID] > 2
"""
        node_def = make_node(1, "", config_xml)
        outputs = FilterTool(node_def, ctx).execute({"Input": self._df()})

        for port in ("True", "False"):
            assert len(outputs[port]) == 2

    def test_custom_expr(self, ctx):
        """The "True" output should hold exactly the two South rows."""
        from tools.preparation.filter_tool import FilterTool

        config_xml = """
Custom
[Region] == "South"
"""
        node_def = make_node(1, "", config_xml)
        outputs = FilterTool(node_def, ctx).execute({"Input": self._df()})

        kept = outputs["True"]
        assert len(kept) == 2
        for region in kept["Region"].to_list():
            assert region == "South"
# ---------------------------------------------------------------------------
# Select
# ---------------------------------------------------------------------------
class TestSelect:
    """Checks the column set SelectTool emits on its "Output" port."""

    # NOTE(review): as it appears here the config payload is empty, which
    # ET.fromstring (called by make_node) would reject — confirm the markup
    # against the original test file.

    def test_drop_and_rename(self, ctx):
        """Expect "Alpha" present, "B" absent and "C" still present."""
        from tools.preparation.select_tool import SelectTool

        source = pl.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
        config_xml = """
"""
        tool = SelectTool(make_node(1, "", config_xml), ctx)
        selected = tool.execute({"Input": source})["Output"]

        output_columns = selected.columns
        assert "Alpha" in output_columns
        assert "B" not in output_columns
        assert "C" in output_columns  # *Unknown passes through
# ---------------------------------------------------------------------------
# Sort
# ---------------------------------------------------------------------------
class TestSort:
    """Checks the row order SortTool emits on its "Output" port."""

    # NOTE(review): as they appear here the config payloads are empty, which
    # ET.fromstring (called by make_node) would reject — confirm the markup
    # against the original test file.

    def test_ascending(self, ctx):
        """Names should come back as Alice, Bob, Charlie."""
        from tools.preparation.sort_tool import SortTool

        source = pl.DataFrame(
            {"Name": ["Charlie", "Alice", "Bob"], "Score": [3, 1, 2]}
        )
        config_xml = """
"""
        tool = SortTool(make_node(1, "", config_xml), ctx)
        ordered = tool.execute({"Input": source})["Output"]

        assert ordered["Name"].to_list() == ["Alice", "Bob", "Charlie"]

    def test_descending(self, ctx):
        """Scores should come back as 3, 2, 1."""
        from tools.preparation.sort_tool import SortTool

        source = pl.DataFrame({"Score": [3, 1, 2]})
        config_xml = """
"""
        tool = SortTool(make_node(1, "", config_xml), ctx)
        ordered = tool.execute({"Input": source})["Output"]

        assert ordered["Score"].to_list() == [3, 2, 1]
# ---------------------------------------------------------------------------
# Unique
# ---------------------------------------------------------------------------
class TestUnique:
    """Checks the sizes of UniqueTool's "Unique" and "Duplicate" outputs."""

    # NOTE(review): as it appears here the config payload is empty, which
    # ET.fromstring (called by make_node) would reject — confirm the markup
    # against the original test file.

    def test_unique_and_duplicate(self, ctx):
        """Five names with two repeats: expect 3 unique rows and 2 duplicates."""
        from tools.preparation.unique_tool import UniqueTool

        names = ["Alice", "Bob", "Alice", "Charlie", "Bob"]
        source = pl.DataFrame({"Name": names})
        config_xml = """
"""
        tool = UniqueTool(make_node(1, "", config_xml), ctx)
        outputs = tool.execute({"Input": source})

        assert len(outputs["Unique"]) == 3
        assert len(outputs["Duplicate"]) == 2
# ---------------------------------------------------------------------------
# Sample
# ---------------------------------------------------------------------------
class TestSample:
    """Checks which rows SampleTool emits on its "Output" port."""

    # NOTE(review): as they appear here the config payloads are bare text with
    # no XML elements, which ET.fromstring (called by make_node) would reject —
    # confirm the markup against the original test file.

    def _df(self) -> pl.DataFrame:
        """Build the shared input: a single column N holding 0 through 9."""
        values = list(range(10))
        return pl.DataFrame({"N": values})

    def test_first(self, ctx):
        """Expect three rows whose N values are 0, 1, 2."""
        from tools.preparation.sample_tool import SampleTool

        config_xml = "First3"
        tool = SampleTool(make_node(1, "", config_xml), ctx)
        sampled = tool.execute({"Input": self._df()})["Output"]

        assert len(sampled) == 3
        assert sampled["N"].to_list() == [0, 1, 2]

    def test_last(self, ctx):
        """Expect N values 8, 9."""
        from tools.preparation.sample_tool import SampleTool

        config_xml = "Last2"
        tool = SampleTool(make_node(1, "", config_xml), ctx)
        sampled = tool.execute({"Input": self._df()})["Output"]

        assert sampled["N"].to_list() == [8, 9]
# ---------------------------------------------------------------------------
# Union
# ---------------------------------------------------------------------------
class TestUnion:
    """Checks what UnionTool emits on "Output" for two single-row inputs."""

    # NOTE(review): as they appear here the config payloads are bare text with
    # no XML elements, which ET.fromstring (called by make_node) would reject —
    # confirm the markup against the original test file.

    def test_by_name(self, ctx):
        """Inputs with the same columns in different order yield two rows."""
        from tools.join.union_tool import UnionTool

        first = pl.DataFrame({"A": [1], "B": [2]})
        second = pl.DataFrame({"B": [4], "A": [3]})
        config_xml = "Auto"
        tool = UnionTool(make_node(1, "", config_xml), ctx)
        inputs = {"Input1": first, "Input2": second}
        combined = tool.execute(inputs)["Output"]

        assert len(combined) == 2

    def test_by_position(self, ctx):
        """Inputs with different column names yield two rows under X, Y."""
        from tools.join.union_tool import UnionTool

        first = pl.DataFrame({"X": [1], "Y": [2]})
        second = pl.DataFrame({"P": [3], "Q": [4]})
        config_xml = "ByPosition"
        tool = UnionTool(make_node(1, "", config_xml), ctx)
        inputs = {"Input1": first, "Input2": second}
        combined = tool.execute(inputs)["Output"]

        assert combined.columns == ["X", "Y"]
        assert len(combined) == 2
# ---------------------------------------------------------------------------
# Summarize
# ---------------------------------------------------------------------------
class TestSummarize:
    """Checks the per-region totals SummarizeTool emits on "Output"."""

    # NOTE(review): as it appears here the config payload is empty, which
    # ET.fromstring (called by make_node) would reject — confirm the markup
    # against the original test file.

    def test_group_sum(self, ctx):
        """Expect columns Region/Total with N -> 250 and S -> 450."""
        from tools.transform.summarize_tool import SummarizeTool

        source = pl.DataFrame(
            {
                "Region": ["N", "S", "N", "S"],
                "Sales": [100, 200, 150, 250],
            }
        )
        config_xml = """
"""
        tool = SummarizeTool(make_node(1, "", config_xml), ctx)
        summary = tool.execute({"Input": source})["Output"]

        assert set(summary.columns) == {"Region", "Total"}

        regions = summary["Region"].to_list()
        sums = summary["Total"].to_list()
        total_by_region = {region: total for region, total in zip(regions, sums)}
        for region, expected in (("N", 250), ("S", 450)):
            assert total_by_region[region] == expected
# ---------------------------------------------------------------------------
# Transpose
# ---------------------------------------------------------------------------
class TestTranspose:
    """Checks the shape of what TransposeTool emits on "Output"."""

    # NOTE(review): as it appears here the config payload is empty, which
    # ET.fromstring (called by make_node) would reject — confirm the markup
    # against the original test file.

    def test_unpivot(self, ctx):
        """Expect Name and Value columns and four rows in total."""
        from tools.transform.transpose_tool import TransposeTool

        source = pl.DataFrame(
            {"ID": [1, 2], "Visits": [5, 10], "Spend": [100, 200]}
        )
        config_xml = """
"""
        tool = TransposeTool(make_node(1, "", config_xml), ctx)
        unpivoted = tool.execute({"Input": source})["Output"]

        assert "Name" in unpivoted.columns
        assert "Value" in unpivoted.columns
        assert len(unpivoted) == 4  # 2 rows × 2 data cols
# ---------------------------------------------------------------------------
# RecordID
# ---------------------------------------------------------------------------
class TestRecordID:
    """Checks the ID column RecordIDTool emits on "Output"."""

    # NOTE(review): as it appears here the config payload is bare text with no
    # XML elements, which ET.fromstring (called by make_node) would reject —
    # confirm the markup against the original test file.

    def test_starts_at_one(self, ctx):
        """Expect ID values 1, 2, 3 with ID as the first column."""
        from tools.preparation.record_id import RecordIDTool

        source = pl.DataFrame({"Name": ["A", "B", "C"]})
        config_xml = """
ID
1
Int32
"""
        tool = RecordIDTool(make_node(1, "", config_xml), ctx)
        numbered = tool.execute({"Input": source})["Output"]

        assert numbered["ID"].to_list() == [1, 2, 3]
        assert numbered.columns[0] == "ID"