initial commit with runner kinda working

2026-06-13 08:27:38 +10:00 · 2026-06-13 08:27:38 +10:00 · 02e71a857c
commit 02e71a857c
167 changed files with 841206 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,157 @@
+# uv
+uv.lock
+
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+bin/
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.toast/
+.cache/
+.pytest_cache/
+.noscript/
+.htmlcov/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.mutmut-cache
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+#   For a discussion that covers why this should be excluded, see:
+#   https://stackoverflow.com/questions/54315206/should-we-gitignore-the-python-version-file
+# .python-version
+
+# pipenv
+#   According to pypa/pipenv#118, it is recommended to include Pipfile.lock in version control.
+#   However, if you are executing a library instead of an application, you might skip it.
+#Pipfile.lock
+
+# poetry
+#   Using Poetry requires committing poetry.lock alongside pyproject.toml
+#   https://python-poetry.org
+#poetry.lock
+
+# pdm
+#   https://fming.dev
+.pdm-plugins/
+.pdm-build/
+
+# Hatch
+.hatch/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site/
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# IDEs and Editors (Optional but recommended)
+.vscode/
+.idea/
+*.swp
+*.swo
+.DS_Store
--- a/AGENTS.md
+++ b/AGENTS.md
@ -0,0 +1,44 @@
+# Project Overview
+Pyteryx is a Python-first reimplementation of the Alteryx tool.
+The intent is to keep the same observability features and rich DAG-based user interface, while taking advantage of the ease of deploying Python.
+
+Pyteryx should have a near-identical look and feel to Alteryx, with the option of exporting a workflow (.yxmd) as a Python script.
+
+### Alteryx Tools Documentation / UI Examples
+https://help.alteryx.com/current/en/designer/tools.html
+
+### Alteryx Tool Logic / Conversion code
+./alteryx-to-python-migration-strategy-main
+This migration system helps organizations transition from Alteryx's visual workflow platform to Python-based data processing pipelines. It automatically converts Alteryx workflow XML files (.yxmd) into equivalent Python code using pandas, numpy, and other standard data science libraries.
+
+./alteryx-to-python-migration-strategy-main/migration_toolkit.py
+This file contains conversion logic that could be used to create an Alteryx execution engine that can run within the Pyteryx app.
+
+# Back End
+This is a Python project using `uv` for dependency management and environment setup.
+
+## Agent instructions
+When interacting with this project, AI agents should adhere to the following guidelines:
+- Always use uv for dependency management and environment activation. Avoid using pip directly.
+- Do not manually edit pyproject.toml or uv.lock files. Use uv add or uv lock --upgrade for dependency changes.
+- Ensure the virtual environment is activated before executing Python scripts or commands. Use uv run for this purpose.
+- Prioritize using uv commands over direct Python or system commands for package management.
+- If suggesting package installations, always recommend using `uv add <package-name>` over `pip install <package-name>`.
+
+## Setup Commands
+### Install dependencies
+uv add <package-name>
+
+### Running files
+uv run python <your_script.py>
+
+### Clean cache
+uv cache clean
+
+### Environment Variables / Auth
+The `.env` file in the project root folder contains all required auth strings in the following format:
+name = 'string',
+
+# Front End
+The front end uses Go and the Fyne UI library (http://fyne.io/).
+
--- a/Alteryx_TestWorkflows/JoinTesting/JoinTesting.bak
+++ b/Alteryx_TestWorkflows/JoinTesting/JoinTesting.bak
@ -0,0 +1,388 @@
+<?xml version="1.0"?>
+<AlteryxDocument yxmdVer="2022.3">
+  <Nodes>
+    <Node ToolID="2">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.DbFileInput.DbFileInput">
+        <Position x="162" y="378" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <Passwords />
+          <File RecordLimit="" SearchSubDirs="False" FileFormat="0" OutputFileName="">Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv</File>
+          <FormatSpecificOptions>
+            <HeaderRow>True</HeaderRow>
+            <IgnoreErrors>False</IgnoreErrors>
+            <AllowShareWrite>False</AllowShareWrite>
+            <ImportLine>1</ImportLine>
+            <FieldLen>254</FieldLen>
+            <SingleThreadRead>False</SingleThreadRead>
+            <IgnoreQuotes>DoubleQuotes</IgnoreQuotes>
+            <Delimeter>,</Delimeter>
+            <QuoteRecordBreak>False</QuoteRecordBreak>
+            <CodePage>28591</CodePage>
+          </FormatSpecificOptions>
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText>products.csv</DefaultAnnotationText>
+          <Left value="False" />
+        </Annotation>
+        <Dependencies>
+          <Implicit />
+        </Dependencies>
+        <MetaInfo connection="Output">
+          <RecordInfo>
+            <Field name="Product_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
+            <Field name="Product_Name" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
+            <Field name="Product_Category" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
+            <Field name="Product_Cost" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
+            <Field name="Product_Price" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
+          </RecordInfo>
+        </MetaInfo>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileInput" />
+    </Node>
+    <Node ToolID="3">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.Join.Join">
+        <Position x="558" y="282" />
+      </GuiSettings>
+      <Properties>
+        <Configuration joinByRecordPos="False">
+          <JoinInfo connection="Left">
+            <Field field="Product_ID" />
+          </JoinInfo>
+          <JoinInfo connection="Right">
+            <Field field="Product_ID" />
+          </JoinInfo>
+          <SelectConfiguration>
+            <Configuration outputConnection="Join">
+              <OrderChanged value="False" />
+              <CommaDecimal value="False" />
+              <SelectFields>
+                <SelectField field="Right_Product_ID" selected="True" rename="Right_Product_ID" input="Right_" />
+                <SelectField field="*Unknown" selected="True" />
+              </SelectFields>
+            </Configuration>
+          </SelectConfiguration>
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText />
+          <Left value="False" />
+        </Annotation>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxJoin" />
+    </Node>
+    <Node ToolID="4">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.DbFileInput.DbFileInput">
+        <Position x="162" y="270" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <Passwords />
+          <File RecordLimit="" SearchSubDirs="False" FileFormat="0" OutputFileName="">Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv</File>
+          <FormatSpecificOptions>
+            <HeaderRow>True</HeaderRow>
+            <IgnoreErrors>False</IgnoreErrors>
+            <AllowShareWrite>False</AllowShareWrite>
+            <ImportLine>1</ImportLine>
+            <FieldLen>254</FieldLen>
+            <SingleThreadRead>False</SingleThreadRead>
+            <IgnoreQuotes>DoubleQuotes</IgnoreQuotes>
+            <Delimeter>,</Delimeter>
+            <QuoteRecordBreak>False</QuoteRecordBreak>
+            <CodePage>28591</CodePage>
+          </FormatSpecificOptions>
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText>inventory.csv</DefaultAnnotationText>
+          <Left value="False" />
+        </Annotation>
+        <Dependencies>
+          <Implicit />
+        </Dependencies>
+        <MetaInfo connection="Output">
+          <RecordInfo>
+            <Field name="Store_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
+            <Field name="Product_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
+            <Field name="Stock_On_Hand" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
+          </RecordInfo>
+        </MetaInfo>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileInput" />
+    </Node>
+    <Node ToolID="6">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.Union.Union">
+        <Position x="402" y="378" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <ByName_ErrorMode>Warning</ByName_ErrorMode>
+          <ByName_OutputMode>All</ByName_OutputMode>
+          <Mode>ByName</Mode>
+          <SetOutputOrder value="False" />
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText />
+          <Left value="False" />
+        </Annotation>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxUnion" />
+    </Node>
+    <Node ToolID="7">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.Sample.Sample">
+        <Position x="258" y="378" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <Mode>First</Mode>
+          <N>30</N>
+          <GroupFields orderChanged="False" />
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText>First 30 rows</DefaultAnnotationText>
+          <Left value="False" />
+        </Annotation>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxSample" />
+    </Node>
+    <Node ToolID="8">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.TextInput.TextInput">
+        <Position x="258" y="474" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <NumRows value="1" />
+          <Fields>
+            <Field name="Product_ID" />
+            <Field name="Product_Name" />
+            <Field name="Product_Category" />
+            <Field name="Product_Cost" />
+            <Field name="Product_Price" />
+          </Fields>
+          <Data>
+            <r>
+              <c>100</c>
+              <c>Non-product</c>
+              <c>NoCat</c>
+              <c>$1</c>
+              <c>$1</c>
+            </r>
+          </Data>
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText />
+          <Left value="False" />
+        </Annotation>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxTextInput" />
+    </Node>
+    <Node ToolID="9">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
+        <Position x="510" y="438" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <File MaxRecords="" FileFormat="0">Z:\Pyteryx\Alteryx_TestWorkflows\JoinTesting\Output\Products_before_join.csv</File>
+          <Passwords />
+          <Disable>False</Disable>
+          <FormatSpecificOptions>
+            <LineEndStyle>CRLF</LineEndStyle>
+            <Delimeter>,</Delimeter>
+            <ForceQuotes>False</ForceQuotes>
+            <HeaderRow>True</HeaderRow>
+            <CodePage>28591</CodePage>
+            <WriteBOM>True</WriteBOM>
+          </FormatSpecificOptions>
+          <MultiFile value="False" />
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText>Products_before_join.csv</DefaultAnnotationText>
+          <Left value="False" />
+        </Annotation>
+        <Dependencies>
+          <Implicit />
+        </Dependencies>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
+    </Node>
+    <Node ToolID="10">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
+        <Position x="774" y="414" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <File MaxRecords="" FileFormat="0">Z:\Pyteryx\Alteryx_TestWorkflows\JoinTesting\Output\Join_out_R.csv</File>
+          <Passwords />
+          <Disable>False</Disable>
+          <FormatSpecificOptions>
+            <LineEndStyle>CRLF</LineEndStyle>
+            <Delimeter>,</Delimeter>
+            <ForceQuotes>False</ForceQuotes>
+            <HeaderRow>True</HeaderRow>
+            <CodePage>28591</CodePage>
+            <WriteBOM>True</WriteBOM>
+          </FormatSpecificOptions>
+          <MultiFile value="False" />
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText>Join_out_R.csv</DefaultAnnotationText>
+          <Left value="False" />
+        </Annotation>
+        <Dependencies>
+          <Implicit />
+        </Dependencies>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
+    </Node>
+    <Node ToolID="11">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
+        <Position x="774" y="282" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <File MaxRecords="" FileFormat="0">Z:\Pyteryx\Alteryx_TestWorkflows\JoinTesting\Output\Join_out_J.csv</File>
+          <Passwords />
+          <Disable>False</Disable>
+          <FormatSpecificOptions>
+            <LineEndStyle>CRLF</LineEndStyle>
+            <Delimeter>,</Delimeter>
+            <ForceQuotes>False</ForceQuotes>
+            <HeaderRow>True</HeaderRow>
+            <CodePage>28591</CodePage>
+            <WriteBOM>True</WriteBOM>
+          </FormatSpecificOptions>
+          <MultiFile value="False" />
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText>Join_out_J.csv</DefaultAnnotationText>
+          <Left value="False" />
+        </Annotation>
+        <Dependencies>
+          <Implicit />
+        </Dependencies>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
+    </Node>
+    <Node ToolID="12">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
+        <Position x="774" y="138" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <File MaxRecords="" FileFormat="0">Z:\Pyteryx\Alteryx_TestWorkflows\JoinTesting\Output\Join_out_L.csv</File>
+          <Passwords />
+          <Disable>False</Disable>
+          <FormatSpecificOptions>
+            <LineEndStyle>CRLF</LineEndStyle>
+            <Delimeter>,</Delimeter>
+            <ForceQuotes>False</ForceQuotes>
+            <HeaderRow>True</HeaderRow>
+            <CodePage>28591</CodePage>
+            <WriteBOM>True</WriteBOM>
+          </FormatSpecificOptions>
+          <MultiFile value="False" />
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText>Join_out_L.csv</DefaultAnnotationText>
+          <Left value="False" />
+        </Annotation>
+        <Dependencies>
+          <Implicit />
+        </Dependencies>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
+    </Node>
+  </Nodes>
+  <Connections>
+    <Connection>
+      <Origin ToolID="2" Connection="Output" />
+      <Destination ToolID="7" Connection="Input" />
+    </Connection>
+    <Connection>
+      <Origin ToolID="3" Connection="Right" />
+      <Destination ToolID="10" Connection="Input" />
+    </Connection>
+    <Connection>
+      <Origin ToolID="3" Connection="Join" />
+      <Destination ToolID="11" Connection="Input" />
+    </Connection>
+    <Connection>
+      <Origin ToolID="3" Connection="Left" />
+      <Destination ToolID="12" Connection="Input" />
+    </Connection>
+    <Connection>
+      <Origin ToolID="4" Connection="Output" />
+      <Destination ToolID="3" Connection="Left" />
+    </Connection>
+    <Connection>
+      <Origin ToolID="6" Connection="Output" />
+      <Destination ToolID="3" Connection="Right" />
+    </Connection>
+    <Connection>
+      <Origin ToolID="6" Connection="Output" />
+      <Destination ToolID="9" Connection="Input" />
+    </Connection>
+    <Connection name="#1">
+      <Origin ToolID="7" Connection="Output" />
+      <Destination ToolID="6" Connection="Input" />
+    </Connection>
+    <Connection name="#2">
+      <Origin ToolID="8" Connection="Output" />
+      <Destination ToolID="6" Connection="Input" />
+    </Connection>
+  </Connections>
+  <Properties>
+    <Memory default="True" />
+    <GlobalRecordLimit value="0" />
+    <TempFiles default="True" />
+    <Annotation on="True" includeToolName="False" />
+    <ConvErrorLimit value="10" />
+    <ConvErrorLimit_Stop value="False" />
+    <CancelOnError value="False" />
+    <DisableBrowse value="False" />
+    <EnablePerformanceProfiling value="False" />
+    <RunWithE2 value="True" />
+    <PredictiveToolsCodePage value="1252" />
+    <DisableAllOutput value="False" />
+    <ShowAllMacroMessages value="False" />
+    <ShowConnectionStatusIsOn value="True" />
+    <ShowConnectionStatusOnlyWhenRunning value="True" />
+    <ZoomLevel value="0" />
+    <LayoutType>Horizontal</LayoutType>
+    <MetaInfo>
+      <NameIsFileName value="True" />
+      <Name>JoinTesting</Name>
+      <Description />
+      <RootToolName />
+      <ToolVersion />
+      <ToolInDb value="False" />
+      <CategoryName />
+      <SearchTags />
+      <Author />
+      <Company />
+      <Copyright />
+      <DescriptionLink actual="" displayed="" />
+      <Example>
+        <Description />
+        <File />
+      </Example>
+      <WorkflowId value="b5723901-81a1-4ff2-8b26-42700d914cb2" />
+      <Telemetry>
+        <PreviousWorkflowId value="ac869796-4ff0-4398-9782-f8fa79f30c4a" />
+        <OriginWorkflowId value="ac869796-4ff0-4398-9782-f8fa79f30c4a" />
+      </Telemetry>
+    </MetaInfo>
+    <Events>
+      <Enabled value="True" />
+    </Events>
+  </Properties>
+</AlteryxDocument>
--- a/Alteryx_TestWorkflows/JoinTesting/JoinTesting.yxmd
+++ b/Alteryx_TestWorkflows/JoinTesting/JoinTesting.yxmd
@ -0,0 +1,388 @@
+<?xml version="1.0"?>
+<AlteryxDocument yxmdVer="2022.3">
+  <Nodes>
+    <Node ToolID="2">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.DbFileInput.DbFileInput">
+        <Position x="162" y="378" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <Passwords />
+          <File RecordLimit="" SearchSubDirs="False" FileFormat="0" OutputFileName="">..\Maven Toys Data\products.csv</File>
+          <FormatSpecificOptions>
+            <HeaderRow>True</HeaderRow>
+            <IgnoreErrors>False</IgnoreErrors>
+            <AllowShareWrite>False</AllowShareWrite>
+            <ImportLine>1</ImportLine>
+            <FieldLen>254</FieldLen>
+            <SingleThreadRead>False</SingleThreadRead>
+            <IgnoreQuotes>DoubleQuotes</IgnoreQuotes>
+            <Delimeter>,</Delimeter>
+            <QuoteRecordBreak>False</QuoteRecordBreak>
+            <CodePage>28591</CodePage>
+          </FormatSpecificOptions>
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText>products.csv</DefaultAnnotationText>
+          <Left value="False" />
+        </Annotation>
+        <Dependencies>
+          <Implicit />
+        </Dependencies>
+        <MetaInfo connection="Output">
+          <RecordInfo>
+            <Field name="Product_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
+            <Field name="Product_Name" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
+            <Field name="Product_Category" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
+            <Field name="Product_Cost" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
+            <Field name="Product_Price" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
+          </RecordInfo>
+        </MetaInfo>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileInput" />
+    </Node>
+    <Node ToolID="3">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.Join.Join">
+        <Position x="558" y="282" />
+      </GuiSettings>
+      <Properties>
+        <Configuration joinByRecordPos="False">
+          <JoinInfo connection="Left">
+            <Field field="Product_ID" />
+          </JoinInfo>
+          <JoinInfo connection="Right">
+            <Field field="Product_ID" />
+          </JoinInfo>
+          <SelectConfiguration>
+            <Configuration outputConnection="Join">
+              <OrderChanged value="False" />
+              <CommaDecimal value="False" />
+              <SelectFields>
+                <SelectField field="Right_Product_ID" selected="True" rename="Right_Product_ID" input="Right_" />
+                <SelectField field="*Unknown" selected="True" />
+              </SelectFields>
+            </Configuration>
+          </SelectConfiguration>
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText />
+          <Left value="False" />
+        </Annotation>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxJoin" />
+    </Node>
+    <Node ToolID="4">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.DbFileInput.DbFileInput">
+        <Position x="162" y="270" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <Passwords />
+          <File RecordLimit="" SearchSubDirs="False" FileFormat="0" OutputFileName="">..\Maven Toys Data\inventory.csv</File>
+          <FormatSpecificOptions>
+            <HeaderRow>True</HeaderRow>
+            <IgnoreErrors>False</IgnoreErrors>
+            <AllowShareWrite>False</AllowShareWrite>
+            <ImportLine>1</ImportLine>
+            <FieldLen>254</FieldLen>
+            <SingleThreadRead>False</SingleThreadRead>
+            <IgnoreQuotes>DoubleQuotes</IgnoreQuotes>
+            <Delimeter>,</Delimeter>
+            <QuoteRecordBreak>False</QuoteRecordBreak>
+            <CodePage>28591</CodePage>
+          </FormatSpecificOptions>
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText>inventory.csv</DefaultAnnotationText>
+          <Left value="False" />
+        </Annotation>
+        <Dependencies>
+          <Implicit />
+        </Dependencies>
+        <MetaInfo connection="Output">
+          <RecordInfo>
+            <Field name="Store_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
+            <Field name="Product_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
+            <Field name="Stock_On_Hand" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
+          </RecordInfo>
+        </MetaInfo>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileInput" />
+    </Node>
+    <Node ToolID="6">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.Union.Union">
+        <Position x="402" y="378" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <ByName_ErrorMode>Warning</ByName_ErrorMode>
+          <ByName_OutputMode>All</ByName_OutputMode>
+          <Mode>ByName</Mode>
+          <SetOutputOrder value="False" />
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText />
+          <Left value="False" />
+        </Annotation>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxUnion" />
+    </Node>
+    <Node ToolID="7">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.Sample.Sample">
+        <Position x="258" y="378" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <Mode>First</Mode>
+          <N>30</N>
+          <GroupFields orderChanged="False" />
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText>First 30 rows</DefaultAnnotationText>
+          <Left value="False" />
+        </Annotation>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxSample" />
+    </Node>
+    <Node ToolID="8">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.TextInput.TextInput">
+        <Position x="258" y="474" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <NumRows value="1" />
+          <Fields>
+            <Field name="Product_ID" />
+            <Field name="Product_Name" />
+            <Field name="Product_Category" />
+            <Field name="Product_Cost" />
+            <Field name="Product_Price" />
+          </Fields>
+          <Data>
+            <r>
+              <c>100</c>
+              <c>Non-product</c>
+              <c>NoCat</c>
+              <c>$1</c>
+              <c>$1</c>
+            </r>
+          </Data>
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText />
+          <Left value="False" />
+        </Annotation>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxTextInput" />
+    </Node>
+    <Node ToolID="9">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
+        <Position x="510" y="438" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <File MaxRecords="" FileFormat="0">.\Output\Products_before_join.csv</File>
+          <Passwords />
+          <Disable>False</Disable>
+          <FormatSpecificOptions>
+            <LineEndStyle>CRLF</LineEndStyle>
+            <Delimeter>,</Delimeter>
+            <ForceQuotes>False</ForceQuotes>
+            <HeaderRow>True</HeaderRow>
+            <CodePage>28591</CodePage>
+            <WriteBOM>True</WriteBOM>
+          </FormatSpecificOptions>
+          <MultiFile value="False" />
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText>Products_before_join.csv</DefaultAnnotationText>
+          <Left value="False" />
+        </Annotation>
+        <Dependencies>
+          <Implicit />
+        </Dependencies>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
+    </Node>
+    <Node ToolID="10">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
+        <Position x="774" y="414" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <File MaxRecords="" FileFormat="0">.\Output\Join_out_R.csv</File>
+          <Passwords />
+          <Disable>False</Disable>
+          <FormatSpecificOptions>
+            <LineEndStyle>CRLF</LineEndStyle>
+            <Delimeter>,</Delimeter>
+            <ForceQuotes>False</ForceQuotes>
+            <HeaderRow>True</HeaderRow>
+            <CodePage>28591</CodePage>
+            <WriteBOM>True</WriteBOM>
+          </FormatSpecificOptions>
+          <MultiFile value="False" />
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText>Join_out_R.csv</DefaultAnnotationText>
+          <Left value="False" />
+        </Annotation>
+        <Dependencies>
+          <Implicit />
+        </Dependencies>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
+    </Node>
+    <Node ToolID="11">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
+        <Position x="774" y="282" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <File MaxRecords="" FileFormat="0">.\Output\Join_out_J.csv</File>
+          <Passwords />
+          <Disable>False</Disable>
+          <FormatSpecificOptions>
+            <LineEndStyle>CRLF</LineEndStyle>
+            <Delimeter>,</Delimeter>
+            <ForceQuotes>False</ForceQuotes>
+            <HeaderRow>True</HeaderRow>
+            <CodePage>28591</CodePage>
+            <WriteBOM>True</WriteBOM>
+          </FormatSpecificOptions>
+          <MultiFile value="False" />
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText>Join_out_J.csv</DefaultAnnotationText>
+          <Left value="False" />
+        </Annotation>
+        <Dependencies>
+          <Implicit />
+        </Dependencies>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
+    </Node>
+    <Node ToolID="12">
+      <GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
+        <Position x="774" y="138" />
+      </GuiSettings>
+      <Properties>
+        <Configuration>
+          <File MaxRecords="" FileFormat="0">.\Output\Join_out_L.csv</File>
+          <Passwords />
+          <Disable>False</Disable>
+          <FormatSpecificOptions>
+            <LineEndStyle>CRLF</LineEndStyle>
+            <Delimeter>,</Delimeter>
+            <ForceQuotes>False</ForceQuotes>
+            <HeaderRow>True</HeaderRow>
+            <CodePage>28591</CodePage>
+            <WriteBOM>True</WriteBOM>
+          </FormatSpecificOptions>
+          <MultiFile value="False" />
+        </Configuration>
+        <Annotation DisplayMode="0">
+          <Name />
+          <DefaultAnnotationText>Join_out_L.csv</DefaultAnnotationText>
+          <Left value="False" />
+        </Annotation>
+        <Dependencies>
+          <Implicit />
+        </Dependencies>
+      </Properties>
+      <EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
+    </Node>
+  </Nodes>
+  <Connections>
+    <Connection>
+      <Origin ToolID="2" Connection="Output" />
+      <Destination ToolID="7" Connection="Input" />
+    </Connection>
+    <Connection>
+      <Origin ToolID="3" Connection="Right" />
+      <Destination ToolID="10" Connection="Input" />
+    </Connection>
+    <Connection>
+      <Origin ToolID="3" Connection="Join" />
+      <Destination ToolID="11" Connection="Input" />
+    </Connection>
+    <Connection>
+      <Origin ToolID="3" Connection="Left" />
+      <Destination ToolID="12" Connection="Input" />
+    </Connection>
+    <Connection>
+      <Origin ToolID="4" Connection="Output" />
+      <Destination ToolID="3" Connection="Left" />
+    </Connection>
+    <Connection>
+      <Origin ToolID="6" Connection="Output" />
+      <Destination ToolID="3" Connection="Right" />
+    </Connection>
+    <Connection>
+      <Origin ToolID="6" Connection="Output" />
+      <Destination ToolID="9" Connection="Input" />
+    </Connection>
+    <Connection name="#1">
+      <Origin ToolID="7" Connection="Output" />
+      <Destination ToolID="6" Connection="Input" />
+    </Connection>
+    <Connection name="#2">
+      <Origin ToolID="8" Connection="Output" />
+      <Destination ToolID="6" Connection="Input" />
+    </Connection>
+  </Connections>
+  <Properties>
+    <Memory default="True" />
+    <GlobalRecordLimit value="0" />
+    <TempFiles default="True" />
+    <Annotation on="True" includeToolName="False" />
+    <ConvErrorLimit value="10" />
+    <ConvErrorLimit_Stop value="False" />
+    <CancelOnError value="False" />
+    <DisableBrowse value="False" />
+    <EnablePerformanceProfiling value="False" />
+    <RunWithE2 value="True" />
+    <PredictiveToolsCodePage value="1252" />
+    <DisableAllOutput value="False" />
+    <ShowAllMacroMessages value="False" />
+    <ShowConnectionStatusIsOn value="True" />
+    <ShowConnectionStatusOnlyWhenRunning value="True" />
+    <ZoomLevel value="0" />
+    <LayoutType>Horizontal</LayoutType>
+    <MetaInfo>
+      <NameIsFileName value="True" />
+      <Name>JoinTesting</Name>
+      <Description />
+      <RootToolName />
+      <ToolVersion />
+      <ToolInDb value="False" />
+      <CategoryName />
+      <SearchTags />
+      <Author />
+      <Company />
+      <Copyright />
+      <DescriptionLink actual="" displayed="" />
+      <Example>
+        <Description />
+        <File />
+      </Example>
+      <WorkflowId value="b5723901-81a1-4ff2-8b26-42700d914cb2" />
+      <Telemetry>
+        <PreviousWorkflowId value="ac869796-4ff0-4398-9782-f8fa79f30c4a" />
+        <OriginWorkflowId value="ac869796-4ff0-4398-9782-f8fa79f30c4a" />
+      </Telemetry>
+    </MetaInfo>
+    <Events>
+      <Enabled value="True" />
+    </Events>
+  </Properties>
+</AlteryxDocument>
--- a/Alteryx_TestWorkflows/JoinTesting/Output/Join_out_J.csv
+++ b/Alteryx_TestWorkflows/JoinTesting/Output/Join_out_J.csv
--- a/Alteryx_TestWorkflows/JoinTesting/Output/Join_out_L.csv
+++ b/Alteryx_TestWorkflows/JoinTesting/Output/Join_out_L.csv
@ -0,0 +1,226 @@
+Left_Store_ID,Product_ID,Left_Stock_On_Hand
+15,31,4
+15,32,16
+15,33,8
+15,34,7
+16,31,14
+16,32,7
+16,33,6
+16,34,2
+16,35,6
+17,31,20
+17,32,15
+17,33,27
+17,34,11
+18,31,4
+18,32,9
+18,33,9
+18,34,8
+18,35,10
+19,31,4
+19,32,5
+19,33,0
+19,34,15
+19,35,14
+20,31,10
+20,32,9
+20,33,28
+20,34,19
+21,31,19
+21,32,3
+21,33,16
+21,34,16
+22,31,34
+22,32,38
+22,33,8
+22,34,6
+22,35,2
+23,31,19
+23,32,11
+23,33,6
+23,34,18
+23,35,4
+24,31,10
+24,32,10
+24,33,4
+24,34,17
+24,35,19
+25,31,0
+25,32,10
+25,33,4
+25,34,23
+26,31,4
+26,32,2
+26,33,2
+26,34,17
+26,35,8
+27,31,13
+27,32,6
+27,33,7
+27,34,9
+28,31,18
+28,32,3
+28,33,9
+28,34,19
+29,31,3
+29,32,7
+29,33,6
+29,34,16
+30,31,20
+30,32,13
+30,33,10
+30,34,18
+31,31,39
+31,32,12
+31,33,20
+31,34,20
+32,31,4
+32,32,8
+32,33,13
+32,34,20
+33,31,7
+33,32,15
+33,33,9
+33,34,14
+33,35,18
+34,31,30
+34,32,19
+34,33,9
+34,34,17
+34,35,20
+35,31,74
+35,32,20
+35,33,14
+35,34,9
+36,31,6
+36,32,7
+36,33,21
+36,34,2
+36,35,12
+37,31,14
+37,32,0
+37,33,10
+37,34,13
+37,35,14
+38,31,17
+38,32,20
+38,33,9
+38,34,18
+38,35,2
+39,31,15
+39,32,5
+39,33,14
+39,34,4
+40,31,5
+40,32,7
+40,33,16
+40,34,5
+41,31,18
+41,32,29
+41,33,13
+41,34,15
+41,35,10
+1,31,7
+1,32,4
+1,33,2
+1,34,0
+1,35,12
+2,31,18
+2,32,10
+2,33,11
+2,34,18
+3,31,29
+3,32,4
+3,33,4
+3,34,7
+4,31,35
+4,32,6
+4,33,2
+4,34,0
+4,35,4
+5,31,31
+5,32,10
+5,33,17
+5,34,10
+6,31,17
+6,32,7
+6,33,7
+6,34,8
+6,35,3
+7,31,15
+7,32,3
+7,33,18
+7,34,2
+7,35,17
+8,31,27
+8,32,7
+8,33,17
+8,34,18
+8,35,8
+9,31,6
+9,32,3
+9,33,9
+9,34,5
+9,35,4
+10,31,7
+10,32,13
+10,33,12
+10,34,16
+10,35,2
+11,31,20
+11,32,4
+11,33,6
+11,34,9
+12,31,13
+12,32,9
+12,33,5
+12,34,9
+12,35,9
+13,31,24
+13,32,7
+13,33,3
+13,34,3
+14,31,5
+14,32,2
+14,33,2
+14,34,8
+42,31,11
+42,32,4
+42,33,18
+42,34,34
+42,35,13
+43,31,18
+43,32,38
+43,33,5
+43,34,7
+44,31,8
+44,32,29
+44,33,0
+44,34,22
+45,31,6
+45,32,6
+45,33,7
+45,34,3
+46,31,13
+46,32,8
+46,33,11
+46,34,24
+47,31,48
+47,32,6
+47,33,13
+47,34,3
+48,31,41
+48,32,7
+48,33,0
+48,34,39
+48,35,3
+49,31,51
+49,32,11
+49,33,15
+49,34,2
+49,35,19
+50,31,18
+50,32,9
+50,33,1
+50,34,17
+50,35,8
--- a/Alteryx_TestWorkflows/JoinTesting/Output/Join_out_R.csv
+++ b/Alteryx_TestWorkflows/JoinTesting/Output/Join_out_R.csv
@ -0,0 +1,2 @@
+Product_ID,Right_Product_Name,Right_Product_Category,Right_Product_Cost,Right_Product_Price
+100,Non-product,NoCat,$1,$1
--- a/Alteryx_TestWorkflows/JoinTesting/Output/Products_before_join.csv
+++ b/Alteryx_TestWorkflows/JoinTesting/Output/Products_before_join.csv
@ -0,0 +1,32 @@
+Product_ID,Product_Name,Product_Category,Product_Cost,Product_Price
+1,Action Figure,Toys,$9.99,$15.99
+2,Animal Figures,Toys,$9.99,$12.99
+3,Barrel O' Slime,Art & Crafts,$1.99,$3.99
+4,Chutes & Ladders,Games,$9.99,$12.99
+5,Classic Dominoes,Games,$7.99,$9.99
+6,Colorbuds,Electronics,$6.99,$14.99
+7,Dart Gun,Sports & Outdoors,$11.99,$15.99
+8,Deck Of Cards,Games,$3.99,$6.99
+9,Dino Egg,Toys,$9.99,$10.99
+10,Dinosaur Figures,Toys,$10.99,$14.99
+11,Etch A Sketch,Art & Crafts,$10.99,$20.99
+12,Foam Disk Launcher,Sports & Outdoors,$8.99,$11.99
+13,Gamer Headphones,Electronics,$14.99,$20.99
+14,Glass Marbles,Games,$5.99,$10.99
+15,Hot Wheels 5-Pack,Toys,$3.99,$5.99
+16,Jenga,Games,$2.99,$9.99
+17,Kids Makeup Kit,Art & Crafts,$13.99,$19.99
+18,Lego Bricks,Toys,$34.99,$39.99
+19,Magic Sand,Art & Crafts,$13.99,$15.99
+20,Mini Basketball Hoop,Sports & Outdoors,$8.99,$24.99
+21,Mini Ping Pong Set,Sports & Outdoors,$6.99,$9.99
+22,Monopoly,Games,$13.99,$19.99
+23,Mr. Potatohead,Toys,$4.99,$9.99
+24,Nerf Gun,Sports & Outdoors,$14.99,$19.99
+25,PlayDoh Can,Art & Crafts,$1.99,$2.99
+26,PlayDoh Playset,Art & Crafts,$20.99,$24.99
+27,PlayDoh Toolkit,Art & Crafts,$3.99,$4.99
+28,Playfoam,Art & Crafts,$3.99,$10.99
+29,Plush Pony,Toys,$8.99,$19.99
+30,Rubik's Cube,Games,$17.99,$19.99
+100,Non-product,NoCat,$1,$1
--- a/Alteryx_TestWorkflows/JoinTesting/Output/expected/Join_out_J.csv
+++ b/Alteryx_TestWorkflows/JoinTesting/Output/expected/Join_out_J.csv
--- a/Alteryx_TestWorkflows/JoinTesting/Output/expected/Join_out_L.csv
+++ b/Alteryx_TestWorkflows/JoinTesting/Output/expected/Join_out_L.csv
@ -0,0 +1,226 @@
+Store_ID,Product_ID,Stock_On_Hand
+1,32,4
+2,32,10
+3,32,4
+4,32,6
+5,32,10
+6,32,7
+7,32,3
+8,32,7
+9,32,3
+10,32,13
+11,32,4
+12,32,9
+13,32,7
+14,32,2
+15,32,16
+16,32,7
+17,32,15
+18,32,9
+19,32,5
+20,32,9
+21,32,3
+22,32,38
+23,32,11
+24,32,10
+25,32,10
+26,32,2
+27,32,6
+28,32,3
+29,32,7
+30,32,13
+31,32,12
+32,32,8
+33,32,15
+34,32,19
+35,32,20
+36,32,7
+37,32,0
+38,32,20
+39,32,5
+40,32,7
+41,32,29
+42,32,4
+43,32,38
+44,32,29
+45,32,6
+46,32,8
+47,32,6
+48,32,7
+49,32,11
+50,32,9
+1,31,7
+2,31,18
+3,31,29
+4,31,35
+5,31,31
+6,31,17
+7,31,15
+8,31,27
+9,31,6
+10,31,7
+11,31,20
+12,31,13
+13,31,24
+14,31,5
+15,31,4
+16,31,14
+17,31,20
+18,31,4
+19,31,4
+20,31,10
+21,31,19
+22,31,34
+23,31,19
+24,31,10
+25,31,0
+26,31,4
+27,31,13
+28,31,18
+29,31,3
+30,31,20
+31,31,39
+32,31,4
+33,31,7
+34,31,30
+35,31,74
+36,31,6
+37,31,14
+38,31,17
+39,31,15
+40,31,5
+41,31,18
+42,31,11
+43,31,18
+44,31,8
+45,31,6
+46,31,13
+47,31,48
+48,31,41
+49,31,51
+50,31,18
+1,35,12
+4,35,4
+6,35,3
+7,35,17
+8,35,8
+9,35,4
+10,35,2
+12,35,9
+16,35,6
+18,35,10
+19,35,14
+22,35,2
+23,35,4
+24,35,19
+26,35,8
+33,35,18
+34,35,20
+36,35,12
+37,35,14
+38,35,2
+41,35,10
+42,35,13
+48,35,3
+49,35,19
+50,35,8
+1,34,0
+2,34,18
+3,34,7
+4,34,0
+5,34,10
+6,34,8
+7,34,2
+8,34,18
+9,34,5
+10,34,16
+11,34,9
+12,34,9
+13,34,3
+14,34,8
+15,34,7
+16,34,2
+17,34,11
+18,34,8
+19,34,15
+20,34,19
+21,34,16
+22,34,6
+23,34,18
+24,34,17
+25,34,23
+26,34,17
+27,34,9
+28,34,19
+29,34,16
+30,34,18
+31,34,20
+32,34,20
+33,34,14
+34,34,17
+35,34,9
+36,34,2
+37,34,13
+38,34,18
+39,34,4
+40,34,5
+41,34,15
+42,34,34
+43,34,7
+44,34,22
+45,34,3
+46,34,24
+47,34,3
+48,34,39
+49,34,2
+50,34,17
+1,33,2
+2,33,11
+3,33,4
+4,33,2
+5,33,17
+6,33,7
+7,33,18
+8,33,17
+9,33,9
+10,33,12
+11,33,6
+12,33,5
+13,33,3
+14,33,2
+15,33,8
+16,33,6
+17,33,27
+18,33,9
+19,33,0
+20,33,28
+21,33,16
+22,33,8
+23,33,6
+24,33,4
+25,33,4
+26,33,2
+27,33,7
+28,33,9
+29,33,6
+30,33,10
+31,33,20
+32,33,13
+33,33,9
+34,33,9
+35,33,14
+36,33,21
+37,33,10
+38,33,9
+39,33,14
+40,33,16
+41,33,13
+42,33,18
+43,33,5
+44,33,0
+45,33,7
+46,33,11
+47,33,13
+48,33,0
+49,33,15
+50,33,1
--- a/Alteryx_TestWorkflows/JoinTesting/Output/expected/Join_out_R.csv
+++ b/Alteryx_TestWorkflows/JoinTesting/Output/expected/Join_out_R.csv
@ -0,0 +1,2 @@
+Product_ID,Product_Name,Product_Category,Product_Cost,Product_Price
+100,Non-product,NoCat,$1,$1
--- a/Alteryx_TestWorkflows/JoinTesting/Output/expected/Products_before_join.csv
+++ b/Alteryx_TestWorkflows/JoinTesting/Output/expected/Products_before_join.csv
@ -0,0 +1,2 @@
+Product_ID,Product_Name,Product_Category,Product_Cost,Product_Price
+100,Non-product,NoCat,$1,$1
--- a/Data/SCHEMA_RELATIONSHIPS.md
+++ b/Data/SCHEMA_RELATIONSHIPS.md
@ -0,0 +1,100 @@
+# 📚 Maven Toys Dataset Schema Relationship Guide
+
+This document outlines the schema relationships and foreign key connections between all CSV files in this directory, suggesting how they can be joined for comprehensive data analysis.
+
+## 🧩 Entity/Dimension Tables (The "Who" and "What")
+
+These tables define core entities and are typically used as lookup tables.
+
+1.  **`stores`**: Information about the physical retail locations.
+    *   **Primary Key (PK):** `Store_ID`
+2.  **`products`**: Master list of all items sold.
+    *   **Primary Key (PK):** `Product_ID`
+3.  **`calendar`**: Time dimension data for the business.
+    *   **Primary Key (PK):** `Date` (Assuming unique dates are recorded)
+
+## 📊 Fact/Snapshot Tables (The "When" and "How Much")
+
+These tables record events, measurements, or snapshots in time that link the dimensions together.
+
+1.  **`sales`**: The core transaction log. *This is the most frequently joined table.*
+    *   **Foreign Keys (FKs):** `Store_ID` (references `stores`), `Product_ID` (references `products`).
+2.  **`inventory`**: Snapshot of stock levels at a point in time.
+    *   **Composite Key/FKs:** (`Store_ID`, `Product_ID`) $\to$ Links to both `stores` and `products`.
+3.  **`data_dictionary`**: Metadata describing the fields of the other tables. It is not a fact table and is not used for joins; it is listed here because it is the reference for column definitions.
+
+## 🗓️ Time Dimension
+
+*   The **`calendar`** table provides temporal context, which can be joined with `sales` records to analyze performance around holidays or specific periods.
+
+---
+
+# 🔗 Relationship Map and Join Paths
+
+The following sections show the explicit paths you can use for joining data in SQL or Python (Pandas/DuckDB).
+
+### 1. Sales Analysis Path
+*   **Goal:** Analyzing a transaction's details, location, and item description.
+*   **Join Chain:** `sales` $\to$ (`stores`, `products`)
+*   **Example Join:** `FROM sales s JOIN stores st ON s.Store_ID = st.Store_ID JOIN products p ON s.Product_ID = p.Product_ID;`
+
+### 2. Inventory Valuation Path
+*   **Goal:** Calculating the total value of current stock across all stores.
+*   **Join Chain:** `inventory` $\to$ (`stores`, `products`)
+*   **Example Join:** `FROM inventory i JOIN stores st ON i.Store_ID = st.Store_ID JOIN products p ON i.Product_ID = p.Product_ID;`
+
+### 3. Comprehensive Performance Path (The Full Picture)
+*   **Goal:** Linking sales performance to store location details and calendar dates.
+*   **Join Chain:** `sales` $\to$ (`stores`, `products`, `calendar`)
+*   **Notes:** Join `sales.Date` to `calendar.Date`; the `stores` and `products` joins use `Store_ID` and `Product_ID` exactly as in the Sales Analysis Path.
+
+---
+
+# 💡 Example Queries (Ready for Use)
+
+These queries demonstrate how to combine the tables.
+
+### 1. Total Revenue Over Time
+Calculate the total revenue generated month-by-month, showing store performance over time.
+
+```sql
+SELECT
+    strftime('%Y-%m', s.Date) AS sales_month, -- Grouping by Year and Month
+    st.Store_Name,
+    COUNT(DISTINCT p.Product_ID) AS distinct_products_sold,
+    SUM(s.Units * p.Product_Price) AS total_monthly_revenue
+FROM sales s
+JOIN stores st ON s.Store_ID = st.Store_ID
+JOIN products p ON s.Product_ID = p.Product_ID
+GROUP BY 1, 2
+ORDER BY 1 DESC, total_monthly_revenue DESC;
+```
+
+### 2. Top Performing Product/Category Analysis
+Identify the top 5 products by units sold, together with the category each product belongs to.
+
+```sql
+SELECT
+    p.Product_Name,
+    p.Product_Category,
+    SUM(s.Units) AS total_units_sold
+FROM sales s
+JOIN products p ON s.Product_ID = p.Product_ID
+GROUP BY 1, 2
+ORDER BY total_units_sold DESC
+LIMIT 5;
+```
+
+### 3. Low Stock Alerts (Inventory Management)
+List all stores and products where the current stock is below a specified threshold (e.g., < 50 units).
+
+```sql
+SELECT
+    st.Store_Name,
+    p.Product_Name,
+    i.Stock_On_Hand
+FROM inventory i
+JOIN stores st ON i.Store_ID = st.Store_ID
+JOIN products p ON i.Product_ID = p.Product_ID
+WHERE i.Stock_On_Hand < 50;
+```
--- a/Alteryx_TestWorkflows/Maven
+++ b/Alteryx_TestWorkflows/Maven
@ -0,0 +1,639 @@
+Date
+1/1/2022
+1/2/2022
+1/3/2022
+1/4/2022
+1/5/2022
+1/6/2022
+1/7/2022
+1/8/2022
+1/9/2022
+1/10/2022
+1/11/2022
+1/12/2022
+1/13/2022
+1/14/2022
+1/15/2022
+1/16/2022
+1/17/2022
+1/18/2022
+1/19/2022
+1/20/2022
+1/21/2022
+1/22/2022
+1/23/2022
+1/24/2022
+1/25/2022
+1/26/2022
+1/27/2022
+1/28/2022
+1/29/2022
+1/30/2022
+1/31/2022
+2/1/2022
+2/2/2022
+2/3/2022
+2/4/2022
+2/5/2022
+2/6/2022
+2/7/2022
+2/8/2022
+2/9/2022
+2/10/2022
+2/11/2022
+2/12/2022
+2/13/2022
+2/14/2022
+2/15/2022
+2/16/2022
+2/17/2022
+2/18/2022
+2/19/2022
+2/20/2022
+2/21/2022
+2/22/2022
+2/23/2022
+2/24/2022
+2/25/2022
+2/26/2022
+2/27/2022
+2/28/2022
+3/1/2022
+3/2/2022
+3/3/2022
+3/4/2022
+3/5/2022
+3/6/2022
+3/7/2022
+3/8/2022
+3/9/2022
+3/10/2022
+3/11/2022
+3/12/2022
+3/13/2022
+3/14/2022
+3/15/2022
+3/16/2022
+3/17/2022
+3/18/2022
+3/19/2022
+3/20/2022
+3/21/2022
+3/22/2022
+3/23/2022
+3/24/2022
+3/25/2022
+3/26/2022
+3/27/2022
+3/28/2022
+3/29/2022
+3/30/2022
+3/31/2022
+4/1/2022
+4/2/2022
+4/3/2022
+4/4/2022
+4/5/2022
+4/6/2022
+4/7/2022
+4/8/2022
+4/9/2022
+4/10/2022
+4/11/2022
+4/12/2022
+4/13/2022
+4/14/2022
+4/15/2022
+4/16/2022
+4/17/2022
+4/18/2022
+4/19/2022
+4/20/2022
+4/21/2022
+4/22/2022
+4/23/2022
+4/24/2022
+4/25/2022
+4/26/2022
+4/27/2022
+4/28/2022
+4/29/2022
+4/30/2022
+5/1/2022
+5/2/2022
+5/3/2022
+5/4/2022
+5/5/2022
+5/6/2022
+5/7/2022
+5/8/2022
+5/9/2022
+5/10/2022
+5/11/2022
+5/12/2022
+5/13/2022
+5/14/2022
+5/15/2022
+5/16/2022
+5/17/2022
+5/18/2022
+5/19/2022
+5/20/2022
+5/21/2022
+5/22/2022
+5/23/2022
+5/24/2022
+5/25/2022
+5/26/2022
+5/27/2022
+5/28/2022
+5/29/2022
+5/30/2022
+5/31/2022
+6/1/2022
+6/2/2022
+6/3/2022
+6/4/2022
+6/5/2022
+6/6/2022
+6/7/2022
+6/8/2022
+6/9/2022
+6/10/2022
+6/11/2022
+6/12/2022
+6/13/2022
+6/14/2022
+6/15/2022
+6/16/2022
+6/17/2022
+6/18/2022
+6/19/2022
+6/20/2022
+6/21/2022
+6/22/2022
+6/23/2022
+6/24/2022
+6/25/2022
+6/26/2022
+6/27/2022
+6/28/2022
+6/29/2022
+6/30/2022
+7/1/2022
+7/2/2022
+7/3/2022
+7/4/2022
+7/5/2022
+7/6/2022
+7/7/2022
+7/8/2022
+7/9/2022
+7/10/2022
+7/11/2022
+7/12/2022
+7/13/2022
+7/14/2022
+7/15/2022
+7/16/2022
+7/17/2022
+7/18/2022
+7/19/2022
+7/20/2022
+7/21/2022
+7/22/2022
+7/23/2022
+7/24/2022
+7/25/2022
+7/26/2022
+7/27/2022
+7/28/2022
+7/29/2022
+7/30/2022
+7/31/2022
+8/1/2022
+8/2/2022
+8/3/2022
+8/4/2022
+8/5/2022
+8/6/2022
+8/7/2022
+8/8/2022
+8/9/2022
+8/10/2022
+8/11/2022
+8/12/2022
+8/13/2022
+8/14/2022
+8/15/2022
+8/16/2022
+8/17/2022
+8/18/2022
+8/19/2022
+8/20/2022
+8/21/2022
+8/22/2022
+8/23/2022
+8/24/2022
+8/25/2022
+8/26/2022
+8/27/2022
+8/28/2022
+8/29/2022
+8/30/2022
+8/31/2022
+9/1/2022
+9/2/2022
+9/3/2022
+9/4/2022
+9/5/2022
+9/6/2022
+9/7/2022
+9/8/2022
+9/9/2022
+9/10/2022
+9/11/2022
+9/12/2022
+9/13/2022
+9/14/2022
+9/15/2022
+9/16/2022
+9/17/2022
+9/18/2022
+9/19/2022
+9/20/2022
+9/21/2022
+9/22/2022
+9/23/2022
+9/24/2022
+9/25/2022
+9/26/2022
+9/27/2022
+9/28/2022
+9/29/2022
+9/30/2022
+10/1/2022
+10/2/2022
+10/3/2022
+10/4/2022
+10/5/2022
+10/6/2022
+10/7/2022
+10/8/2022
+10/9/2022
+10/10/2022
+10/11/2022
+10/12/2022
+10/13/2022
+10/14/2022
+10/15/2022
+10/16/2022
+10/17/2022
+10/18/2022
+10/19/2022
+10/20/2022
+10/21/2022
+10/22/2022
+10/23/2022
+10/24/2022
+10/25/2022
+10/26/2022
+10/27/2022
+10/28/2022
+10/29/2022
+10/30/2022
+10/31/2022
+11/1/2022
+11/2/2022
+11/3/2022
+11/4/2022
+11/5/2022
+11/6/2022
+11/7/2022
+11/8/2022
+11/9/2022
+11/10/2022
+11/11/2022
+11/12/2022
+11/13/2022
+11/14/2022
+11/15/2022
+11/16/2022
+11/17/2022
+11/18/2022
+11/19/2022
+11/20/2022
+11/21/2022
+11/22/2022
+11/23/2022
+11/24/2022
+11/25/2022
+11/26/2022
+11/27/2022
+11/28/2022
+11/29/2022
+11/30/2022
+12/1/2022
+12/2/2022
+12/3/2022
+12/4/2022
+12/5/2022
+12/6/2022
+12/7/2022
+12/8/2022
+12/9/2022
+12/10/2022
+12/11/2022
+12/12/2022
+12/13/2022
+12/14/2022
+12/15/2022
+12/16/2022
+12/17/2022
+12/18/2022
+12/19/2022
+12/20/2022
+12/21/2022
+12/22/2022
+12/23/2022
+12/24/2022
+12/25/2022
+12/26/2022
+12/27/2022
+12/28/2022
+12/29/2022
+12/30/2022
+12/31/2022
+1/1/2023
+1/2/2023
+1/3/2023
+1/4/2023
+1/5/2023
+1/6/2023
+1/7/2023
+1/8/2023
+1/9/2023
+1/10/2023
+1/11/2023
+1/12/2023
+1/13/2023
+1/14/2023
+1/15/2023
+1/16/2023
+1/17/2023
+1/18/2023
+1/19/2023
+1/20/2023
+1/21/2023
+1/22/2023
+1/23/2023
+1/24/2023
+1/25/2023
+1/26/2023
+1/27/2023
+1/28/2023
+1/29/2023
+1/30/2023
+1/31/2023
+2/1/2023
+2/2/2023
+2/3/2023
+2/4/2023
+2/5/2023
+2/6/2023
+2/7/2023
+2/8/2023
+2/9/2023
+2/10/2023
+2/11/2023
+2/12/2023
+2/13/2023
+2/14/2023
+2/15/2023
+2/16/2023
+2/17/2023
+2/18/2023
+2/19/2023
+2/20/2023
+2/21/2023
+2/22/2023
+2/23/2023
+2/24/2023
+2/25/2023
+2/26/2023
+2/27/2023
+2/28/2023
+3/1/2023
+3/2/2023
+3/3/2023
+3/4/2023
+3/5/2023
+3/6/2023
+3/7/2023
+3/8/2023
+3/9/2023
+3/10/2023
+3/11/2023
+3/12/2023
+3/13/2023
+3/14/2023
+3/15/2023
+3/16/2023
+3/17/2023
+3/18/2023
+3/19/2023
+3/20/2023
+3/21/2023
+3/22/2023
+3/23/2023
+3/24/2023
+3/25/2023
+3/26/2023
+3/27/2023
+3/28/2023
+3/29/2023
+3/30/2023
+3/31/2023
+4/1/2023
+4/2/2023
+4/3/2023
+4/4/2023
+4/5/2023
+4/6/2023
+4/7/2023
+4/8/2023
+4/9/2023
+4/10/2023
+4/11/2023
+4/12/2023
+4/13/2023
+4/14/2023
+4/15/2023
+4/16/2023
+4/17/2023
+4/18/2023
+4/19/2023
+4/20/2023
+4/21/2023
+4/22/2023
+4/23/2023
+4/24/2023
+4/25/2023
+4/26/2023
+4/27/2023
+4/28/2023
+4/29/2023
+4/30/2023
+5/1/2023
+5/2/2023
+5/3/2023
+5/4/2023
+5/5/2023
+5/6/2023
+5/7/2023
+5/8/2023
+5/9/2023
+5/10/2023
+5/11/2023
+5/12/2023
+5/13/2023
+5/14/2023
+5/15/2023
+5/16/2023
+5/17/2023
+5/18/2023
+5/19/2023
+5/20/2023
+5/21/2023
+5/22/2023
+5/23/2023
+5/24/2023
+5/25/2023
+5/26/2023
+5/27/2023
+5/28/2023
+5/29/2023
+5/30/2023
+5/31/2023
+6/1/2023
+6/2/2023
+6/3/2023
+6/4/2023
+6/5/2023
+6/6/2023
+6/7/2023
+6/8/2023
+6/9/2023
+6/10/2023
+6/11/2023
+6/12/2023
+6/13/2023
+6/14/2023
+6/15/2023
+6/16/2023
+6/17/2023
+6/18/2023
+6/19/2023
+6/20/2023
+6/21/2023
+6/22/2023
+6/23/2023
+6/24/2023
+6/25/2023
+6/26/2023
+6/27/2023
+6/28/2023
+6/29/2023
+6/30/2023
+7/1/2023
+7/2/2023
+7/3/2023
+7/4/2023
+7/5/2023
+7/6/2023
+7/7/2023
+7/8/2023
+7/9/2023
+7/10/2023
+7/11/2023
+7/12/2023
+7/13/2023
+7/14/2023
+7/15/2023
+7/16/2023
+7/17/2023
+7/18/2023
+7/19/2023
+7/20/2023
+7/21/2023
+7/22/2023
+7/23/2023
+7/24/2023
+7/25/2023
+7/26/2023
+7/27/2023
+7/28/2023
+7/29/2023
+7/30/2023
+7/31/2023
+8/1/2023
+8/2/2023
+8/3/2023
+8/4/2023
+8/5/2023
+8/6/2023
+8/7/2023
+8/8/2023
+8/9/2023
+8/10/2023
+8/11/2023
+8/12/2023
+8/13/2023
+8/14/2023
+8/15/2023
+8/16/2023
+8/17/2023
+8/18/2023
+8/19/2023
+8/20/2023
+8/21/2023
+8/22/2023
+8/23/2023
+8/24/2023
+8/25/2023
+8/26/2023
+8/27/2023
+8/28/2023
+8/29/2023
+8/30/2023
+8/31/2023
+9/1/2023
+9/2/2023
+9/3/2023
+9/4/2023
+9/5/2023
+9/6/2023
+9/7/2023
+9/8/2023
+9/9/2023
+9/10/2023
+9/11/2023
+9/12/2023
+9/13/2023
+9/14/2023
+9/15/2023
+9/16/2023
+9/17/2023
+9/18/2023
+9/19/2023
+9/20/2023
+9/21/2023
+9/22/2023
+9/23/2023
+9/24/2023
+9/25/2023
+9/26/2023
+9/27/2023
+9/28/2023
+9/29/2023
+9/30/2023
--- a/Alteryx_TestWorkflows/Maven
+++ b/Alteryx_TestWorkflows/Maven
@ -0,0 +1,20 @@
+Table,Field,Description
+Products,Product_ID,Product ID
+Products,Product_Name,Product name
+Products,Product_Category,Product Category
+Products,Product_Cost,Product cost ($USD)
+Products,Product_Price,Product retail price ($USD)
+Inventory,Store_ID,Store ID
+Inventory,Product_ID,Product ID
+Inventory,Stock_On_Hand,Stock quantity of the product in the store (inventory)
+Stores,Store_ID,Store ID
+Stores,Store_Name,Store name
+Stores,Store_City,City in Mexico where the store is located
+Stores,Store_Location,Location in the city where the store is located
+Stores,Store_Open_Date,Date when the store was opened
+Sales,Sale_ID,Sale ID
+Sales,Date,Date of the transaction
+Sales,Store_ID,Store ID
+Sales,Product_ID,Product ID
+Sales,Units,Units sold
+Calendar,Date,Calendar date
--- a/Alteryx_TestWorkflows/Maven
+++ b/Alteryx_TestWorkflows/Maven
--- a/Alteryx_TestWorkflows/Maven
+++ b/Alteryx_TestWorkflows/Maven
@ -0,0 +1,36 @@
+Product_ID,Product_Name,Product_Category,Product_Cost,Product_Price
+1,Action Figure,Toys,$9.99 ,$15.99 
+2,Animal Figures,Toys,$9.99 ,$12.99 
+3,Barrel O' Slime,Art & Crafts,$1.99 ,$3.99 
+4,Chutes & Ladders,Games,$9.99 ,$12.99 
+5,Classic Dominoes,Games,$7.99 ,$9.99 
+6,Colorbuds,Electronics,$6.99 ,$14.99 
+7,Dart Gun,Sports & Outdoors,$11.99 ,$15.99 
+8,Deck Of Cards,Games,$3.99 ,$6.99 
+9,Dino Egg,Toys,$9.99 ,$10.99 
+10,Dinosaur Figures,Toys,$10.99 ,$14.99 
+11,Etch A Sketch,Art & Crafts,$10.99 ,$20.99 
+12,Foam Disk Launcher,Sports & Outdoors,$8.99 ,$11.99 
+13,Gamer Headphones,Electronics,$14.99 ,$20.99 
+14,Glass Marbles,Games,$5.99 ,$10.99 
+15,Hot Wheels 5-Pack,Toys,$3.99 ,$5.99 
+16,Jenga,Games,$2.99 ,$9.99 
+17,Kids Makeup Kit,Art & Crafts,$13.99 ,$19.99 
+18,Lego Bricks,Toys,$34.99 ,$39.99 
+19,Magic Sand,Art & Crafts,$13.99 ,$15.99 
+20,Mini Basketball Hoop,Sports & Outdoors,$8.99 ,$24.99 
+21,Mini Ping Pong Set,Sports & Outdoors,$6.99 ,$9.99 
+22,Monopoly,Games,$13.99 ,$19.99 
+23,Mr. Potatohead,Toys,$4.99 ,$9.99 
+24,Nerf Gun,Sports & Outdoors,$14.99 ,$19.99 
+25,PlayDoh Can,Art & Crafts,$1.99 ,$2.99 
+26,PlayDoh Playset,Art & Crafts,$20.99 ,$24.99 
+27,PlayDoh Toolkit,Art & Crafts,$3.99 ,$4.99 
+28,Playfoam,Art & Crafts,$3.99 ,$10.99 
+29,Plush Pony,Toys,$8.99 ,$19.99 
+30,Rubik's Cube,Games,$17.99 ,$19.99 
+31,Splash Balls,Sports & Outdoors,$7.99 ,$8.99 
+32,Supersoaker Water Gun,Sports & Outdoors,$11.99 ,$14.99 
+33,Teddy Bear,Toys,$10.99 ,$12.99 
+34,Toy Robot,Electronics,$20.99 ,$25.99 
+35,Uno Card Game,Games,$3.99 ,$7.99 
--- a/Alteryx_TestWorkflows/Maven
+++ b/Alteryx_TestWorkflows/Maven
--- a/Alteryx_TestWorkflows/Maven
+++ b/Alteryx_TestWorkflows/Maven
@ -0,0 +1,51 @@
+Store_ID,Store_Name,Store_City,Store_Location,Store_Open_Date
+1,Maven Toys Guadalajara 1,Guadalajara,Residential,1992-09-18
+2,Maven Toys Monterrey 1,Monterrey,Residential,1995-04-27
+3,Maven Toys Guadalajara 2,Guadalajara,Commercial,1999-12-27
+4,Maven Toys Saltillo 1,Saltillo,Downtown,2000-01-01
+5,Maven Toys La Paz 1,La Paz,Downtown,2001-05-31
+6,Maven Toys Mexicali 1,Mexicali,Commercial,2003-12-13
+7,Maven Toys Monterrey 2,Monterrey,Downtown,2003-12-25
+8,Maven Toys Pachuca 1,Pachuca,Downtown,2004-10-14
+9,Maven Toys Ciudad de Mexico 1,Cuidad de Mexico,Downtown,2004-10-15
+10,Maven Toys Campeche 1,Campeche,Downtown,2005-01-14
+11,Maven Toys Cuernavaca 1,Cuernavaca,Downtown,2005-04-19
+12,Maven Toys Chetumal 1,Chetumal,Downtown,2006-05-05
+13,Maven Toys Mexicali 2,Mexicali,Downtown,2006-08-30
+14,Maven Toys Guanajuato 1,Guanajuato,Downtown,2007-01-31
+15,Maven Toys Tuxtla Gutierrez 1,Tuxtla Gutierrez,Downtown,2007-03-05
+16,Maven Toys San Luis Potosi 1,San Luis Potosi,Downtown,2007-05-19
+17,Maven Toys Toluca 1,Toluca,Downtown,2007-12-09
+18,Maven Toys Merida 1,Merida,Downtown,2008-08-22
+19,Maven Toys Puebla 1,Puebla,Commercial,2008-12-16
+20,Maven Toys Zacatecas 1,Zacatecas,Downtown,2009-05-29
+21,Maven Toys Santiago 1,Santiago,Downtown,2009-11-23
+22,Maven Toys Guanajuato 2,Guanajuato,Commercial,2010-03-29
+23,Maven Toys Chihuahua 1,Chihuahua,Commercial,2010-06-12
+24,Maven Toys Aguascalientes 1,Aguascalientes,Downtown,2010-07-31
+25,Maven Toys Ciudad Victoria 1,Ciudad Victoria,Downtown,2010-09-08
+26,Maven Toys Campeche 2,Campeche,Commercial,2010-09-15
+27,Maven Toys Oaxaca 1,Oaxaca,Downtown,2010-10-02
+28,Maven Toys Puebla 2,Puebla,Downtown,2011-04-01
+29,Maven Toys Xalapa 1,Xalapa,Commercial,2011-06-21
+30,Maven Toys Guadalajara 3,Guadalajara,Airport,2011-10-20
+31,Maven Toys Ciudad de Mexico 2,Cuidad de Mexico,Airport,2012-05-04
+32,Maven Toys Hermosillo 1,Hermosillo,Residential,2012-08-31
+33,Maven Toys Monterrey 3,Monterrey,Airport,2013-03-17
+34,Maven Toys Villahermosa 1,Villahermosa,Downtown,2013-06-07
+35,Maven Toys Chilpancingo 1,Chilpancingo,Downtown,2013-06-11
+36,Maven Toys Morelia 1,Morelia,Downtown,2013-07-01
+37,Maven Toys Ciudad de Mexico 3,Cuidad de Mexico,Residential,2013-11-28
+38,Maven Toys Chihuahua 2,Chihuahua,Downtown,2014-03-18
+39,Maven Toys Xalapa 2,Xalapa,Downtown,2014-04-21
+40,Maven Toys Toluca 2,Toluca,Commercial,2014-05-27
+41,Maven Toys Hermosillo 2,Hermosillo,Downtown,2014-06-01
+42,Maven Toys Hermosillo 3,Hermosillo,Commercial,2014-06-27
+43,Maven Toys Durango 1,Durango,Downtown,2014-06-30
+44,Maven Toys Puebla 3,Puebla,Residential,2014-12-27
+45,Maven Toys Ciudad de Mexico 4,Cuidad de Mexico,Commercial,2015-06-21
+46,Maven Toys Guadalajara 4,Guadalajara,Downtown,2015-10-31
+47,Maven Toys Monterrey 4,Monterrey,Commercial,2015-11-21
+48,Maven Toys Saltillo 2,Saltillo,Commercial,2016-03-23
+49,Maven Toys Culiacan 1,Culiacan,Downtown,2016-05-10
+50,Maven Toys Guanajuato 3,Guanajuato,Residential,2016-05-18
--- a/alteryx_runner/.DS_Store
+++ b/alteryx_runner/.DS_Store
--- a/alteryx_runner/init.py
+++ b/alteryx_runner/init.py
@ -0,0 +1,2 @@
+"""Alteryx workflow runner — Python-native .yxmd executor."""
+__version__ = "0.1.0"
--- a/alteryx_runner/main.py
+++ b/alteryx_runner/main.py
@ -0,0 +1,13 @@
+import sys
+from pathlib import Path
+
+# Add the package root to sys.path so imports resolve correctly when run with
+# `python -m alteryx_runner` from the project root.
+_pkg_dir = Path(__file__).parent  # alteryx_runner/
+if str(_pkg_dir) not in sys.path:
+    # Prepended (index 0) so modules in this directory are found first.
+    sys.path.insert(0, str(_pkg_dir))
+
+# Must stay below the sys.path tweak: `cli` is imported as a top-level module
+# (not as `alteryx_runner.cli`), which only resolves once _pkg_dir is on the path.
+from cli import main
+
+if __name__ == "__main__":
+    main()
--- a/alteryx_runner/pycache/init.cpython-312.pyc
+++ b/alteryx_runner/pycache/init.cpython-312.pyc
--- a/alteryx_runner/pycache/init.cpython-313.pyc
+++ b/alteryx_runner/pycache/init.cpython-313.pyc
--- a/alteryx_runner/pycache/main.cpython-312.pyc
+++ b/alteryx_runner/pycache/main.cpython-312.pyc
--- a/alteryx_runner/pycache/main.cpython-313.pyc
+++ b/alteryx_runner/pycache/main.cpython-313.pyc
--- a/alteryx_runner/pycache/cli.cpython-312.pyc
+++ b/alteryx_runner/pycache/cli.cpython-312.pyc
--- a/alteryx_runner/pycache/cli.cpython-313.pyc
+++ b/alteryx_runner/pycache/cli.cpython-313.pyc
--- a/alteryx_runner/cli.py
+++ b/alteryx_runner/cli.py
@ -0,0 +1,94 @@
+"""CLI entry point: python -m alteryx_runner run workflow.yxmd [options]"""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import click
+import polars as pl
+
+
+# Root click command group. The subcommands defined further down in this file
+# attach themselves to it with @main.command(). The docstring is also what
+# click shows as the group's help text, so treat it as user-facing output.
+@click.group()
+def main():
+    """Alteryx workflow runner — execute .yxmd files without Alteryx."""
+
+
+# `run` subcommand: parse a workflow file and, unless --dry-run is given,
+# execute it and report how many non-empty output frames were produced.
+# Exits with status 1 when parsing or execution raises.
+# NOTE(review): `fmt` (--format) is accepted but never referenced in this
+# function body — confirm whether it should be forwarded to the run context.
+@main.command()
+@click.argument("workflow", type=click.Path(exists=True, path_type=Path))
+@click.option("--output-dir", default=None, type=click.Path(path_type=Path),
+              help="Write output files to this directory.")
+@click.option("--param", multiple=True, metavar="KEY=VALUE",
+              help="Set workflow constant (repeatable).")
+@click.option("--verbose", is_flag=True, default=False,
+              help="Print Browse results and execution log.")
+@click.option("--dry-run", is_flag=True, default=False,
+              help="Parse and validate only; do not execute.")
+@click.option("--format", "fmt",
+              type=click.Choice(["json", "csv", "parquet"]), default="csv",
+              help="Default output format for Browse nodes.")
+def run(
+    workflow: Path,
+    output_dir: Path | None,
+    param: tuple[str, ...],
+    verbose: bool,
+    dry_run: bool,
+    fmt: str,
+) -> None:
+    """Execute WORKFLOW (.yxmd file)."""
+    # Import here so CLI loads fast even if deps are missing
+    # (these are top-level module names, so the package directory has to be
+    # on sys.path for them to resolve)
+    from engine.parser import parse_workflow
+    from engine.executor import execute
+    from engine.context import RunContext
+
+    # Turn the repeated KEY=VALUE strings into a dict. Only the first '=' is
+    # used as the separator, so a value may itself contain '='. Key and value
+    # are stripped of surrounding whitespace; entries without '=' are skipped
+    # with a warning on stderr.
+    params: dict[str, str] = {}
+    for p in param:
+        if "=" in p:
+            k, v = p.split("=", 1)
+            params[k.strip()] = v.strip()
+        else:
+            click.echo(f"Warning: --param {p!r} ignored (no '=' found)", err=True)
+
+    click.echo(f"Parsing {workflow} …")
+    try:
+        graph = parse_workflow(str(workflow))
+    except Exception as e:
+        # Any parse failure is reported on stderr and ends the process.
+        click.echo(f"Parse error: {e}", err=True)
+        sys.exit(1)
+
+    click.echo(
+        f"  {len(graph.nodes)} nodes, {len(graph.connections)} connections"
+    )
+
+    if dry_run:
+        # Parsing succeeded; stop before any run context is created.
+        click.echo("Dry run complete — no execution.")
+        return
+
+    # Relative paths inside the workflow are resolved against the directory
+    # that contains the workflow file.
+    ctx = RunContext(
+        workflow_dir=str(workflow.parent),
+        verbose=verbose,
+        output_dir=str(output_dir) if output_dir else None,
+        params=params,
+    )
+
+    click.echo("Executing …")
+    try:
+        outputs = execute(graph, ctx)
+    except Exception as e:
+        click.echo(f"Execution error: {e}", err=True)
+        if verbose:
+            # Full traceback only on request; the one-line message above is
+            # always printed.
+            import traceback
+            traceback.print_exc()
+        sys.exit(1)
+
+    # Count only values that are polars DataFrames with at least one row.
+    n_frames = sum(1 for df in outputs.values() if isinstance(df, pl.DataFrame) and len(df) > 0)
+    click.echo(f"Done. {n_frames} non-empty output frames produced.")
+
+
+# `list-tools` subcommand: print every entry of the tool registry, sorted by
+# plugin string, one per line in the form "<plugin> → <class name>".
+@main.command("list-tools")
+def list_tools() -> None:
+    """List all registered tool Plugin strings."""
+    # Function-scope import: the registry is only loaded when this command runs.
+    from tools import _REGISTRY
+    for plugin, cls in sorted(_REGISTRY.items()):
+        # `:<70` left-aligns the plugin string and pads it to 70 characters so
+        # the class names line up in a column.
+        click.echo(f"  {plugin:<70} → {cls.__name__}")
+
+
+if __name__ == "__main__":
+    main()
--- a/alteryx_runner/engine/init.py
+++ b/alteryx_runner/engine/init.py
@ -0,0 +1,10 @@
+from .parser import parse_workflow
+from .executor import execute
+from .context import RunContext
+from .graph import WorkflowGraph, NodeDef, ConnectionDef, FieldDef
+from .type_mapper import TypeMapper
+
+__all__ = [
+    "parse_workflow", "execute", "RunContext",
+    "WorkflowGraph", "NodeDef", "ConnectionDef", "FieldDef", "TypeMapper",
+]
--- a/alteryx_runner/engine/pycache/init.cpython-312.pyc
+++ b/alteryx_runner/engine/pycache/init.cpython-312.pyc
--- a/alteryx_runner/engine/pycache/init.cpython-313.pyc
+++ b/alteryx_runner/engine/pycache/init.cpython-313.pyc
--- a/alteryx_runner/engine/pycache/context.cpython-312.pyc
+++ b/alteryx_runner/engine/pycache/context.cpython-312.pyc
--- a/alteryx_runner/engine/pycache/context.cpython-313.pyc
+++ b/alteryx_runner/engine/pycache/context.cpython-313.pyc
--- a/alteryx_runner/engine/pycache/executor.cpython-312.pyc
+++ b/alteryx_runner/engine/pycache/executor.cpython-312.pyc
--- a/alteryx_runner/engine/pycache/executor.cpython-313.pyc
+++ b/alteryx_runner/engine/pycache/executor.cpython-313.pyc
--- a/alteryx_runner/engine/pycache/graph.cpython-312.pyc
+++ b/alteryx_runner/engine/pycache/graph.cpython-312.pyc
--- a/alteryx_runner/engine/pycache/graph.cpython-313.pyc
+++ b/alteryx_runner/engine/pycache/graph.cpython-313.pyc
--- a/alteryx_runner/engine/pycache/parser.cpython-312.pyc
+++ b/alteryx_runner/engine/pycache/parser.cpython-312.pyc
--- a/alteryx_runner/engine/pycache/parser.cpython-313.pyc
+++ b/alteryx_runner/engine/pycache/parser.cpython-313.pyc
--- a/alteryx_runner/engine/pycache/type_mapper.cpython-312.pyc
+++ b/alteryx_runner/engine/pycache/type_mapper.cpython-312.pyc
--- a/alteryx_runner/engine/pycache/type_mapper.cpython-313.pyc
+++ b/alteryx_runner/engine/pycache/type_mapper.cpython-313.pyc
--- a/alteryx_runner/engine/context.py
+++ b/alteryx_runner/engine/context.py
@ -0,0 +1,52 @@
+from __future__ import annotations
+import tempfile
+from pathlib import Path
+import duckdb
+
+from .type_mapper import TypeMapper
+from expression.transpiler import ExpressionTranspiler
+
+
+class RunContext:
+    """State shared across one workflow run.
+
+    Holds the workflow's directory, the verbosity flag, an optional output
+    directory override, an in-memory DuckDB connection, a freshly created
+    temporary directory, a TypeMapper, an ExpressionTranspiler bound to the
+    DuckDB connection, and the workflow constants supplied by the caller.
+    """
+
+    def __init__(
+        self,
+        workflow_dir: str,
+        verbose: bool = False,
+        output_dir: str | None = None,
+        params: dict | None = None,
+    ):
+        """Create the context and the resources it owns.
+
+        Args:
+            workflow_dir: Base directory for resolving relative paths.
+            verbose: Stored on the context unchanged.
+            output_dir: If given, resolve_output_path redirects files into
+                this directory.
+            params: Workflow constants, substituted as %KEY% by resolve_path.
+                The dict is stored directly, not copied.
+        """
+        self.workflow_dir = Path(workflow_dir)
+        self.verbose = verbose
+        self.output_dir: Path | None = Path(output_dir) if output_dir else None
+        self.duckdb_con = duckdb.connect(":memory:")
+        # A new directory is created on every construction; nothing in this
+        # class removes it again.
+        self.temp_dir = Path(tempfile.mkdtemp(prefix="alteryx_runner_"))
+        self.type_mapper = TypeMapper()
+        self.transpiler = ExpressionTranspiler(self.duckdb_con)
+        self.constants: dict = params or {}
+
+    def resolve_path(self, path: str) -> Path:
+        """Expand placeholders in *path* and anchor it to the workflow directory.
+
+        Backslashes become forward slashes, the literal tokens %temp% and
+        %Desktop% are replaced (exact, case-sensitive match), every %KEY% for
+        a workflow constant is replaced by its value, and a path that is
+        still relative afterwards is joined onto workflow_dir.
+        """
+        # Normalise Windows backslashes so relative segments like .. work on
+        # POSIX platforms (workflow XMLs are authored on Windows).
+        path = path.replace("\\", "/")
+        path = path.replace("%temp%", str(self.temp_dir) + "/")
+        path = path.replace("%Desktop%", str(Path.home() / "Desktop") + "/")
+        # Substitute workflow constants
+        # NOTE(review): str.replace requires v to be a str; the CLI passes
+        # strings, other callers should be checked.
+        for k, v in self.constants.items():
+            path = path.replace(f"%{k}%", v)
+        p = Path(path)
+        if not p.is_absolute():
+            p = self.workflow_dir / p
+        # No output_dir remapping happens here; resolve_output_path applies it.
+        return p
+
+    def resolve_output_path(self, path: str) -> Path:
+        """Resolve *path* like resolve_path, honouring the output_dir override.
+
+        When output_dir is set, only the file name of the resolved path is
+        kept and placed directly inside output_dir; any directory part of
+        the original path is dropped.
+        """
+        p = self.resolve_path(path)
+        if self.output_dir is not None:
+            return self.output_dir / p.name
+        return p
+
+    def __del__(self):
+        """Close the DuckDB connection on a best-effort basis."""
+        # Errors are deliberately ignored: finalizers may run on a partially
+        # constructed object or during interpreter shutdown.
+        try:
+            self.duckdb_con.close()
+        except Exception:
+            pass
--- a/alteryx_runner/engine/executor.py
+++ b/alteryx_runner/engine/executor.py
@ -0,0 +1,95 @@
+from __future__ import annotations
+from collections import defaultdict, deque
+import polars as pl
+
+from .graph import WorkflowGraph, ConnectionDef
+from .context import RunContext
+from tools import get_tool_class
+
+
+def execute(graph: WorkflowGraph, ctx: RunContext) -> dict[tuple, pl.DataFrame]:
+    """Run the tools of *graph* in dependency order and collect their outputs.
+
+    A tool becomes runnable once all of its incoming connections have been
+    served. Tools with no registered class are handed to ``_passthrough``.
+    Tools whose in-degree never drops to zero are never run.
+
+    Returns:
+        Mapping of ``(tool_id, output_anchor)`` to the frame produced there.
+    """
+    in_degree: dict[int, int] = defaultdict(int)
+    successors: dict[int, list[ConnectionDef]] = defaultdict(list)
+    predecessors: dict[int, list[ConnectionDef]] = defaultdict(list)
+
+    for conn in graph.connections:
+        predecessors[conn.dest_id].append(conn)
+        successors[conn.origin_id].append(conn)
+        in_degree[conn.dest_id] += 1
+
+    # Tools without any incoming connection still need an in-degree entry.
+    for node_id in graph.nodes:
+        in_degree.setdefault(node_id, 0)
+
+    # (tool_id, anchor) → DataFrame
+    outputs: dict[tuple[int, str], pl.DataFrame] = {}
+
+    queue: deque[int] = deque(
+        node_id for node_id, pending in in_degree.items() if pending == 0
+    )
+
+    while queue:
+        tid = queue.popleft()
+        node = graph.nodes[tid]
+        tool_cls = get_tool_class(node.plugin)
+
+        if tool_cls is None:
+            if ctx.verbose:
+                print(f"[SKIP] ToolID={tid} plugin={node.plugin!r} (unsupported)")
+            _passthrough(tid, predecessors, outputs, successors, in_degree, queue)
+            continue
+
+        tool = tool_cls(node, ctx)
+        incoming = predecessors[tid]
+
+        # How many connections land on each destination anchor; anchors fed
+        # more than once (e.g. Union) are keyed by connection name instead.
+        fan_in: dict[str, int] = {}
+        for conn in incoming:
+            fan_in[conn.dest_anchor] = fan_in.get(conn.dest_anchor, 0) + 1
+
+        inputs: dict[str, pl.DataFrame] = {}
+        for conn in incoming:
+            frame = outputs.get((conn.origin_id, conn.origin_anchor))
+            if frame is None:
+                continue
+            shared_anchor = fan_in[conn.dest_anchor] > 1
+            if shared_anchor and conn.name:
+                inputs[conn.name] = frame
+            else:
+                inputs[conn.dest_anchor] = frame
+
+        if ctx.verbose:
+            print(f"[RUN ] ToolID={tid} plugin={node.plugin!r}")
+
+        result = tool.execute(inputs)
+        outputs.update(((tid, anchor), frame) for anchor, frame in result.items())
+
+        for conn in successors[tid]:
+            remaining = in_degree[conn.dest_id] - 1
+            in_degree[conn.dest_id] = remaining
+            if remaining == 0:
+                queue.append(conn.dest_id)
+
+    return outputs
+
+
+def _passthrough(
+    tid: int,
+    predecessors: dict[int, list[ConnectionDef]],
+    outputs: dict[tuple[int, str], pl.DataFrame],
+    successors: dict[int, list[ConnectionDef]],
+    in_degree: dict[int, int],
+    queue: deque[int],
+) -> None:
+    """Treat tool *tid* as a no-op and keep the traversal moving.
+
+    The frame arriving on the first incoming connection (or an empty frame
+    when there is none) is stored as the tool's ``"Output"`` anchor, then each
+    successor's in-degree is decremented and newly ready tools are queued.
+    """
+    incoming = predecessors.get(tid, [])
+    if incoming:
+        head = incoming[0]
+        frame = outputs.get((head.origin_id, head.origin_anchor), pl.DataFrame())
+    else:
+        frame = pl.DataFrame()
+    outputs[(tid, "Output")] = frame
+
+    for conn in successors.get(tid, []):
+        remaining = in_degree[conn.dest_id] - 1
+        in_degree[conn.dest_id] = remaining
+        if remaining == 0:
+            queue.append(conn.dest_id)
--- a/alteryx_runner/engine/graph.py
+++ b/alteryx_runner/engine/graph.py
@ -0,0 +1,38 @@
+from __future__ import annotations
+from dataclasses import dataclass, field
+from typing import Dict, List, Optional
+import xml.etree.ElementTree as ET
+
+
+@dataclass
+class FieldDef:
+    """One field of a tool's recorded output schema."""
+    # Field name.
+    name: str
+    # Alteryx type name, e.g. "V_String".
+    type: str
+    # Declared size, if the field carries one.
+    size: Optional[int] = None
+    # Origin of the field as recorded in the workflow, if any.
+    source: Optional[str] = None
+
+
+@dataclass
+class NodeDef:
+    """A single tool (node) of a workflow."""
+    # Numeric ToolID that connections refer to.
+    tool_id: int
+    # Plugin identifier used to look up the tool implementation.
+    plugin: str
+    # The tool's configuration XML element, or None when absent.
+    config: Optional[ET.Element]
+    # Fields the tool is recorded as producing; empty when unknown.
+    output_schema: List[FieldDef] = field(default_factory=list)
+    # (x, y) canvas position.
+    position: tuple = (0, 0)
+
+
+@dataclass
+class ConnectionDef:
+    """A directed connection from one tool's output anchor to another tool's input anchor."""
+    # ToolID and anchor name on the producing side.
+    origin_id: int
+    origin_anchor: str
+    # ToolID and anchor name on the consuming side.
+    dest_id: int
+    dest_anchor: str
+    # Optional connection name (e.g. '#1', '#2' when several feed one anchor).
+    name: Optional[str] = None
+    # Whether the connection is flagged as wireless in the workflow.
+    wireless: bool = False
+
+
+@dataclass
+class WorkflowGraph:
+    """A parsed workflow: its tools, the connections between them, and its properties."""
+    # Tools keyed by ToolID.
+    nodes: Dict[int, NodeDef]
+    # All connections between tools.
+    connections: List[ConnectionDef]
+    # Workflow-level properties XML element, or None when absent.
+    properties: Optional[ET.Element]
--- a/alteryx_runner/engine/parser.py
+++ b/alteryx_runner/engine/parser.py
@ -0,0 +1,82 @@
+from __future__ import annotations
+import xml.etree.ElementTree as ET
+from typing import Dict, List, Optional
+from .graph import FieldDef, NodeDef, ConnectionDef, WorkflowGraph
+
+
+def parse_workflow(path: str) -> WorkflowGraph:
+    """Read the workflow XML at *path* and return it as a WorkflowGraph.
+
+    Nodes are gathered recursively (containers are flattened). Connections
+    are read from the root ``Connections`` element, and entries lacking an
+    ``Origin`` or ``Destination`` child are ignored.
+    """
+    root = ET.parse(path).getroot()
+
+    nodes: Dict[int, NodeDef] = {}
+    _collect_nodes(root, nodes)
+
+    connections: List[ConnectionDef] = []
+    for conn_el in root.findall("Connections/Connection"):
+        origin_el = conn_el.find("Origin")
+        dest_el = conn_el.find("Destination")
+        if origin_el is None or dest_el is None:
+            continue
+        origin, dest = origin_el.attrib, dest_el.attrib
+        connection = ConnectionDef(
+            origin_id=int(origin["ToolID"]),
+            origin_anchor=origin.get("Connection", "Output"),
+            dest_id=int(dest["ToolID"]),
+            dest_anchor=dest.get("Connection", "Input"),
+            name=conn_el.attrib.get("name"),
+            wireless=conn_el.attrib.get("Wireless", "False") == "True",
+        )
+        connections.append(connection)
+
+    return WorkflowGraph(
+        nodes=nodes,
+        connections=connections,
+        properties=root.find("Properties"),
+    )
+
+
+def _collect_nodes(parent: ET.Element, nodes: Dict[int, NodeDef]) -> None:
+    """Register every ``Nodes/Node`` under *parent*, descending into containers.
+
+    Each top-level node is parsed first; the contents of its ``ChildNodes``
+    element (if any) are then flattened into the same mapping, depth first.
+    """
+    for node_el in parent.findall("Nodes/Node"):
+        _parse_node(node_el, nodes)
+        nested = node_el.find("ChildNodes")
+        if nested is not None:
+            _collect_nodes_flat(nested, nodes)
+
+
+def _collect_nodes_flat(parent: ET.Element, nodes: Dict[int, NodeDef]) -> None:
+    """Register the ``Node`` children of *parent* and of any nested ``ChildNodes``.
+
+    Nodes are visited depth first, each node before its own children.
+    """
+    # Explicit stack of child iterators; the top iterator is always the level
+    # currently being walked, which keeps the pre-order traversal.
+    pending = [iter(parent.findall("Node"))]
+    while pending:
+        node_el = next(pending[-1], None)
+        if node_el is None:
+            pending.pop()
+            continue
+        _parse_node(node_el, nodes)
+        nested = node_el.find("ChildNodes")
+        if nested is not None:
+            pending.append(iter(nested.findall("Node")))
+
+
+def _parse_node(node_el: ET.Element, nodes: Dict[int, NodeDef]) -> None:
+    """Build a NodeDef from *node_el* and store it in *nodes* under its ToolID.
+
+    The plugin name and canvas position come from ``GuiSettings``; both fall
+    back to ``""`` / ``(0, 0)`` when the element or attribute is missing.
+    """
+    tool_id = int(node_el.attrib["ToolID"])
+
+    plugin = ""
+    position = (0, 0)
+    gui = node_el.find("GuiSettings")
+    if gui is not None:
+        plugin = gui.attrib.get("Plugin", "")
+        pos_el = gui.find("Position")
+        if pos_el is not None:
+            position = (
+                int(pos_el.attrib.get("x", 0)),
+                int(pos_el.attrib.get("y", 0)),
+            )
+
+    nodes[tool_id] = NodeDef(
+        tool_id=tool_id,
+        plugin=plugin,
+        config=node_el.find("Properties/Configuration"),
+        output_schema=_parse_schema(node_el),
+        position=position,
+    )
+
+
+def _parse_schema(node_el: ET.Element) -> List[FieldDef]:
+    """Return the fields listed under any ``MetaInfo/RecordInfo`` of *node_el*.
+
+    A missing ``type`` defaults to ``"V_String"``; a present ``size`` is read
+    as a float and truncated to an int.
+    """
+    schema: List[FieldDef] = []
+    for field_el in node_el.findall(".//MetaInfo/RecordInfo/Field"):
+        attrs = field_el.attrib
+        raw_size = attrs.get("size")
+        size = None if not raw_size else int(float(raw_size))
+        schema.append(
+            FieldDef(
+                name=attrs["name"],
+                type=attrs.get("type", "V_String"),
+                size=size,
+                source=attrs.get("source"),
+            )
+        )
+    return schema
--- a/alteryx_runner/engine/type_mapper.py
+++ b/alteryx_runner/engine/type_mapper.py
@ -0,0 +1,66 @@
+from __future__ import annotations
+from typing import Optional
+import polars as pl
+
+
+class TypeMapper:
+    """Maps Alteryx field types to Polars dtypes and DuckDB type strings.
+
+    Unknown types fall back to a string type. ``FixedDecimal`` is sized from
+    a ``precision.scale`` value and defaults to precision 19, scale 2.
+    """
+
+    _POLARS: dict[str, pl.PolarsDataType] = {
+        "Bool": pl.Boolean,
+        "Byte": pl.UInt8,
+        "Int16": pl.Int16,
+        "Int32": pl.Int32,
+        "Int64": pl.Int64,
+        "Float": pl.Float32,
+        "Double": pl.Float64,
+        "String": pl.String,
+        "V_String": pl.String,
+        "WString": pl.String,
+        "V_WString": pl.String,
+        "Date": pl.Date,
+        "Time": pl.Time,
+        "DateTime": pl.Datetime,
+        "SpatialObj": pl.String,
+        "Blob": pl.Binary,
+    }
+
+    _DUCKDB: dict[str, str] = {
+        "Bool": "BOOLEAN",
+        "Byte": "UTINYINT",
+        "Int16": "SMALLINT",
+        "Int32": "INTEGER",
+        "Int64": "BIGINT",
+        "Float": "FLOAT",
+        "Double": "DOUBLE",
+        "String": "VARCHAR",
+        "V_String": "VARCHAR",
+        "WString": "VARCHAR",
+        "V_WString": "VARCHAR",
+        "Date": "DATE",
+        "Time": "TIME",
+        "DateTime": "TIMESTAMP",
+        "SpatialObj": "VARCHAR",
+        "Blob": "BLOB",
+        "FixedDecimal": "DECIMAL",
+    }
+
+    @staticmethod
+    def _decimal_spec(size) -> tuple[int, int]:
+        """Return ``(precision, scale)`` for a FixedDecimal *size*.
+
+        *size* may be a ``"precision.scale"`` string or a number; it is
+        stringified first so an already-parsed int size does not fail on
+        ``.split``. A missing or empty scale means 0; a falsy size yields the
+        default ``(19, 2)``.
+        """
+        if not size:
+            return 19, 2
+        parts = str(size).split(".")
+        precision = int(parts[0])
+        scale = int(parts[1]) if len(parts) > 1 and parts[1] else 0
+        return precision, scale
+
+    def map(
+        self, alteryx_type: str, size: str | int | float | None = None
+    ) -> pl.PolarsDataType:
+        """Return the Polars dtype for *alteryx_type* (``pl.String`` if unknown)."""
+        if alteryx_type == "FixedDecimal":
+            precision, scale = self._decimal_spec(size)
+            return pl.Decimal(precision=precision, scale=scale)
+        return self._POLARS.get(alteryx_type, pl.String)
+
+    def map_duckdb(
+        self, alteryx_type: str, size: str | int | float | None = None
+    ) -> str:
+        """Return the DuckDB type name for *alteryx_type* (``VARCHAR`` if unknown)."""
+        if alteryx_type == "FixedDecimal":
+            precision, scale = self._decimal_spec(size)
+            return f"DECIMAL({precision},{scale})"
+        return self._DUCKDB.get(alteryx_type, "VARCHAR")
--- a/alteryx_runner/expression/init.py
+++ b/alteryx_runner/expression/init.py
@ -0,0 +1,4 @@
+from .transpiler import ExpressionTranspiler, transpile, UnsupportedExpressionError
+from .functions import get_function_sql
+
+__all__ = ["ExpressionTranspiler", "transpile", "UnsupportedExpressionError", "get_function_sql"]
--- a/alteryx_runner/expression/pycache/init.cpython-312.pyc
+++ b/alteryx_runner/expression/pycache/init.cpython-312.pyc
--- a/alteryx_runner/expression/pycache/init.cpython-313.pyc
+++ b/alteryx_runner/expression/pycache/init.cpython-313.pyc
--- a/alteryx_runner/expression/pycache/functions.cpython-312.pyc
+++ b/alteryx_runner/expression/pycache/functions.cpython-312.pyc
--- a/alteryx_runner/expression/pycache/functions.cpython-313.pyc
+++ b/alteryx_runner/expression/pycache/functions.cpython-313.pyc
--- a/alteryx_runner/expression/pycache/transpiler.cpython-312.pyc
+++ b/alteryx_runner/expression/pycache/transpiler.cpython-312.pyc
--- a/alteryx_runner/expression/pycache/transpiler.cpython-313.pyc
+++ b/alteryx_runner/expression/pycache/transpiler.cpython-313.pyc
--- a/alteryx_runner/expression/functions.py
+++ b/alteryx_runner/expression/functions.py
@ -0,0 +1,152 @@
+"""Mapping of Alteryx built-in functions to DuckDB SQL equivalents."""
+from __future__ import annotations
+
+# Function translations: Alteryx function name → DuckDB SQL template.
+# ``{0}``, ``{1}``, ... stand for the already-rendered argument SQL, in call
+# order. A value of ``None`` marks a function with no template (Switch is
+# rendered separately; TOPN is not a scalar function).
+#
+# Where Alteryx and DuckDB disagree the template compensates:
+#   * FindString / Substring are 0-based in Alteryx, INSTR / SUBSTR 1-based.
+#   * Round(x, mult) rounds to the nearest multiple of ``mult``.
+#   * DuckDB string literals have no backslash escapes, so a regex class is
+#     written with a single backslash in the SQL text.
+FUNCTION_MAP: dict[str, str | None] = {
+    # String
+    "Uppercase": "UPPER({0})",
+    "Lowercase": "LOWER({0})",
+    "Trim": "TRIM({0})",
+    "LTrim": "LTRIM({0})",
+    "RTrim": "RTRIM({0})",
+    "Length": "LENGTH({0})",
+    "Left": "LEFT({0}, {1})",
+    "Right": "RIGHT({0}, {1})",
+    "Substring": "SUBSTR({0}, ({1}) + 1, {2})",
+    "FindString": "(INSTR({0}, {1}) - 1)",
+    "ReplaceChar": "REPLACE({0}, {1}, {2})",
+    "StringToDate": "STRPTIME({0}, {1})",
+    "ToString": "PRINTF('%.' || {1} || 'f', {0})",
+    "Contains": "CONTAINS({0}, {1})",
+    "StartsWith": "STARTS_WITH({0}, {1})",
+    "EndsWith": "ENDS_WITH({0}, {1})",
+    "REGEX_Match": "REGEXP_MATCHES({0}, {1})",
+    "REGEX_Replace": "REGEXP_REPLACE({0}, {1}, {2})",
+    "PadLeft": "LPAD({0}, {1}, {2})",
+    "PadRight": "RPAD({0}, {1}, {2})",
+    "GetWord": "list_extract(str_split_regex({0}, '\\s+'), {1} + 1)",
+    "CountWords": "array_length(str_split_regex(TRIM({0}), '\\s+'))",
+    "CharFromInt": "chr({0}::INTEGER)",
+    "IntFromChar": "ascii({0})",
+    "ConvertFromCodePage": "{0}",
+    "ReverseString": "reverse({0})",
+    "DecomposeUnicodeForMatch": "strip_accents(UPPER({0}))",
+    # Math
+    "ABS": "ABS({0})",
+    "Abs": "ABS({0})",
+    "CEIL": "CEIL({0})",
+    "Ceil": "CEIL({0})",
+    "FLOOR": "FLOOR({0})",
+    "Floor": "FLOOR({0})",
+    "ROUND": "(ROUND(({0}) / ({1})) * ({1}))",
+    "Round": "(ROUND(({0}) / ({1})) * ({1}))",
+    "SQRT": "SQRT({0})",
+    "Sqrt": "SQRT({0})",
+    "POW": "POWER({0}, {1})",
+    "Pow": "POWER({0}, {1})",
+    "LOG": "LN({0})",
+    "Log": "LN({0})",
+    "LOG10": "LOG10({0})",
+    "Log10": "LOG10({0})",
+    "MOD": "({0} % {1})",
+    "Mod": "({0} % {1})",
+    "MIN": "LEAST({0}, {1})",
+    "Max": "GREATEST({0}, {1})",
+    "MAX": "GREATEST({0}, {1})",
+    "Min": "LEAST({0}, {1})",
+    "RandInt": "FLOOR(RANDOM() * {0})::BIGINT",
+    "Random": "RANDOM()",
+    "PI": "PI()",
+    "SIN": "SIN({0})",
+    "COS": "COS({0})",
+    "TAN": "TAN({0})",
+    "ASIN": "ASIN({0})",
+    "ACOS": "ACOS({0})",
+    "ATAN": "ATAN({0})",
+    "ATAN2": "ATAN2({0}, {1})",
+    "EXP": "EXP({0})",
+    "Sign": "SIGN({0})",
+    # Null handling
+    "IsNull": "({0} IS NULL)",
+    "IsEmpty": "({0} IS NULL OR {0} = '')",
+    "NullConvert": "NULLIF({0}, '')",
+    "Null": "NULL",
+    # Type conversion
+    "ToNumber": "TRY_CAST({0} AS DOUBLE)",
+    "ToString_num": "CAST({0} AS VARCHAR)",
+    "TOBOOL": "CAST({0} AS BOOLEAN)",
+    # Date/Time
+    "DateTimeNow": "NOW()",
+    "DateTimeToday": "CURRENT_DATE",
+    "DateTimeAdd": "({0} + INTERVAL ({1}) {2})",
+    "DateTimeDiff": "DATEDIFF({2}, {1}, {0})",
+    "DateTimeFormat": "STRFTIME({0}, {1})",
+    "ToDate": "CAST({0} AS DATE)",
+    "DateTimeYear": "YEAR({0})",
+    "DateTimeMonth": "MONTH({0})",
+    "DateTimeDay": "DAY({0})",
+    "DateTimeHour": "HOUR({0})",
+    "DateTimeMinute": "MINUTE({0})",
+    "DateTimeSecond": "SECOND({0})",
+    "DateTimeFirstOfMonth": "DATE_TRUNC('month', {0})",
+    "DateTimeLastOfMonth": "(DATE_TRUNC('month', {0}) + INTERVAL '1 month' - INTERVAL '1 day')::DATE",
+    "DateTimeFirstOfYear": "DATE_TRUNC('year', {0})",
+    "DateTimeQuarter": "QUARTER({0})",
+    "DateTimeTrim": "DATE_TRUNC({1}, {0})",
+    # Conditional
+    "IIF": "(CASE WHEN {0} THEN {1} ELSE {2} END)",
+    "Switch": None,  # handled separately
+    # Misc
+    "TOPN": None,   # not a scalar function
+}
+
+
+def get_function_sql(name: str, args: list[str]) -> str:
+    """Render a function call to DuckDB SQL given evaluated argument SQL strings.
+
+    Args:
+        name: Function name as written in the expression (any letter case).
+        args: SQL text of each argument, in call order.
+
+    Returns:
+        The filled-in FUNCTION_MAP template, a ``CASE`` expression for Switch,
+        or ``name(arg, ...)`` unchanged when the function has no template or
+        the call supplies fewer arguments than the template needs (the call
+        may still work natively in DuckDB).
+    """
+    # Switch is variadic and rendered by hand. It must be recognised before
+    # the template lookup: its FUNCTION_MAP entry is None, which is
+    # indistinguishable from "unknown function" below.
+    if name.lower() == "switch":
+        return _render_switch(args)
+
+    # Exact match first, then a case-insensitive scan.
+    template = FUNCTION_MAP.get(name)
+    if template is None:
+        lowered = name.lower()
+        template = next(
+            (sql for key, sql in FUNCTION_MAP.items() if key.lower() == lowered),
+            None,
+        )
+
+    passthrough = f"{name}({', '.join(args)})"
+    if template is None:
+        # Unknown function — pass through as-is (may work in DuckDB natively)
+        return passthrough
+
+    try:
+        # str.format fills every placeholder in a single pass, so argument
+        # text that happens to contain "{1}" is never substituted again.
+        return template.format(*args)
+    except (IndexError, KeyError, ValueError):
+        # Too few arguments for the template (or a malformed template).
+        return passthrough
+
+
+def _render_switch(args: list[str]) -> str:
+    """Switch(val, default, v1, r1, v2, r2, ...) → CASE val WHEN v1 THEN r1 ... ELSE default END
+
+    Fewer than two arguments render as ``NULL``; a trailing case value with no
+    result is ignored; with no complete case the default alone is returned,
+    because a CASE without any WHEN is not valid SQL.
+    """
+    if len(args) < 2:
+        return "NULL"
+    val, default, *rest = args
+    cases = [
+        f"WHEN {when} THEN {then}"
+        for when, then in zip(rest[0::2], rest[1::2])
+    ]
+    if not cases:
+        return f"({default})"
+    return f"CASE {val} {' '.join(cases)} ELSE {default} END"
+
+
+def titlecase_sql(col: str) -> str:
+    """Build DuckDB SQL that approximates Titlecase for *col*.
+
+    The value is split on single spaces, each piece gets an upper-cased first
+    character and a lower-cased remainder, and the pieces are re-joined.
+    """
+    words = f"str_split({col}, ' ')"
+    capitalise = "x -> UPPER(LEFT(x,1)) || LOWER(SUBSTR(x,2))"
+    return f"array_to_string(list_transform({words}, {capitalise}), ' ')"
--- a/alteryx_runner/expression/transpiler.py
+++ b/alteryx_runner/expression/transpiler.py
@ -0,0 +1,630 @@
+"""
+Alteryx expression → DuckDB SQL transpiler.
+
+Handles:
+  [ColumnName]       → "ColumnName"
+  "string"           → 'string'  (double → single quotes)
+  IF...THEN...ENDIF  → CASE WHEN...END
+  IIF(c,t,f)         → CASE WHEN c THEN t ELSE f END
+  IsNull/IsEmpty     → IS NULL checks
+  NULL()             → NULL
+  AND/OR/NOT         → AND/OR/NOT
+  == / !=            → = / <>
+  Row references     → [Row-N:Field] / [Row+N:Field] become LAG / LEAD window calls
+  All functions in expression/functions.py
+"""
+from __future__ import annotations
+import re
+from enum import Enum, auto
+from typing import Optional
+import polars as pl
+import duckdb
+
+from .functions import get_function_sql, titlecase_sql
+
+
+class UnsupportedExpressionError(Exception):
+    """Raised when an expression cannot be tokenised or parsed into SQL."""
+
+
+# ---------------------------------------------------------------------------
+# Tokeniser
+# ---------------------------------------------------------------------------
+
+class TT(Enum):
+    """Token types produced by the tokeniser and consumed by the parser.
+
+    Values come from ``auto()``, so they follow declaration order.
+    """
+    LBRACKET = auto()   # [
+    RBRACKET = auto()   # ]
+    LPAREN = auto()     # (
+    RPAREN = auto()     # )
+    COMMA = auto()      # ,
+    PLUS = auto()       # +
+    MINUS = auto()      # -
+    STAR = auto()       # *
+    SLASH = auto()      # /
+    PERCENT = auto()    # %
+    CONCAT = auto()     # +  (string, same as PLUS — resolved by context)
+    EQ = auto()         # == or =
+    NEQ = auto()        # != or <>
+    LT = auto()         # <
+    LE = auto()         # <=
+    GT = auto()         # >
+    GE = auto()         # >=
+    AND = auto()
+    OR = auto()
+    NOT = auto()
+    IF = auto()
+    THEN = auto()
+    ELSEIF = auto()
+    ELSE = auto()
+    ENDIF = auto()
+    IIF = auto()
+    NULL_FUNC = auto()  # NULL()
+    ISNULL = auto()
+    ISEMPTY = auto()
+    NUMBER = auto()
+    STRING = auto()     # double-quoted string literal
+    IDENT = auto()      # function name or keyword
+    COLUMN = auto()     # [ColName]  — after stripping brackets
+    EOF = auto()        # end-of-input marker appended by tokenise
+    BANG = auto()       # !  (prefix not)
+    PIPE2 = auto()      # ||  (string concat in SQL)
+    POWER = auto()      # ^
+
+
+# Reserved words, keyed by their upper-cased spelling. tokenise upper-cases
+# each identifier before looking it up here, so matching is case-insensitive;
+# identifiers not listed become plain TT.IDENT tokens.
+_KEYWORDS = {
+    "AND": TT.AND,
+    "OR": TT.OR,
+    "NOT": TT.NOT,
+    "IF": TT.IF,
+    "THEN": TT.THEN,
+    "ELSEIF": TT.ELSEIF,
+    "ELSE": TT.ELSE,
+    "ENDIF": TT.ENDIF,
+    "IIF": TT.IIF,
+    "NULL": TT.NULL_FUNC,
+    "ISNULL": TT.ISNULL,
+    "ISEMPTY": TT.ISEMPTY,
+    "ISNUMBER": TT.IDENT,  # keep as IDENT, handled in primary
+    "TRUE": TT.IDENT,   # rendered as a boolean literal by the parser
+    "FALSE": TT.IDENT,  # rendered as a boolean literal by the parser
+}
+
+
+class Token:
+    """A lexical token: its type plus an optional payload.
+
+    ``value`` holds the source text for tokens that need it (column name,
+    number, string body, identifier) and is ``None`` otherwise.
+    """
+
+    __slots__ = ("type", "value")
+
+    def __init__(self, type_: TT, value: object = None):
+        self.type, self.value = type_, value
+
+    def __repr__(self):
+        kind, payload = self.type, self.value
+        return f"Token({kind}, {payload!r})"
+
+
+# Master token pattern; the name of the matching group is the token kind.
+# Alternatives are tried in order, so multi-character operators (<=, >=, !=,
+# <>, ==, ||) must stay ahead of their single-character prefixes.
+#
+# NUMBER deliberately has no leading sign: "-" is always its own token and
+# the parser applies unary minus. A signed NUMBER would swallow the operator
+# in "[a]-1" and leave two operands side by side.
+#
+# STRING accepts both double- and single-quoted literals.
+_TOKEN_RE = re.compile(
+    r"""
+    (?P<SPACE>\s+)
+  | (?P<COLUMN>\[[^\]]*\])
+  | (?P<NUMBER>(?:\d+(?:\.\d+)?|\.\d+)(?:[eE][+-]?\d+)?)
+  | (?P<STRING>"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*')
+  | (?P<LE><=)
+  | (?P<GE>>=)
+  | (?P<NEQ>!=|<>)
+  | (?P<EQ>==|=)
+  | (?P<LT><)
+  | (?P<GT>>)
+  | (?P<PIPE2>\|\|)
+  | (?P<CONCAT>\+)
+  | (?P<MINUS>-)
+  | (?P<STAR>\*)
+  | (?P<SLASH>/)
+  | (?P<PERCENT>%)
+  | (?P<POWER>\^)
+  | (?P<BANG>!)
+  | (?P<LPAREN>\()
+  | (?P<RPAREN>\))
+  | (?P<COMMA>,)
+  | (?P<IDENT>[A-Za-z_]\w*)
+""",
+    re.VERBOSE,
+)
+
+
+def tokenise(text: str) -> list[Token]:
+    """Split *text* into tokens, ending with a ``TT.EOF`` token.
+
+    Whitespace is dropped. Column references lose their brackets, string
+    literals lose their quotes and get single quotes doubled for SQL, and
+    identifiers that are reserved words receive the matching keyword type.
+
+    Raises:
+        UnsupportedExpressionError: if a character matches no token pattern.
+    """
+    # Pattern group name → token type, for tokens that carry no value.
+    valueless = {
+        "LE": TT.LE,
+        "GE": TT.GE,
+        "NEQ": TT.NEQ,
+        "EQ": TT.EQ,
+        "LT": TT.LT,
+        "GT": TT.GT,
+        "PIPE2": TT.PIPE2,
+        "CONCAT": TT.PLUS,
+        "MINUS": TT.MINUS,
+        "STAR": TT.STAR,
+        "SLASH": TT.SLASH,
+        "PERCENT": TT.PERCENT,
+        "POWER": TT.POWER,
+        "BANG": TT.BANG,
+        "LPAREN": TT.LPAREN,
+        "RPAREN": TT.RPAREN,
+        "COMMA": TT.COMMA,
+    }
+
+    found: list[Token] = []
+    cursor = 0
+    while cursor < len(text):
+        match = _TOKEN_RE.match(text, cursor)
+        if match is None:
+            raise UnsupportedExpressionError(
+                f"Unexpected character {text[cursor]!r} at pos {cursor} in: {text!r}"
+            )
+        cursor = match.end()
+        kind = match.lastgroup
+        raw = match.group()
+
+        if kind == "SPACE":
+            continue
+
+        if kind in valueless:
+            found.append(Token(valueless[kind]))
+        elif kind == "COLUMN":
+            found.append(Token(TT.COLUMN, raw[1:-1]))  # strip [ ]
+        elif kind == "NUMBER":
+            found.append(Token(TT.NUMBER, raw))
+        elif kind == "STRING":
+            # Body of the literal, re-escaped for a single-quoted SQL string.
+            body = raw[1:-1]
+            body = body.replace("\\'", "'")
+            body = body.replace("'", "''")
+            body = body.replace('\\"', '"')
+            found.append(Token(TT.STRING, body))
+        elif kind == "IDENT":
+            found.append(Token(_KEYWORDS.get(raw.upper(), TT.IDENT), raw))
+        else:
+            raise UnsupportedExpressionError(f"Unhandled token kind {kind}")
+
+    found.append(Token(TT.EOF))
+    return found
+
+
+# ---------------------------------------------------------------------------
+# Parser / code generator  (recursive descent → DuckDB SQL string)
+# ---------------------------------------------------------------------------
+
+class _Parser:
+    def __init__(self, tokens: list[Token]):
+        self._tokens = tokens
+        self._pos = 0
+
+    @property
+    def _cur(self) -> Token:
+        return self._tokens[self._pos]
+
+    def _peek(self, offset: int = 1) -> Token:
+        idx = self._pos + offset
+        if idx >= len(self._tokens):
+            return Token(TT.EOF)
+        return self._tokens[idx]
+
+    def _advance(self) -> Token:
+        tok = self._tokens[self._pos]
+        self._pos += 1
+        return tok
+
+    def _expect(self, tt: TT) -> Token:
+        tok = self._advance()
+        if tok.type != tt:
+            raise UnsupportedExpressionError(
+                f"Expected {tt}, got {tok.type} ({tok.value!r})"
+            )
+        return tok
+
+    # ------------------------------------------------------------------ #
+
+    def parse(self) -> str:
+        sql = self._parse_expr()
+        if self._cur.type != TT.EOF:
+            raise UnsupportedExpressionError(
+                f"Unexpected token at end: {self._cur}"
+            )
+        return sql
+
+    def _parse_expr(self) -> str:
+        return self._parse_or()
+
+    def _parse_or(self) -> str:
+        left = self._parse_and()
+        while self._cur.type == TT.OR:
+            self._advance()
+            right = self._parse_and()
+            left = f"({left} OR {right})"
+        return left
+
+    def _parse_and(self) -> str:
+        left = self._parse_not()
+        while self._cur.type == TT.AND:
+            self._advance()
+            right = self._parse_not()
+            left = f"({left} AND {right})"
+        return left
+
+    def _parse_not(self) -> str:
+        if self._cur.type in (TT.NOT, TT.BANG):
+            self._advance()
+            operand = self._parse_not()
+            return f"(NOT {operand})"
+        return self._parse_comparison()
+
+    def _parse_comparison(self) -> str:
+        left = self._parse_additive()
+        cmp_map = {
+            TT.EQ: "=",
+            TT.NEQ: "<>",
+            TT.LT: "<",
+            TT.LE: "<=",
+            TT.GT: ">",
+            TT.GE: ">=",
+        }
+        if self._cur.type in cmp_map:
+            op = cmp_map[self._advance().type]
+            right = self._parse_additive()
+            return f"({left} {op} {right})"
+        return left
+
+    def _parse_additive(self) -> str:
+        left = self._parse_multiplicative()
+        while self._cur.type in (TT.PLUS, TT.MINUS, TT.PIPE2):
+            op = self._advance()
+            right = self._parse_multiplicative()
+            if op.type == TT.PIPE2:
+                left = f"({left} || {right})"
+            elif op.type == TT.MINUS:
+                left = f"({left} - {right})"
+            else:
+                left = f"({left} + {right})"
+        return left
+
+    def _parse_multiplicative(self) -> str:
+        left = self._parse_unary()
+        while self._cur.type in (TT.STAR, TT.SLASH, TT.PERCENT, TT.POWER):
+            op = self._advance()
+            right = self._parse_unary()
+            if op.type == TT.POWER:
+                left = f"POWER({left}, {right})"
+            elif op.type == TT.PERCENT:
+                left = f"({left} % {right})"
+            elif op.type == TT.SLASH:
+                left = f"({left} / {right})"
+            else:
+                left = f"({left} * {right})"
+        return left
+
+    def _parse_unary(self) -> str:
+        if self._cur.type == TT.MINUS:
+            self._advance()
+            return f"(-{self._parse_primary()})"
+        if self._cur.type == TT.PLUS:
+            self._advance()
+            return self._parse_primary()
+        return self._parse_primary()
+
+    def _parse_primary(self) -> str:  # noqa: C901 (complexity ok for parser)
+        tok = self._cur
+
+        # Parenthesised sub-expression
+        if tok.type == TT.LPAREN:
+            self._advance()
+            inner = self._parse_expr()
+            self._expect(TT.RPAREN)
+            return f"({inner})"
+
+        # Column reference
+        if tok.type == TT.COLUMN:
+            self._advance()
+            # Row reference [Row-N:Field] or [Row+N:Field]
+            col = tok.value
+            row_m = re.match(r"^Row([+-]\d+):(.+)$", col, re.IGNORECASE)
+            if row_m:
+                offset = int(row_m.group(1))
+                field = row_m.group(2)
+                func = "LAG" if offset < 0 else "LEAD"
+                return f'{func}("{field}", {abs(offset)}) OVER ()'
+            return f'"{col}"'
+
+        # Numeric literal
+        if tok.type == TT.NUMBER:
+            self._advance()
+            return tok.value
+
+        # String literal (already converted to single-quoted)
+        if tok.type == TT.STRING:
+            self._advance()
+            return f"'{tok.value}'"
+
+        # IF … THEN … [ELSEIF … THEN …]* [ELSE …] ENDIF
+        if tok.type == TT.IF:
+            return self._parse_if()
+
+        # NULL() or bare NULL keyword
+        if tok.type == TT.NULL_FUNC:
+            self._advance()
+            if self._cur.type == TT.LPAREN:
+                self._advance()
+                self._expect(TT.RPAREN)
+            return "NULL"
+
+        # IsNull([F]) — keyword form
+        if tok.type == TT.ISNULL:
+            self._advance()
+            self._expect(TT.LPAREN)
+            inner = self._parse_expr()
+            self._expect(TT.RPAREN)
+            return f"({inner} IS NULL)"
+
+        # IsEmpty([F]) — keyword form
+        if tok.type == TT.ISEMPTY:
+            self._advance()
+            self._expect(TT.LPAREN)
+            inner = self._parse_expr()
+            self._expect(TT.RPAREN)
+            return f"({inner} IS NULL OR {inner} = '')"
+
+        # Function call or bare identifier
+        if tok.type == TT.IDENT:
+            name = tok.value
+            upper = name.upper()
+            self._advance()
+
+            # Bare boolean/null literals
+            if upper == "TRUE":
+                return "TRUE"
+            if upper == "FALSE":
+                return "FALSE"
+            if upper == "NULL":
+                if self._cur.type == TT.LPAREN:
+                    self._advance()
+                    self._expect(TT.RPAREN)
+                return "NULL"
+
+            # IsNull / IsEmpty used as plain identifiers (case variations)
+            if upper == "ISNULL":
+                self._expect(TT.LPAREN)
+                inner = self._parse_expr()
+                self._expect(TT.RPAREN)
+                return f"({inner} IS NULL)"
+            if upper == "ISEMPTY":
+                self._expect(TT.LPAREN)
+                inner = self._parse_expr()
+                self._expect(TT.RPAREN)
+                return f"({inner} IS NULL OR {inner} = '')"
+            if upper == "ISNUMBER":
+                self._expect(TT.LPAREN)
+                inner = self._parse_expr()
+                self._expect(TT.RPAREN)
+                return f"(TRY_CAST({inner} AS DOUBLE) IS NOT NULL)"
+
+            # Titlecase — special SQL rendering
+            if upper == "TITLECASE":
+                self._expect(TT.LPAREN)
+                inner = self._parse_expr()
+                self._expect(TT.RPAREN)
+                return titlecase_sql(inner)
+
+            # DateTimeAdd / DateTimeDiff need string arg unquoted for INTERVAL
+            if upper == "DATETIMEADD":
+                self._expect(TT.LPAREN)
+                d_arg = self._parse_expr()
+                self._expect(TT.COMMA)
+                n_arg = self._parse_expr()
+                self._expect(TT.COMMA)
+                unit_arg = self._parse_expr()
+                self._expect(TT.RPAREN)
+                # unit_arg is a SQL string like 'days' — strip quotes for INTERVAL keyword
+                unit = unit_arg.strip("'").rstrip("s").upper()
+                return f"({d_arg} + INTERVAL ({n_arg}) {unit})"
+            if upper == "DATETIMEDIFF":
+                self._expect(TT.LPAREN)
+                d1 = self._parse_expr()
+                self._expect(TT.COMMA)
+                d2 = self._parse_expr()
+                self._expect(TT.COMMA)
+                unit_arg = self._parse_expr()
+                self._expect(TT.RPAREN)
+                unit = unit_arg.strip("'").rstrip("s").upper()
+                return f"DATEDIFF('{unit}', {d2}, {d1})"
+
+            # IIF as identifier (keyword token is TT.IIF but may arrive as IDENT)
+            if upper == "IIF":
+                self._expect(TT.LPAREN)
+                cond = self._parse_expr()
+                self._expect(TT.COMMA)
+                true_val = self._parse_expr()
+                self._expect(TT.COMMA)
+                false_val = self._parse_expr()
+                self._expect(TT.RPAREN)
+                return f"(CASE WHEN {cond} THEN {true_val} ELSE {false_val} END)"
+
+            if self._cur.type == TT.LPAREN:
+                # Function call
+                self._advance()
+                args: list[str] = []
+                if self._cur.type != TT.RPAREN:
+                    args.append(self._parse_expr())
+                    while self._cur.type == TT.COMMA:
+                        self._advance()
+                        args.append(self._parse_expr())
+                self._expect(TT.RPAREN)
+                return get_function_sql(name, args)
+
+            # Bare identifier (e.g. a column name without brackets — unusual)
+            return f'"{name}"'
+
+        # IIF keyword token
+        if tok.type == TT.IIF:
+            self._advance()
+            self._expect(TT.LPAREN)
+            cond = self._parse_expr()
+            self._expect(TT.COMMA)
+            true_val = self._parse_expr()
+            self._expect(TT.COMMA)
+            false_val = self._parse_expr()
+            self._expect(TT.RPAREN)
+            return f"(CASE WHEN {cond} THEN {true_val} ELSE {false_val} END)"
+
+        raise UnsupportedExpressionError(f"Unexpected token: {tok}")
+
+    def _parse_if(self) -> str:
+        """Translate ``IF .. THEN .. [ELSEIF .. THEN ..]* [ELSE ..] ENDIF`` to SQL.
+
+        Consumes tokens from the leading ``IF`` through the closing ``ENDIF``
+        and returns ``CASE WHEN c THEN v ... [ELSE e] END`` assembled from the
+        already-transpiled sub-expressions. Unlike the IIF translation, the
+        result is not wrapped in parentheses.
+        """
+        self._expect(TT.IF)
+        pieces: list[str] = ["CASE"]
+        fallback: Optional[str] = None
+
+        # The leading IF and each ELSEIF share the "<cond> THEN <value>" shape,
+        # so a single loop handles both; it stops at the first non-ELSEIF token.
+        while True:
+            condition = self._parse_expr()
+            self._expect(TT.THEN)
+            outcome = self._parse_expr()
+            pieces.append(f"WHEN {condition} THEN {outcome}")
+            if self._cur.type != TT.ELSEIF:
+                break
+            self._advance()
+
+        # ELSE is optional; when absent the CASE is emitted without an ELSE arm.
+        if self._cur.type == TT.ELSE:
+            self._advance()
+            fallback = self._parse_expr()
+
+        self._expect(TT.ENDIF)
+
+        if fallback is not None:
+            pieces.append(f"ELSE {fallback}")
+        pieces.append("END")
+        return " ".join(pieces)
+
+
+# ---------------------------------------------------------------------------
+# Public API
+# ---------------------------------------------------------------------------
+
+def transpile(expression: str) -> str:
+    """Translate an Alteryx expression string into a DuckDB SQL fragment.
+
+    Surrounding whitespace is ignored. An expression that is empty after
+    stripping yields the SQL literal ``NULL``; anything else is tokenised and
+    handed to the parser.
+    """
+    stripped = expression.strip()
+    if stripped:
+        return _Parser(tokenise(stripped)).parse()
+    return "NULL"
+
+
+def _coerce_numeric_strings(df: pl.DataFrame) -> pl.DataFrame:
+    """Return ``df`` with numeric-looking string columns cast to numbers.
+
+    A string column is converted only when every non-null value survives a
+    non-strict cast: Int64 is tried first, then Float64. Columns that are not
+    strings, or that hold no non-null values, are left untouched. When nothing
+    qualifies the original frame is returned as-is.
+
+    This mirrors Alteryx, which implicitly coerces TextInput strings to
+    numbers when an expression treats them numerically.
+    """
+    numeric_casts: list[pl.Expr] = []
+    for name in df.columns:
+        column = df[name]
+        if column.dtype != pl.String:
+            continue
+        present = column.drop_nulls()
+        if len(present) == 0:
+            continue
+        # Order matters: integer-looking data must become Int64, not Float64.
+        for target in (pl.Int64, pl.Float64):
+            if present.cast(target, strict=False).null_count() == 0:
+                numeric_casts.append(pl.col(name).cast(target, strict=False))
+                break
+    if not numeric_casts:
+        return df
+    return df.with_columns(numeric_casts)
+
+
+class ExpressionTranspiler:
+    """Stateful transpiler bound to a DuckDB connection for evaluation.
+
+    Each evaluation registers the input frame on the connection under a
+    unique temporary name, runs a single ``SELECT`` over it, and unregisters
+    it again. If DuckDB rejects the query with a ``BinderException`` (a type
+    mismatch), the query is retried once after numeric-looking string columns
+    have been coerced to numbers.
+    """
+
+    def __init__(self, con: duckdb.DuckDBPyConnection):
+        self._con = con
+        # Monotonic counter so every registered frame gets a distinct name.
+        self._view_counter = 0
+
+    def _register(self, df: pl.DataFrame) -> str:
+        """Register ``df`` (as Arrow) under a fresh name and return that name."""
+        name = f"_expr_df_{self._view_counter}"
+        self._view_counter += 1
+        self._con.register(name, df.to_arrow())
+        return name
+
+    def _select_once(self, df: pl.DataFrame, sql_expr: str, alias: str) -> pl.Series:
+        """Run ``SELECT (sql_expr) AS alias`` over ``df`` and return that column.
+
+        The temporary registration is always removed, whether or not the
+        query succeeds.
+        """
+        # Double any embedded quote so a field name containing '"' cannot
+        # break out of the quoted identifier.
+        quoted_alias = '"' + alias.replace('"', '""') + '"'
+        view = self._register(df)
+        try:
+            result = self._con.execute(
+                f'SELECT ({sql_expr}) AS {quoted_alias} FROM "{view}"'
+            ).pl()
+            return result[alias]
+        finally:
+            # unregister() is the counterpart of register(); presumably it also
+            # clears the connection's own bookkeeping for the object, which a
+            # raw DROP VIEW may not do -- confirm against the DuckDB version in use.
+            self._con.unregister(view)
+
+    def _select(self, df: pl.DataFrame, sql_expr: str, alias: str) -> pl.Series:
+        """Evaluate ``sql_expr`` over ``df``, retrying once with coerced columns."""
+        try:
+            return self._select_once(df, sql_expr, alias)
+        except duckdb.BinderException:
+            # Type mismatch: retry after coercing numeric-looking string columns.
+            return self._select_once(_coerce_numeric_strings(df), sql_expr, alias)
+
+    def eval_mask(self, df: pl.DataFrame, expression: str) -> pl.Series:
+        """Evaluate a boolean Alteryx expression against df, return bool Series."""
+        return self._select(df, transpile(expression), "_mask")
+
+    def eval_series(
+        self,
+        df: pl.DataFrame,
+        expression: str,
+        field: str,
+        dtype: pl.PolarsDataType,
+    ) -> pl.Series:
+        """Evaluate a scalar Alteryx expression against df, return a Series.
+
+        The result column is named ``field`` and is cast to ``dtype`` on a
+        best-effort basis: if the cast raises, the uncast series is returned.
+        """
+        series = self._select(df, transpile(expression), field)
+        try:
+            return series.cast(dtype)
+        except Exception:
+            # Deliberate best-effort: keep DuckDB's inferred type rather than
+            # failing the whole evaluation over an impossible cast.
+            return series
+
+    def eval_scalar(self, expression: str) -> object:
+        """Evaluate an expression that requires no input columns."""
+        sql_expr = transpile(expression)
+        row = self._con.execute(f"SELECT ({sql_expr})").fetchone()
+        return row[0] if row else None
--- a/alteryx_runner/tests/__init__.py
+++ b/alteryx_runner/tests/__init__.py
--- a/alteryx_runner/tests/__pycache__/__init__.cpython-312.pyc
+++ b/alteryx_runner/tests/__pycache__/__init__.cpython-312.pyc
--- a/alteryx_runner/tests/__pycache__/conftest.cpython-312-pytest-9.0.3.pyc
+++ b/alteryx_runner/tests/__pycache__/conftest.cpython-312-pytest-9.0.3.pyc
--- a/alteryx_runner/tests/__pycache__/test_parser.cpython-312-pytest-9.0.3.pyc
+++ b/alteryx_runner/tests/__pycache__/test_parser.cpython-312-pytest-9.0.3.pyc
--- a/alteryx_runner/tests/__pycache__/test_tools.cpython-312-pytest-9.0.3.pyc
+++ b/alteryx_runner/tests/__pycache__/test_tools.cpython-312-pytest-9.0.3.pyc
--- a/alteryx_runner/tests/__pycache__/test_transpiler.cpython-312-pytest-9.0.3.pyc
+++ b/alteryx_runner/tests/__pycache__/test_transpiler.cpython-312-pytest-9.0.3.pyc
--- a/alteryx_runner/tests/conftest.py
+++ b/alteryx_runner/tests/conftest.py
@ -0,0 +1,25 @@
+"""Shared fixtures for tests."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import pytest
+import duckdb
+
+# Ensure the alteryx_runner package root is on sys.path
+PKG = Path(__file__).parent.parent  # alteryx_runner/
+if str(PKG) not in sys.path:
+    sys.path.insert(0, str(PKG))
+
+from engine.context import RunContext
+
+
+@pytest.fixture
+def ctx(tmp_path):
+    """Provide a non-verbose ``RunContext`` rooted at pytest's per-test temp dir."""
+    workflow_root = str(tmp_path)
+    return RunContext(verbose=False, workflow_dir=workflow_root)
+
+
+@pytest.fixture
+def duckdb_con():
+    """Yield an in-memory DuckDB connection and close it once the test is done."""
+    connection = duckdb.connect(":memory:")
+    yield connection
+    connection.close()
--- a/alteryx_runner/tests/test_parser.py
+++ b/alteryx_runner/tests/test_parser.py
@ -0,0 +1,89 @@
+"""Tests for the XML parser."""
+from __future__ import annotations
+import sys
+import textwrap
+from pathlib import Path
+import pytest
+
+PKG = Path(__file__).parent.parent  # alteryx_runner/
+if str(PKG) not in sys.path:
+    sys.path.insert(0, str(PKG))
+
+from engine.parser import parse_workflow
+
+
+def _write_yxmd(tmp_path: Path, body: str) -> Path:
+    """Write a minimal ``.yxmd`` document wrapping ``body`` and return its path.
+
+    ``body`` is placed inside an ``<AlteryxDocument yxmdVer="2022.1">`` root,
+    followed by an empty ``<Properties/>`` element. The file is always named
+    ``test.yxmd`` inside ``tmp_path``.
+    """
+    content = f'<AlteryxDocument yxmdVer="2022.1">{body}<Properties/></AlteryxDocument>'
+    p = tmp_path / "test.yxmd"
+    # The document has no XML declaration, so write UTF-8 explicitly instead of
+    # relying on the platform's locale encoding.
+    p.write_text(content, encoding="utf-8")
+    return p
+
+
+class TestParser:
+    """Feed small on-disk ``.yxmd`` documents through ``parse_workflow``."""
+
+    def test_simple_nodes(self, tmp_path):
+        """Two nodes joined by one connection are both picked up."""
+        xml_body = textwrap.dedent("""\
+            <Nodes>
+              <Node ToolID="1">
+                <GuiSettings Plugin="AlteryxBasePluginsGui.TextInput.TextInput">
+                  <Position x="0" y="0"/>
+                </GuiSettings>
+                <Properties><Configuration/></Properties>
+              </Node>
+              <Node ToolID="2">
+                <GuiSettings Plugin="AlteryxBasePluginsGui.Filter.Filter">
+                  <Position x="100" y="0"/>
+                </GuiSettings>
+                <Properties><Configuration><Expression>True</Expression></Configuration></Properties>
+              </Node>
+            </Nodes>
+            <Connections>
+              <Connection>
+                <Origin ToolID="1" Connection="Output"/>
+                <Destination ToolID="2" Connection="Input"/>
+              </Connection>
+            </Connections>
+        """)
+        workflow_file = _write_yxmd(tmp_path, xml_body)
+        parsed = parse_workflow(str(workflow_file))
+        for tool_id in (1, 2):
+            assert tool_id in parsed.nodes
+        assert len(parsed.connections) == 1
+        link = parsed.connections[0]
+        assert link.origin_id == 1
+        assert link.dest_id == 2
+
+    def test_wireless_connection(self, tmp_path):
+        """``Wireless="True"`` on a connection is surfaced as a boolean flag."""
+        xml_body = textwrap.dedent("""\
+            <Nodes>
+              <Node ToolID="10">
+                <GuiSettings Plugin="AlteryxBasePluginsGui.TextInput.TextInput">
+                  <Position x="0" y="0"/>
+                </GuiSettings>
+                <Properties><Configuration/></Properties>
+              </Node>
+            </Nodes>
+            <Connections>
+              <Connection Wireless="True">
+                <Origin ToolID="10" Connection="Output"/>
+                <Destination ToolID="20" Connection="Input"/>
+              </Connection>
+            </Connections>
+        """)
+        workflow_file = _write_yxmd(tmp_path, xml_body)
+        parsed = parse_workflow(str(workflow_file))
+        first_link = parsed.connections[0]
+        assert first_link.wireless is True
+
+    def test_node_position(self, tmp_path):
+        """The ``<Position>`` x/y attributes become an integer tuple."""
+        xml_body = textwrap.dedent("""\
+            <Nodes>
+              <Node ToolID="5">
+                <GuiSettings Plugin="AlteryxBasePluginsGui.Sort.Sort">
+                  <Position x="42" y="99"/>
+                </GuiSettings>
+                <Properties><Configuration/></Properties>
+              </Node>
+            </Nodes>
+            <Connections/>
+        """)
+        workflow_file = _write_yxmd(tmp_path, xml_body)
+        parsed = parse_workflow(str(workflow_file))
+        sort_node = parsed.nodes[5]
+        assert sort_node.position == (42, 99)
--- a/alteryx_runner/tests/test_tools.py
+++ b/alteryx_runner/tests/test_tools.py
@ -0,0 +1,266 @@
+"""Integration-level tool tests using in-memory DataFrames (no .yxmd required)."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import xml.etree.ElementTree as ET
+import pytest
+import polars as pl
+
+PKG = Path(__file__).parent.parent  # alteryx_runner/
+if str(PKG) not in sys.path:
+    sys.path.insert(0, str(PKG))
+
+from engine.graph import NodeDef
+from engine.context import RunContext
+
+
+def make_node(tool_id: int, plugin: str, config_xml: str) -> NodeDef:
+    """Build a ``NodeDef`` whose config is the element parsed from ``config_xml``."""
+    parsed_config = ET.fromstring(config_xml)
+    return NodeDef(config=parsed_config, plugin=plugin, tool_id=tool_id)
+
+
+@pytest.fixture
+def ctx(tmp_path):
+    """Provide a non-verbose ``RunContext`` rooted at pytest's per-test temp dir."""
+    workflow_root = str(tmp_path)
+    return RunContext(verbose=False, workflow_dir=workflow_root)
+
+
+# ---------------------------------------------------------------------------
+# TextInput
+# ---------------------------------------------------------------------------
+
+class TestTextInput:
+    """TextInput tool: inline ``<Data>`` rows become a string DataFrame."""
+
+    def test_basic(self, ctx):
+        """Two fields and two rows load as strings; an empty cell is NULL."""
+        from tools.inout.text_input import TextInputTool
+        config_xml = """<Configuration>
+          <Fields><Field name="A"/><Field name="B"/></Fields>
+          <Data>
+            <r><c>1</c><c>hello</c></r>
+            <r><c>2</c><c></c></r>
+          </Data>
+        </Configuration>"""
+        tool = TextInputTool(make_node(1, "", config_xml), ctx)
+        frame = tool.execute({})["Output"]
+        assert frame.shape == (2, 2)
+        assert frame["A"].to_list() == ["1", "2"]
+        assert frame["B"][1] is None   # empty → NULL
+
+
+# ---------------------------------------------------------------------------
+# Filter
+# ---------------------------------------------------------------------------
+
+class TestFilter:
+    """Filter tool: rows are split between the ``True`` and ``False`` outputs."""
+
+    def _df(self) -> pl.DataFrame:
+        """Four rows alternating between the North and South regions."""
+        return pl.DataFrame(
+            {
+                "ID": [1, 2, 3, 4],
+                "Region": ["North", "South", "North", "South"],
+            }
+        )
+
+    def test_simple_gt(self, ctx):
+        """``[ID] > 2`` sends two rows to each output."""
+        from tools.preparation.filter_tool import FilterTool
+        config_xml = """<Configuration>
+          <Mode>Simple</Mode>
+          <Simple>
+            <Operator>&gt;</Operator>
+            <Field>ID</Field>
+            <Operands><Operand>2</Operand><DateType>fixed</DateType></Operands>
+          </Simple>
+          <Expression>[ID] &gt; 2</Expression>
+        </Configuration>"""
+        tool = FilterTool(make_node(1, "", config_xml), ctx)
+        outputs = tool.execute({"Input": self._df()})
+        for port in ("True", "False"):
+            assert len(outputs[port]) == 2
+
+    def test_custom_expr(self, ctx):
+        """A custom string comparison keeps only the South rows."""
+        from tools.preparation.filter_tool import FilterTool
+        config_xml = """<Configuration>
+          <Mode>Custom</Mode>
+          <Expression>[Region] == "South"</Expression>
+        </Configuration>"""
+        tool = FilterTool(make_node(1, "", config_xml), ctx)
+        outputs = tool.execute({"Input": self._df()})
+        assert len(outputs["True"]) == 2
+        regions = outputs["True"]["Region"].to_list()
+        assert all(region == "South" for region in regions)
+
+
+# ---------------------------------------------------------------------------
+# Select
+# ---------------------------------------------------------------------------
+
+class TestSelect:
+    """Select tool: dropping, renaming and ``*Unknown`` pass-through."""
+
+    def test_drop_and_rename(self, ctx):
+        """A is renamed, B is dropped, and unlisted C rides along via *Unknown."""
+        from tools.preparation.select_tool import SelectTool
+        source = pl.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
+        config_xml = """<Configuration OrderChanged="False">
+          <SelectFields>
+            <SelectField field="A" selected="True" rename="Alpha"/>
+            <SelectField field="B" selected="False"/>
+            <SelectField field="*Unknown" selected="True"/>
+          </SelectFields>
+        </Configuration>"""
+        tool = SelectTool(make_node(1, "", config_xml), ctx)
+        selected = tool.execute({"Input": source})["Output"]
+        assert "Alpha" in selected.columns
+        assert "B" not in selected.columns
+        assert "C" in selected.columns  # *Unknown passes through
+
+
+# ---------------------------------------------------------------------------
+# Sort
+# ---------------------------------------------------------------------------
+
+class TestSort:
+    """Sort tool: single-field ordering in both directions."""
+
+    def test_ascending(self, ctx):
+        """Names come back in ascending alphabetical order."""
+        from tools.preparation.sort_tool import SortTool
+        source = pl.DataFrame({"Name": ["Charlie", "Alice", "Bob"], "Score": [3, 1, 2]})
+        config_xml = """<Configuration>
+          <SortInfo locale="0">
+            <Field field="Name" order="Ascending"/>
+          </SortInfo>
+        </Configuration>"""
+        tool = SortTool(make_node(1, "", config_xml), ctx)
+        ordered = tool.execute({"Input": source})["Output"]
+        assert ordered["Name"].to_list() == ["Alice", "Bob", "Charlie"]
+
+    def test_descending(self, ctx):
+        """Scores come back from highest to lowest."""
+        from tools.preparation.sort_tool import SortTool
+        source = pl.DataFrame({"Score": [3, 1, 2]})
+        config_xml = """<Configuration>
+          <SortInfo locale="0">
+            <Field field="Score" order="Descending"/>
+          </SortInfo>
+        </Configuration>"""
+        tool = SortTool(make_node(1, "", config_xml), ctx)
+        ordered = tool.execute({"Input": source})["Output"]
+        assert ordered["Score"].to_list() == [3, 2, 1]
+
+
+# ---------------------------------------------------------------------------
+# Unique
+# ---------------------------------------------------------------------------
+
+class TestUnique:
+    """Unique tool: first occurrences versus repeats."""
+
+    def test_unique_and_duplicate(self, ctx):
+        """Five names with two repeats give 3 unique rows and 2 duplicates."""
+        from tools.preparation.unique_tool import UniqueTool
+        source = pl.DataFrame({"Name": ["Alice", "Bob", "Alice", "Charlie", "Bob"]})
+        config_xml = """<Configuration>
+          <UniqueFields><Field name="Name"/></UniqueFields>
+        </Configuration>"""
+        tool = UniqueTool(make_node(1, "", config_xml), ctx)
+        outputs = tool.execute({"Input": source})
+        assert len(outputs["Unique"]) == 3
+        assert len(outputs["Duplicate"]) == 2
+
+
+# ---------------------------------------------------------------------------
+# Sample
+# ---------------------------------------------------------------------------
+
+class TestSample:
+    """Sample tool: taking rows from the head or the tail."""
+
+    def _df(self) -> pl.DataFrame:
+        """Ten rows holding the integers 0 through 9 in column ``N``."""
+        values = list(range(10))
+        return pl.DataFrame({"N": values})
+
+    def test_first(self, ctx):
+        """``First`` with N=3 keeps the leading three rows."""
+        from tools.preparation.sample_tool import SampleTool
+        config_xml = "<Configuration><Mode>First</Mode><N>3</N><GroupFields/></Configuration>"
+        tool = SampleTool(make_node(1, "", config_xml), ctx)
+        sampled = tool.execute({"Input": self._df()})["Output"]
+        assert len(sampled) == 3
+        assert sampled["N"].to_list() == [0, 1, 2]
+
+    def test_last(self, ctx):
+        """``Last`` with N=2 keeps the trailing two rows."""
+        from tools.preparation.sample_tool import SampleTool
+        config_xml = "<Configuration><Mode>Last</Mode><N>2</N><GroupFields/></Configuration>"
+        tool = SampleTool(make_node(1, "", config_xml), ctx)
+        sampled = tool.execute({"Input": self._df()})["Output"]
+        assert sampled["N"].to_list() == [8, 9]
+
+
+# ---------------------------------------------------------------------------
+# Union
+# ---------------------------------------------------------------------------
+
+class TestUnion:
+    """Union tool: stacking two inputs by column name or by position."""
+
+    def test_by_name(self, ctx):
+        """``Auto`` mode stacks frames whose columns are in a different order."""
+        from tools.join.union_tool import UnionTool
+        first = pl.DataFrame({"A": [1], "B": [2]})
+        second = pl.DataFrame({"B": [4], "A": [3]})
+        config_xml = "<Configuration><Mode>Auto</Mode></Configuration>"
+        tool = UnionTool(make_node(1, "", config_xml), ctx)
+        stacked = tool.execute({"Input1": first, "Input2": second})["Output"]
+        assert len(stacked) == 2
+
+    def test_by_position(self, ctx):
+        """``ByPosition`` mode keeps the first input's column names."""
+        from tools.join.union_tool import UnionTool
+        first = pl.DataFrame({"X": [1], "Y": [2]})
+        second = pl.DataFrame({"P": [3], "Q": [4]})
+        config_xml = "<Configuration><Mode>ByPosition</Mode></Configuration>"
+        tool = UnionTool(make_node(1, "", config_xml), ctx)
+        stacked = tool.execute({"Input1": first, "Input2": second})["Output"]
+        assert stacked.columns == ["X", "Y"]
+        assert len(stacked) == 2
+
+
+# ---------------------------------------------------------------------------
+# Summarize
+# ---------------------------------------------------------------------------
+
+class TestSummarize:
+    """Summarize tool: group-by with an aggregate."""
+
+    def test_group_sum(self, ctx):
+        """Sales are summed per region and exposed under the renamed column."""
+        from tools.transform.summarize_tool import SummarizeTool
+        source = pl.DataFrame(
+            {
+                "Region": ["N", "S", "N", "S"],
+                "Sales": [100, 200, 150, 250],
+            }
+        )
+        config_xml = """<Configuration>
+          <SummarizeFields>
+            <SummarizeField field="Region" action="GroupBy" rename="Region"/>
+            <SummarizeField field="Sales" action="Sum" rename="Total"/>
+          </SummarizeFields>
+        </Configuration>"""
+        tool = SummarizeTool(make_node(1, "", config_xml), ctx)
+        summary = tool.execute({"Input": source})["Output"]
+        assert set(summary.columns) == {"Region", "Total"}
+        # Row order is not asserted, so look the totals up by region.
+        by_region = {
+            region: total
+            for region, total in zip(summary["Region"].to_list(), summary["Total"].to_list())
+        }
+        assert by_region["N"] == 250
+        assert by_region["S"] == 450
+
+
+# ---------------------------------------------------------------------------
+# Transpose
+# ---------------------------------------------------------------------------
+
+class TestTranspose:
+    """Transpose tool: unpivoting data fields into Name/Value pairs."""
+
+    def test_unpivot(self, ctx):
+        """Two rows with two data fields unpivot into four Name/Value rows."""
+        from tools.transform.transpose_tool import TransposeTool
+        source = pl.DataFrame({"ID": [1, 2], "Visits": [5, 10], "Spend": [100, 200]})
+        config_xml = """<Configuration>
+          <KeyFields><Field name="ID"/></KeyFields>
+          <DataFields>
+            <Field name="Visits"/>
+            <Field name="Spend"/>
+          </DataFields>
+        </Configuration>"""
+        tool = TransposeTool(make_node(1, "", config_xml), ctx)
+        unpivoted = tool.execute({"Input": source})["Output"]
+        for expected_column in ("Name", "Value"):
+            assert expected_column in unpivoted.columns
+        assert len(unpivoted) == 4  # 2 rows × 2 data cols
+
+
+# ---------------------------------------------------------------------------
+# RecordID
+# ---------------------------------------------------------------------------
+
+class TestRecordID:
+    """RecordID tool: sequential id column generation."""
+
+    def test_starts_at_one(self, ctx):
+        """Ids count up from ``StartValue`` and the new column is placed first."""
+        from tools.preparation.record_id import RecordIDTool
+        source = pl.DataFrame({"Name": ["A", "B", "C"]})
+        config_xml = """<Configuration>
+          <Field>ID</Field>
+          <StartValue>1</StartValue>
+          <FieldType>Int32</FieldType>
+        </Configuration>"""
+        tool = RecordIDTool(make_node(1, "", config_xml), ctx)
+        numbered = tool.execute({"Input": source})["Output"]
+        assert numbered["ID"].to_list() == [1, 2, 3]
+        assert numbered.columns[0] == "ID"
--- a/alteryx_runner/tests/test_transpiler.py
+++ b/alteryx_runner/tests/test_transpiler.py
@ -0,0 +1,155 @@
+"""Unit tests for the Alteryx → DuckDB expression transpiler."""
+from __future__ import annotations
+import sys
+from pathlib import Path
+import pytest
+
+PKG = Path(__file__).parent.parent  # alteryx_runner/
+if str(PKG) not in sys.path:
+    sys.path.insert(0, str(PKG))
+
+from expression.transpiler import transpile, UnsupportedExpressionError
+
+
+def t(expr: str) -> str:
+    """Shorthand for ``transpile`` that keeps the assertions below compact."""
+    sql = transpile(expr)
+    return sql
+
+
+class TestLiterals:
+    """Literal values map to their SQL spellings."""
+
+    def test_number(self):
+        """Integers pass through unchanged."""
+        sql = t("42")
+        assert sql == "42"
+
+    def test_float(self):
+        """Decimals pass through unchanged."""
+        sql = t("3.14")
+        assert sql == "3.14"
+
+    def test_string(self):
+        """Double-quoted Alteryx strings become single-quoted SQL strings."""
+        sql = t('"hello"')
+        assert sql == "'hello'"
+
+    def test_null(self):
+        """``NULL()`` becomes the bare SQL ``NULL``."""
+        sql = t("NULL()")
+        assert sql == "NULL"
+
+    def test_true(self):
+        """``True`` is upper-cased."""
+        sql = t("True")
+        assert sql == "TRUE"
+
+    def test_false(self):
+        """``False`` is upper-cased."""
+        sql = t("False")
+        assert sql == "FALSE"
+
+
+class TestColumnRef:
+    """Bracketed field references become double-quoted SQL identifiers."""
+
+    def test_simple(self):
+        """A plain field name is quoted as-is."""
+        sql = t("[CustomerID]")
+        assert sql == '"CustomerID"'
+
+    def test_spaces(self):
+        """Spaces inside the brackets are preserved."""
+        sql = t("[First Name]")
+        assert sql == '"First Name"'
+
+
+class TestOperators:
+    """Comparison and boolean operators."""
+
+    def test_eq(self):
+        """``==`` becomes SQL ``=`` inside parentheses."""
+        expected = '("A" = "B")'
+        assert t("[A] == [B]") == expected
+
+    def test_neq(self):
+        """``!=`` becomes SQL ``<>`` inside parentheses."""
+        expected = '("A" <> "B")'
+        assert t("[A] != [B]") == expected
+
+    def test_gt(self):
+        """``>`` is kept and the comparison is parenthesised."""
+        expected = '("Score" > 50)'
+        assert t("[Score] > 50") == expected
+
+    def test_and(self):
+        """``AND`` survives translation."""
+        assert "AND" in t('[A] > 0 AND [B] < 10')
+
+    def test_or(self):
+        """``OR`` survives translation."""
+        assert "OR" in t('[A] > 0 OR [B] < 0')
+
+    def test_not(self):
+        """The ``NOT`` keyword survives translation."""
+        assert "NOT" in t('NOT [IsActive]')
+
+    def test_bang(self):
+        """A leading ``!`` is rendered as ``NOT``."""
+        assert "NOT" in t('![IsActive]')
+
+
+class TestIfThenEndif:
+    """``IF ... ENDIF`` blocks become SQL ``CASE`` expressions."""
+
+    def test_simple(self):
+        """IF/ELSE produces a CASE carrying both string outcomes."""
+        out = t('IF [Score] > 50 THEN "Pass" ELSE "Fail" ENDIF')
+        for fragment in ("CASE WHEN", "'Pass'", "'Fail'"):
+            assert fragment in out
+
+    def test_elseif(self):
+        """Each IF/ELSEIF arm contributes exactly one WHEN."""
+        out = t('IF [Score] > 90 THEN "A" ELSEIF [Score] > 70 THEN "B" ELSE "C" ENDIF')
+        when_count = out.count("WHEN")
+        assert when_count == 2
+
+    def test_no_else(self):
+        """An IF without ELSE still translates to a CASE."""
+        out = t('IF [Active] == "Y" THEN "Yes" ENDIF')
+        assert "CASE WHEN" in out
+
+
+class TestIIF:
+    """Inline ``IIF(cond, a, b)`` conditionals."""
+
+    def test_iif(self):
+        """IIF is rewritten as a CASE expression."""
+        out = t('IIF([Score] > 50, "Pass", "Fail")')
+        assert "CASE WHEN" in out
+
+
+class TestIsNull:
+    """``IsNull`` checks, plain and negated."""
+
+    def test_isnull_keyword(self):
+        """``IsNull(x)`` becomes an ``IS NULL`` test."""
+        out = t('IsNull([Field])')
+        assert "IS NULL" in out
+
+    def test_not_isnull(self):
+        """``!IsNull(x)`` keeps the ``IS NULL`` test and adds ``NOT``."""
+        out = t('!IsNull([Field])')
+        assert "NOT" in out
+        assert "IS NULL" in out
+
+
+class TestFunctions:
+    """Function calls map to their DuckDB counterparts."""
+
+    def test_uppercase(self):
+        """``Uppercase`` maps exactly to ``UPPER`` on the quoted column."""
+        out = t('Uppercase([Name])')
+        assert out == 'UPPER("Name")'
+
+    def test_length(self):
+        """``Length`` maps to ``LENGTH``."""
+        out = t('Length([Name])')
+        assert "LENGTH" in out
+
+    def test_left(self):
+        """``Left`` maps to ``LEFT``."""
+        out = t('Left([Name], 3)')
+        assert "LEFT" in out
+
+    def test_round(self):
+        """``Round`` maps to ``ROUND``."""
+        out = t('Round([Score], 2)')
+        assert "ROUND" in out
+
+    def test_abs(self):
+        """``ABS`` maps to ``ABS``."""
+        out = t('ABS([Val])')
+        assert "ABS" in out
+
+    def test_trim(self):
+        """``Trim`` maps to ``TRIM``."""
+        out = t('Trim([Name])')
+        assert "TRIM" in out
+
+    def test_nested(self):
+        """Nested calls keep both the outer and the inner function."""
+        out = t('Uppercase(Trim([Name]))')
+        for function_name in ("UPPER", "TRIM"):
+            assert function_name in out
+
+
+class TestArithmetic:
+    """Arithmetic operators are carried over into the SQL."""
+
+    def test_add(self):
+        """Addition keeps its ``+``."""
+        assert "+" in t('[A] + [B]')
+
+    def test_multiply(self):
+        """Multiplication keeps its ``*``."""
+        assert "*" in t('[A] * [B]')
+
+    def test_divide(self):
+        """Division keeps its ``/``."""
+        assert "/" in t('[A] / [B]')
+
+    def test_complex(self):
+        """Arithmetic nested inside a function call still translates."""
+        out = t('ROUND([Spend] / [Visits], 1)')
+        assert "ROUND" in out
+
+
+class TestDateFunctions:
+    """Zero-argument date/time helpers."""
+
+    def test_datetimenow(self):
+        """``DateTimeNow()`` is rendered with ``NOW()``."""
+        out = t('DateTimeNow()')
+        assert "NOW()" in out
+
+    def test_datetimetoday(self):
+        """``DateTimeToday()`` is rendered with ``CURRENT_DATE``."""
+        out = t('DateTimeToday()')
+        assert "CURRENT_DATE" in out
--- a/alteryx_runner/tools/__init__.py
+++ b/alteryx_runner/tools/__init__.py
@ -0,0 +1,87 @@
+"""Tool registry: Plugin string → tool class."""
+from __future__ import annotations
+from typing import Optional, Type
+from tools.base import BaseTool
+
+from tools.inout import InputDataTool, OutputDataTool, TextInputTool, BrowseTool
+from tools.preparation import (
+    FilterTool, FormulaTool, SelectTool, SortTool, SampleTool,
+    UniqueTool, GenerateRowsTool, MultiRowFormulaTool,
+    MultiFieldFormulaTool, RecordIDTool, AutoFieldTool,
+)
+from tools.join import JoinTool, JoinMultipleTool, UnionTool, AppendFieldsTool, FindReplaceTool
+from tools.parse import DateTimeTool, RegExTool, TextToColumnsTool
+from tools.transform import SummarizeTool, CrossTabTool, TransposeTool
+
+
+class _PassthroughTool(BaseTool):
+    """No-op tool that forwards its first input unchanged as ``"Output"``."""
+
+    def execute(self, inputs):
+        """Return the first input frame, or an empty frame when there is none.
+
+        "First" means first in the iteration order of the ``inputs`` mapping.
+        """
+        if inputs:
+            return {"Output": next(iter(inputs.values()))}
+        # An ordinary function-scope import replaces the former
+        # __import__("polars") expression; it is only reached on the empty path.
+        import polars as pl
+
+        return {"Output": pl.DataFrame()}
+
+
+class _NullTool(BaseTool):
+    """Tool that yields no outputs; registered for annotation-style nodes."""
+
+    def execute(self, inputs):
+        """Ignore ``inputs`` and return an empty output mapping."""
+        return dict()
+
+
+# Maps a node's plugin identifier string to the tool class that executes it.
+# The keys follow a dotted "<Namespace>.<Tool>.<Tool>" shape.
+# Insertion order is significant: the partial-match fallback in get_tool_class
+# scans these entries in order, so do not reorder them casually.
+_REGISTRY: dict[str, Type[BaseTool]] = {
+    # In/Out
+    "AlteryxBasePluginsGui.DbFileInput.DbFileInput": InputDataTool,
+    "AlteryxBasePluginsGui.DbFileOutput.DbFileOutput": OutputDataTool,
+    "AlteryxBasePluginsGui.TextInput.TextInput": TextInputTool,
+    "AlteryxBasePluginsGui.BrowseV2.BrowseV2": BrowseTool,
+    # Preparation
+    "AlteryxBasePluginsGui.Filter.Filter": FilterTool,
+    "AlteryxBasePluginsGui.Formula.Formula": FormulaTool,
+    "AlteryxBasePluginsGui.AlteryxSelect.AlteryxSelect": SelectTool,
+    "AlteryxBasePluginsGui.Sort.Sort": SortTool,
+    "AlteryxBasePluginsGui.Sample.Sample": SampleTool,
+    "AlteryxBasePluginsGui.Unique.Unique": UniqueTool,
+    "AlteryxBasePluginsGui.GenerateRows.GenerateRows": GenerateRowsTool,
+    "AlteryxBasePluginsGui.MultiRowFormula.MultiRowFormula": MultiRowFormulaTool,
+    "AlteryxBasePluginsGui.MultiFieldFormula.MultiFieldFormula": MultiFieldFormulaTool,
+    "AlteryxBasePluginsGui.RecordID.RecordID": RecordIDTool,
+    "AlteryxBasePluginsGui.AutoField.AutoField": AutoFieldTool,
+    # Join
+    "AlteryxBasePluginsGui.Join.Join": JoinTool,
+    "AlteryxBasePluginsGui.JoinMultiple.JoinMultiple": JoinMultipleTool,
+    "AlteryxBasePluginsGui.Union.Union": UnionTool,
+    "AlteryxBasePluginsGui.AppendFields.AppendFields": AppendFieldsTool,
+    "AlteryxBasePluginsGui.FindReplace.FindReplace": FindReplaceTool,
+    # Parse
+    "AlteryxBasePluginsGui.DateTime.DateTime": DateTimeTool,
+    "AlteryxBasePluginsGui.RegEx.RegEx": RegExTool,
+    "AlteryxBasePluginsGui.TextToColumns.TextToColumns": TextToColumnsTool,
+    # Transform
+    # NOTE(review): Summarize sits under "AlteryxSpatialPluginsGui", unlike its
+    # siblings -- presumably this is what Alteryx itself writes; confirm.
+    "AlteryxSpatialPluginsGui.Summarize.Summarize": SummarizeTool,
+    "AlteryxBasePluginsGui.CrossTab.CrossTab": CrossTabTool,
+    "AlteryxBasePluginsGui.Transpose.Transpose": TransposeTool,
+    # Documentation / no-op
+    "AlteryxGuiToolkit.ToolContainer.ToolContainer": _PassthroughTool,
+    "AlteryxGuiToolkit.TextBox.TextBox": _NullTool,
+    "AlteryxGuiToolkit.Comment.Comment": _NullTool,
+    "AlteryxBasePluginsGui.AlteryxAnnotation.AlteryxAnnotation": _NullTool,
+}
+
+
+def get_tool_class(plugin: str) -> Optional[Type[BaseTool]]:
+    """Return the tool class for a given Plugin string, or None if unsupported.
+
+    An exact registry hit wins. Otherwise a partial-match fallback runs, which
+    is useful for minor version differences in plugin names: a registry key
+    matches when it occurs inside ``plugin`` or when ``plugin`` ends with the
+    key's last dotted segment. Among all matching keys the most specific one
+    (longest matched text) is chosen; ties go to the earlier registry entry.
+    """
+    if not plugin:
+        return None
+    exact = _REGISTRY.get(plugin)
+    if exact is not None:
+        return exact
+
+    best_cls: Optional[Type[BaseTool]] = None
+    best_score = -1
+    for key, cls in _REGISTRY.items():
+        if key in plugin:
+            score = len(key)
+        else:
+            tail = key.split(".")[-1]
+            if not plugin.endswith(tail):
+                continue
+            score = len(tail)
+        # Returning the first hit would let a short tail such as "Formula"
+        # shadow "MultiRowFormula"; keep the longest match instead.
+        if score > best_score:
+            best_cls, best_score = cls, score
+    return best_cls
+
+
+def register_tool(plugin: str, cls: Type[BaseTool]) -> None:
+    """Add or replace the registry entry that maps ``plugin`` to ``cls``."""
+    _REGISTRY.update({plugin: cls})
+
+
+__all__ = ["get_tool_class", "register_tool", "BaseTool"]
--- a/alteryx_runner/tools/__pycache__/__init__.cpython-312.pyc
+++ b/alteryx_runner/tools/__pycache__/__init__.cpython-312.pyc
--- a/alteryx_runner/tools/__pycache__/__init__.cpython-313.pyc
+++ b/alteryx_runner/tools/__pycache__/__init__.cpython-313.pyc
--- a/alteryx_runner/tools/__pycache__/base.cpython-312.pyc
+++ b/alteryx_runner/tools/__pycache__/base.cpython-312.pyc
--- a/alteryx_runner/tools/__pycache__/base.cpython-313.pyc
+++ b/alteryx_runner/tools/__pycache__/base.cpython-313.pyc
--- a/alteryx_runner/tools/base.py
+++ b/alteryx_runner/tools/base.py
@ -0,0 +1,30 @@
+from __future__ import annotations
+from abc import ABC, abstractmethod
+from typing import Dict, Optional
+import polars as pl
+import xml.etree.ElementTree as ET
+from engine.graph import NodeDef
+from engine.context import RunContext
+
+
+class BaseTool(ABC):
+    """Abstract base for workflow tools.
+
+    Holds the node definition, the run context and the node's configuration
+    element, and offers small helpers for reading values out of that element.
+    """
+
+    def __init__(self, node: NodeDef, ctx: RunContext):
+        self.ctx = ctx
+        self.node = node
+        # May be None; every helper below tolerates a missing configuration.
+        self.config: Optional[ET.Element] = node.config
+
+    @abstractmethod
+    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
+        """Execute the tool and return named output DataFrames."""
+
+    def _cfg(self, xpath: str, default: Optional[str] = None) -> Optional[str]:
+        """Return the text of the first element at ``xpath``.
+
+        ``default`` is returned only when there is no config or no matching
+        element; a matching element without text yields ``None``.
+        """
+        if self.config is None:
+            return default
+        element = self.config.find(xpath)
+        if element is None:
+            return default
+        return element.text
+
+    def _cfg_attr(self, xpath: str, attr: str, default: Optional[str] = None) -> Optional[str]:
+        """Return attribute ``attr`` of the first element at ``xpath``.
+
+        ``default`` is returned when the config, the element or the attribute
+        is missing.
+        """
+        if self.config is None:
+            return default
+        element = self.config.find(xpath)
+        if element is None:
+            return default
+        return element.attrib.get(attr, default)
+
+    def _cfg_text(self, xpath: str, default: str = "") -> str:
+        """Like ``_cfg`` but never returns ``None``: empty text maps to ``default``."""
+        text = self._cfg(xpath, default)
+        if text is None:
+            return default
+        return text
--- a/alteryx_runner/tools/inout/__init__.py
+++ b/alteryx_runner/tools/inout/__init__.py
@ -0,0 +1,6 @@
+from .input_data import InputDataTool
+from .output_data import OutputDataTool
+from .text_input import TextInputTool
+from .browse import BrowseTool
+
+__all__ = ["InputDataTool", "OutputDataTool", "TextInputTool", "BrowseTool"]
--- a/alteryx_runner/tools/inout/__pycache__/__init__.cpython-312.pyc
+++ b/alteryx_runner/tools/inout/__pycache__/__init__.cpython-312.pyc
--- a/alteryx_runner/tools/inout/__pycache__/__init__.cpython-313.pyc
+++ b/alteryx_runner/tools/inout/__pycache__/__init__.cpython-313.pyc
--- a/alteryx_runner/tools/inout/__pycache__/browse.cpython-312.pyc
+++ b/alteryx_runner/tools/inout/__pycache__/browse.cpython-312.pyc
--- a/alteryx_runner/tools/inout/__pycache__/browse.cpython-313.pyc
+++ b/alteryx_runner/tools/inout/__pycache__/browse.cpython-313.pyc
--- a/alteryx_runner/tools/inout/__pycache__/input_data.cpython-312.pyc
+++ b/alteryx_runner/tools/inout/__pycache__/input_data.cpython-312.pyc
--- a/alteryx_runner/tools/inout/__pycache__/input_data.cpython-313.pyc
+++ b/alteryx_runner/tools/inout/__pycache__/input_data.cpython-313.pyc
--- a/alteryx_runner/tools/inout/__pycache__/output_data.cpython-312.pyc
+++ b/alteryx_runner/tools/inout/__pycache__/output_data.cpython-312.pyc
--- a/alteryx_runner/tools/inout/__pycache__/output_data.cpython-313.pyc
+++ b/alteryx_runner/tools/inout/__pycache__/output_data.cpython-313.pyc
--- a/alteryx_runner/tools/inout/__pycache__/text_input.cpython-312.pyc
+++ b/alteryx_runner/tools/inout/__pycache__/text_input.cpython-312.pyc
--- a/alteryx_runner/tools/inout/__pycache__/text_input.cpython-313.pyc
+++ b/alteryx_runner/tools/inout/__pycache__/text_input.cpython-313.pyc
--- a/alteryx_runner/tools/inout/browse.py
+++ b/alteryx_runner/tools/inout/browse.py
@ -0,0 +1,14 @@
+from __future__ import annotations
+from typing import Dict
+import polars as pl
+from tools.base import BaseTool
+
+
+class BrowseTool(BaseTool):
+    """Pass-through tool that can print a preview of the data flowing by."""
+
+    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
+        """Return the "Input" frame unchanged under the "Output" key.
+
+        A missing "Input" entry is treated as an empty frame. When
+        ``self.ctx.verbose`` is truthy, the tool id, the row count, the
+        column names and the first 20 rows are printed before returning.
+        """
+        frame = inputs.get("Input", pl.DataFrame())
+        result = {"Output": frame}
+
+        # Quiet mode: nothing to report, just forward the data.
+        if not self.ctx.verbose:
+            return result
+
+        print(f"\n[Browse ToolID={self.node.tool_id}]")
+        print(f"  rows={len(frame)}  cols={frame.columns}")
+        print(frame.head(20))
+        return result
--- a/alteryx_runner/tools/inout/input_data.py
+++ b/alteryx_runner/tools/inout/input_data.py
@ -0,0 +1,99 @@
+from __future__ import annotations
+from typing import Dict, Optional
+import xml.etree.ElementTree as ET
+import polars as pl
+from tools.base import BaseTool
+
+
+class InputDataTool(BaseTool):
+    """Load the file referenced by the tool's ``File`` config element.
+
+    The numeric ``FileFormat`` attribute on that element selects the reader
+    (see ``_read``); format codes that are not recognised fall back to CSV.
+    """
+
+    # Delimiter escapes that can appear as literal two-character text in the
+    # workflow XML (a backslash followed by "t" for tab, by "s" for space).
+    # polars only accepts a single-byte separator, so they are translated
+    # before the file is read. Anything else is passed through untouched.
+    _DELIMITER_ESCAPES = {"\\t": "\t", "\\s": " "}
+
+    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
+        """Read the configured file and return it under the "Output" key.
+
+        Args:
+            inputs: Ignored; this tool has no incoming connections.
+
+        Returns:
+            ``{"Output": frame}``. The frame is empty when there is no
+            config, no ``File`` element, or the path text is blank.
+
+        Raises:
+            ValueError: If ``FileFormat`` or ``RecordLimit`` is not an integer.
+        """
+        if self.config is None:
+            return {"Output": pl.DataFrame()}
+
+        file_el = self.config.find("File")
+        if file_el is None:
+            return {"Output": pl.DataFrame()}
+
+        # Strip before testing so a whitespace-only path is treated like a
+        # missing one instead of being resolved as an empty path.
+        raw_path = (file_el.text or "").strip()
+        if not raw_path:
+            return {"Output": pl.DataFrame()}
+
+        fmt = int(file_el.attrib.get("FileFormat", "0"))
+        record_limit_str = file_el.attrib.get("RecordLimit", "").strip()
+        limit = int(record_limit_str) if record_limit_str else None
+
+        opts = self.config.find("FormatSpecificOptions")
+        if opts is None:
+            # Empty placeholder so the findtext() calls in _read just miss.
+            opts = ET.Element("x")
+
+        path_str, sheet = self._parse_path(raw_path)
+        resolved = self.ctx.resolve_path(path_str)
+
+        df = self._read(str(resolved), fmt, sheet, opts)
+
+        # Trim whitespace from string columns (matches Alteryx behavior).
+        # All string columns are handled in one with_columns call rather
+        # than rebuilding the frame once per column.
+        string_cols = [
+            name for name, dtype in df.schema.items() if dtype == pl.String
+        ]
+        if string_cols:
+            df = df.with_columns(pl.col(string_cols).str.strip_chars())
+
+        # A limit of 0 (or no limit at all) means "read everything".
+        if limit:
+            df = df.head(limit)
+
+        return {"Output": df}
+
+    def _parse_path(self, raw: str) -> tuple[str, Optional[str]]:
+        """Split ``path|||sheet`` into its two parts.
+
+        The sheet part has surrounding whitespace and backticks removed and
+        trailing ``$`` characters dropped. Without ``|||`` the sheet is
+        ``None``.
+        """
+        if "|||" in raw:
+            path, sheet = raw.split("|||", 1)
+            return path.strip(), sheet.strip().strip("`").rstrip("$")
+        return raw.strip(), None
+
+    def _decode_delimiter(self, raw: str) -> str:
+        """Translate a textual delimiter escape into the character it names."""
+        return self._DELIMITER_ESCAPES.get(raw, raw)
+
+    def _read(
+        self,
+        path: str,
+        fmt: int,
+        sheet: Optional[str],
+        opts: ET.Element,
+    ) -> pl.DataFrame:
+        """Read *path* with the reader selected by the format code *fmt*.
+
+        Codes handled: 0 and 6 (delimited text), 25 (Excel), 2 (Parquet),
+        19 (YXDB), 56 (JSON). Any other code is attempted as CSV.
+
+        Raises:
+            NotImplementedError: For YXDB when the ``yxdb`` package is not
+                installed.
+        """
+        if fmt in (0, 6):   # CSV / delimited
+            # "Delimeter" (sic) is tried first, then the correct spelling.
+            delim = self._decode_delimiter(
+                opts.findtext("Delimeter") or opts.findtext("Delimiter") or ","
+            )
+            header_text = opts.findtext("HeaderRow") or "True"
+            has_header = header_text.strip().lower() in ("true", "1", "yes")
+            # ImportLine is 1-based: line N means skip the N-1 lines before it.
+            import_line = int(opts.findtext("ImportLine") or "1")
+            skip = max(0, import_line - 1)
+            return pl.read_csv(
+                path,
+                separator=delim,
+                has_header=has_header,
+                skip_rows=skip,
+                infer_schema_length=10000,
+                ignore_errors=True,
+            )
+
+        if fmt == 25:   # Excel
+            # FirstRowData=True means the first row is data, i.e. no header.
+            read_header = (opts.findtext("FirstRowData") or "False").lower() != "true"
+            import_line = int(opts.findtext("ImportLine") or "1")
+            skip = max(0, import_line - 1)
+            # NOTE(review): the keys accepted in read_options depend on the
+            # Excel engine polars is using - confirm against the pinned version.
+            return pl.read_excel(
+                path,
+                sheet_name=sheet or 0,
+                read_options={"has_header": read_header, "skip_rows": skip},
+            )
+
+        if fmt == 2:    # Parquet
+            return pl.read_parquet(path)
+
+        if fmt == 19:   # YXDB
+            # Only the import is guarded, so an ImportError raised while
+            # actually reading the file is not misreported as a missing package.
+            try:
+                import yxdb
+            except ImportError as err:
+                raise NotImplementedError(
+                    "YXDB format requires the 'yxdb' package: pip install yxdb"
+                ) from err
+            rows = list(yxdb.open_file(path))
+            return pl.DataFrame(rows) if rows else pl.DataFrame()
+
+        if fmt == 56:   # JSON
+            return pl.read_json(path)
+
+        # Fallback: try CSV
+        return pl.read_csv(path, infer_schema_length=10000, ignore_errors=True)
--- a/alteryx_runner/tools/inout/output_data.py
+++ b/alteryx_runner/tools/inout/output_data.py
@ -0,0 +1,88 @@
+from __future__ import annotations
+from pathlib import Path
+from typing import Dict
+import xml.etree.ElementTree as ET
+import polars as pl
+from tools.base import BaseTool
+
+
+class OutputDataTool(BaseTool):
+    """Write the incoming frame to the file named by the ``File`` element.
+
+    Supports a single file, one file per distinct value of a field
+    (``MultiFile``), or fixed-size chunks (``MaxRecords``).
+    """
+
+    # Delimiter escapes that can appear as literal two-character text in the
+    # workflow XML (a backslash followed by "t" for tab, by "s" for space).
+    # polars only accepts a single-byte separator, so they are translated
+    # before writing. Anything else is passed through untouched.
+    _DELIMITER_ESCAPES = {"\\t": "\t", "\\s": " "}
+
+    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
+        """Write the "Input" frame to disk; this tool produces no outputs.
+
+        Nothing is written when there is no config, the frame is empty, or
+        the configured path is missing or blank.
+
+        Returns:
+            Always an empty dict.
+
+        Raises:
+            ValueError: If ``FileFormat`` or ``MaxRecords`` is not an integer.
+        """
+        df = inputs.get("Input", pl.DataFrame())
+        if self.config is None or df.is_empty():
+            return {}
+
+        file_el = self.config.find("File")
+        if file_el is None:
+            return {}
+
+        # Strip before testing so a whitespace-only path is treated like a
+        # missing one instead of being resolved as an empty path.
+        raw_path = (file_el.text or "").strip()
+        if not raw_path:
+            return {}
+
+        fmt = int(file_el.attrib.get("FileFormat", "0"))
+        max_records_str = (file_el.attrib.get("MaxRecords") or "").strip()
+        max_records = int(max_records_str) if max_records_str else None
+
+        opts = self.config.find("FormatSpecificOptions")
+        if opts is None:
+            # Empty placeholder so the findtext() calls in _write just miss.
+            opts = ET.Element("x")
+
+        multi_el = self.config.find("MultiFile")
+        multi_file = (multi_el.attrib.get("value", "False") if multi_el is not None else "False") == "True"
+        multi_field = (self.config.findtext("MultiFileField") or "").strip()
+        multi_type = (self.config.findtext("MultiFileType") or "Suffix").strip()
+        keep_field = (self.config.findtext("KeepField") or "True").strip().lower() == "true"
+
+        out_path = self.ctx.resolve_output_path(raw_path)
+        out_path.parent.mkdir(parents=True, exist_ok=True)
+
+        if multi_file and multi_field and multi_field in df.columns:
+            key = pl.col(multi_field)
+            for val in df[multi_field].unique().to_list():
+                # Comparing with None yields null for every row, which
+                # filter() treats as "no match"; null keys need is_null()
+                # or those rows would be lost and an empty file written.
+                mask = key.is_null() if val is None else key == val
+                part = df.filter(mask)
+                if not keep_field:
+                    part = part.drop(multi_field)
+                part_path = self._multi_path(out_path, str(val), multi_type)
+                self._write(part, part_path, fmt, opts)
+        elif max_records:
+            # First chunk keeps the configured name; later ones get _1, _2, ...
+            for chunk_num, start in enumerate(range(0, len(df), max_records)):
+                chunk = df.slice(start, max_records)
+                chunk_path = out_path if chunk_num == 0 else out_path.with_stem(
+                    f"{out_path.stem}_{chunk_num}"
+                )
+                self._write(chunk, chunk_path, fmt, opts)
+        else:
+            self._write(df, out_path, fmt, opts)
+
+        if self.ctx.verbose:
+            print(f"[Output] Wrote {len(df)} rows → {out_path}")
+
+        return {}
+
+    def _multi_path(self, base: Path, value: str, mode: str) -> Path:
+        """Build the per-value file name for multi-file output.
+
+        Characters in *value* other than alphanumerics, ``-`` and ``_`` are
+        replaced by ``_``. With ``mode == "Suffix"`` the value is appended to
+        the stem of *base*; any other mode prepends it.
+        """
+        safe = "".join(c if c.isalnum() or c in "-_" else "_" for c in value)
+        if mode == "Suffix":
+            return base.with_stem(f"{base.stem}_{safe}")
+        return base.with_stem(f"{safe}_{base.stem}")
+
+    def _decode_delimiter(self, raw: str) -> str:
+        """Translate a textual delimiter escape into the character it names."""
+        return self._DELIMITER_ESCAPES.get(raw, raw)
+
+    def _write(self, df: pl.DataFrame, path: Path, fmt: int, opts: ET.Element) -> None:
+        """Write *df* to *path* using the writer selected by format code *fmt*.
+
+        Codes handled: 0 and 6 (delimited text), 25 (Excel), 2 (Parquet).
+        Code 19 (YXDB) is written as Parquet next to the requested path, and
+        any other code is written as default CSV.
+        """
+        if fmt in (0, 6):   # CSV
+            # "Delimeter" (sic) is tried first, then the correct spelling.
+            delim = self._decode_delimiter(
+                opts.findtext("Delimeter") or opts.findtext("Delimiter") or ","
+            )
+            # Accept both HeaderRow and Headers attribute names
+            header_val = (
+                opts.findtext("HeaderRow")
+                or opts.findtext("Headers")
+                or "True"
+            )
+            header = header_val.lower() != "false"
+            df.write_csv(str(path), separator=delim, include_header=header)
+        elif fmt == 25:     # Excel
+            df.write_excel(str(path))
+        elif fmt == 2:      # Parquet
+            df.write_parquet(str(path))
+        elif fmt == 19:     # YXDB — fall back to Parquet
+            fallback = path.with_suffix(".parquet")
+            df.write_parquet(str(fallback))
+            if self.ctx.verbose:
+                print(f"[Output] YXDB write not supported; wrote Parquet to {fallback}")
+        else:
+            df.write_csv(str(path))
--- a/alteryx_runner/tools/inout/text_input.py
+++ b/alteryx_runner/tools/inout/text_input.py
@ -0,0 +1,35 @@
+from __future__ import annotations
+from typing import Dict, Optional
+import polars as pl
+from tools.base import BaseTool
+
+
+class TextInputTool(BaseTool):
+    """Build a frame from the literal table embedded in the tool config."""
+
+    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
+        """Return the embedded table as an all-string frame under "Output".
+
+        Column names come from ``Fields/Field`` ``name`` attributes and rows
+        from ``Data/r`` elements, whose ``c`` children are matched to the
+        columns by position. Missing or empty cells become nulls and cells
+        beyond the declared columns are ignored. With no config or no fields
+        the result is a completely empty frame.
+        """
+        cfg = self.config
+        if cfg is None:
+            return {"Output": pl.DataFrame()}
+
+        names = [field.attrib["name"] for field in cfg.findall("Fields/Field")]
+        if not names:
+            return {"Output": pl.DataFrame()}
+
+        schema = {name: pl.String for name in names}
+
+        records: list[dict] = []
+        for row_el in cfg.findall("Data/r"):
+            values: list[Optional[str]] = [cell.text for cell in row_el.findall("c")]
+            # Pad short rows with None; zip() below drops any surplus cells.
+            values += [None] * (len(names) - len(values))
+            # Empty text in XML → NULL
+            records.append({name: (value or None) for name, value in zip(names, values)})
+
+        if records:
+            return {"Output": pl.DataFrame(records, schema=schema)}
+        return {"Output": pl.DataFrame(schema=schema)}
--- a/alteryx_runner/tools/join/__init__.py
+++ b/alteryx_runner/tools/join/__init__.py
@ -0,0 +1,7 @@
+from .join_tool import JoinTool
+from .join_multiple import JoinMultipleTool
+from .union_tool import UnionTool
+from .append_fields import AppendFieldsTool
+from .find_replace import FindReplaceTool
+
+__all__ = ["JoinTool", "JoinMultipleTool", "UnionTool", "AppendFieldsTool", "FindReplaceTool"]
--- a/alteryx_runner/tools/join/pycache/init.cpython-312.pyc
+++ b/alteryx_runner/tools/join/pycache/init.cpython-312.pyc
--- a/alteryx_runner/tools/join/pycache/init.cpython-313.pyc
+++ b/alteryx_runner/tools/join/pycache/init.cpython-313.pyc
--- a/alteryx_runner/tools/join/pycache/append_fields.cpython-312.pyc
+++ b/alteryx_runner/tools/join/pycache/append_fields.cpython-312.pyc
--- a/alteryx_runner/tools/join/pycache/append_fields.cpython-313.pyc
+++ b/alteryx_runner/tools/join/pycache/append_fields.cpython-313.pyc
--- a/alteryx_runner/tools/join/pycache/find_replace.cpython-312.pyc
+++ b/alteryx_runner/tools/join/pycache/find_replace.cpython-312.pyc
--- a/alteryx_runner/tools/join/pycache/find_replace.cpython-313.pyc
+++ b/alteryx_runner/tools/join/pycache/find_replace.cpython-313.pyc
--- a/alteryx_runner/tools/join/pycache/join_multiple.cpython-312.pyc
+++ b/alteryx_runner/tools/join/pycache/join_multiple.cpython-312.pyc
--- a/alteryx_runner/tools/join/pycache/join_multiple.cpython-313.pyc
+++ b/alteryx_runner/tools/join/pycache/join_multiple.cpython-313.pyc
--- a/alteryx_runner/tools/join/pycache/join_tool.cpython-312.pyc
+++ b/alteryx_runner/tools/join/pycache/join_tool.cpython-312.pyc
--- a/alteryx_runner/tools/join/pycache/join_tool.cpython-313.pyc
+++ b/alteryx_runner/tools/join/pycache/join_tool.cpython-313.pyc
--- a/alteryx_runner/tools/join/pycache/union_tool.cpython-312.pyc
+++ b/alteryx_runner/tools/join/pycache/union_tool.cpython-312.pyc
--- a/alteryx_runner/tools/join/pycache/union_tool.cpython-313.pyc
+++ b/alteryx_runner/tools/join/pycache/union_tool.cpython-313.pyc
--- a/Show More
+++ b/Show More