initial commit with runner kinda working

main
casey 2026-06-13 08:27:38 +10:00
commit 02e71a857c
167 changed files with 841206 additions and 0 deletions

157
.gitignore vendored Normal file
View File

@ -0,0 +1,157 @@
# uv
uv.lock
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
bin/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.toast/
.cache/
.pytest_cache/
.noscript/
.htmlcov/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.mutmut-cache
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a discussion that covers why this should be excluded, see:
# https://stackoverflow.com/questions/54315206/should-we-gitignore-the-python-version-file
# .python-version
# pipenv
# According to pypa/pipenv#118, it is recommended to include Pipfile.lock in version control.
# However, if you are executing a library instead of an application, you might skip it.
#Pipfile.lock
# poetry
# Using Poetry requires committing poetry.lock alongside pyproject.toml
# https://python-poetry.org
#poetry.lock
# pdm
# https://fming.dev
.pdm-plugins/
.pdm-build/
# Hatch
.hatch/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site/
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype
.pytype/
# Cython debug symbols
cython_debug/
# IDEs and Editors (Optional but recommended)
.vscode/
.idea/
*.swp
*.swo
.DS_Store

44
AGENTS.md Normal file
View File

@ -0,0 +1,44 @@
# Project Overview
Pyteryx is a Python-first reimplementation of the Alteryx tool.
The intent is to keep the same observability features and a rich DAG-based user interface, while taking advantage of the ease of deploying Python.
Pyteryx should have a near-identical look and feel to Alteryx, with the added option of exporting a workflow (yxmd) as a Python script.
### Alteryx Tools Documentation / UI Examples
https://help.alteryx.com/current/en/designer/tools.html
### Alteryx Tool Logic / Conversion code
./alteryx-to-python-migration-strategy-main
This migration system helps organizations transition from Alteryx's visual workflow platform to Python-based data processing pipelines. It automatically converts Alteryx workflow XML files (.yxmd) into equivalent Python code using pandas, numpy, and other standard data science libraries.
./alteryx-to-python-migration-strategy-main/migration_toolkit.py
This file contains conversion logic that could be used to create an Alteryx execution engine that can run within the Pyteryx app.
# Back End
This is a Python project using `uv` for dependency management and environment setup.
## Agent instructions
When interacting with this project, AI agents should adhere to the following guidelines:
- Always use uv for dependency management and environment activation. Avoid using pip directly.
- Do not manually edit pyproject.toml or uv.lock files. Use uv add or uv lock --upgrade for dependency changes.
- Ensure the virtual environment is activated before executing Python scripts or commands. Use uv run for this purpose.
- Prioritize using uv commands over direct Python or system commands for package management.
- If suggesting package installations, always recommend `uv add <package-name>` over `pip install <package-name>`.
## Setup Commands
### Install dependencies
uv add <package-name>
### Running files
uv run python <your_script.py>
### Clean cache
uv cache clean
### Environment Variables / Auth
The `.env` file in the project root folder contains all required auth strings, in the following format:
name = 'string',
# Front End
The front end is built with Go and the Fyne UI library (http://fyne.io/).

View File

@ -0,0 +1,388 @@
<?xml version="1.0"?>
<AlteryxDocument yxmdVer="2022.3">
<Nodes>
<Node ToolID="2">
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileInput.DbFileInput">
<Position x="162" y="378" />
</GuiSettings>
<Properties>
<Configuration>
<Passwords />
<File RecordLimit="" SearchSubDirs="False" FileFormat="0" OutputFileName="">Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv</File>
<FormatSpecificOptions>
<HeaderRow>True</HeaderRow>
<IgnoreErrors>False</IgnoreErrors>
<AllowShareWrite>False</AllowShareWrite>
<ImportLine>1</ImportLine>
<FieldLen>254</FieldLen>
<SingleThreadRead>False</SingleThreadRead>
<IgnoreQuotes>DoubleQuotes</IgnoreQuotes>
<Delimeter>,</Delimeter>
<QuoteRecordBreak>False</QuoteRecordBreak>
<CodePage>28591</CodePage>
</FormatSpecificOptions>
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText>products.csv</DefaultAnnotationText>
<Left value="False" />
</Annotation>
<Dependencies>
<Implicit />
</Dependencies>
<MetaInfo connection="Output">
<RecordInfo>
<Field name="Product_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
<Field name="Product_Name" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
<Field name="Product_Category" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
<Field name="Product_Cost" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
<Field name="Product_Price" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
</RecordInfo>
</MetaInfo>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileInput" />
</Node>
<Node ToolID="3">
<GuiSettings Plugin="AlteryxBasePluginsGui.Join.Join">
<Position x="558" y="282" />
</GuiSettings>
<Properties>
<Configuration joinByRecordPos="False">
<JoinInfo connection="Left">
<Field field="Product_ID" />
</JoinInfo>
<JoinInfo connection="Right">
<Field field="Product_ID" />
</JoinInfo>
<SelectConfiguration>
<Configuration outputConnection="Join">
<OrderChanged value="False" />
<CommaDecimal value="False" />
<SelectFields>
<SelectField field="Right_Product_ID" selected="True" rename="Right_Product_ID" input="Right_" />
<SelectField field="*Unknown" selected="True" />
</SelectFields>
</Configuration>
</SelectConfiguration>
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText />
<Left value="False" />
</Annotation>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxJoin" />
</Node>
<Node ToolID="4">
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileInput.DbFileInput">
<Position x="162" y="270" />
</GuiSettings>
<Properties>
<Configuration>
<Passwords />
<File RecordLimit="" SearchSubDirs="False" FileFormat="0" OutputFileName="">Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv</File>
<FormatSpecificOptions>
<HeaderRow>True</HeaderRow>
<IgnoreErrors>False</IgnoreErrors>
<AllowShareWrite>False</AllowShareWrite>
<ImportLine>1</ImportLine>
<FieldLen>254</FieldLen>
<SingleThreadRead>False</SingleThreadRead>
<IgnoreQuotes>DoubleQuotes</IgnoreQuotes>
<Delimeter>,</Delimeter>
<QuoteRecordBreak>False</QuoteRecordBreak>
<CodePage>28591</CodePage>
</FormatSpecificOptions>
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText>inventory.csv</DefaultAnnotationText>
<Left value="False" />
</Annotation>
<Dependencies>
<Implicit />
</Dependencies>
<MetaInfo connection="Output">
<RecordInfo>
<Field name="Store_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
<Field name="Product_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
<Field name="Stock_On_Hand" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
</RecordInfo>
</MetaInfo>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileInput" />
</Node>
<Node ToolID="6">
<GuiSettings Plugin="AlteryxBasePluginsGui.Union.Union">
<Position x="402" y="378" />
</GuiSettings>
<Properties>
<Configuration>
<ByName_ErrorMode>Warning</ByName_ErrorMode>
<ByName_OutputMode>All</ByName_OutputMode>
<Mode>ByName</Mode>
<SetOutputOrder value="False" />
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText />
<Left value="False" />
</Annotation>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxUnion" />
</Node>
<Node ToolID="7">
<GuiSettings Plugin="AlteryxBasePluginsGui.Sample.Sample">
<Position x="258" y="378" />
</GuiSettings>
<Properties>
<Configuration>
<Mode>First</Mode>
<N>30</N>
<GroupFields orderChanged="False" />
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText>First 30 rows</DefaultAnnotationText>
<Left value="False" />
</Annotation>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxSample" />
</Node>
<Node ToolID="8">
<GuiSettings Plugin="AlteryxBasePluginsGui.TextInput.TextInput">
<Position x="258" y="474" />
</GuiSettings>
<Properties>
<Configuration>
<NumRows value="1" />
<Fields>
<Field name="Product_ID" />
<Field name="Product_Name" />
<Field name="Product_Category" />
<Field name="Product_Cost" />
<Field name="Product_Price" />
</Fields>
<Data>
<r>
<c>100</c>
<c>Non-product</c>
<c>NoCat</c>
<c>$1</c>
<c>$1</c>
</r>
</Data>
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText />
<Left value="False" />
</Annotation>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxTextInput" />
</Node>
<Node ToolID="9">
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
<Position x="510" y="438" />
</GuiSettings>
<Properties>
<Configuration>
<File MaxRecords="" FileFormat="0">Z:\Pyteryx\Alteryx_TestWorkflows\JoinTesting\Output\Products_before_join.csv</File>
<Passwords />
<Disable>False</Disable>
<FormatSpecificOptions>
<LineEndStyle>CRLF</LineEndStyle>
<Delimeter>,</Delimeter>
<ForceQuotes>False</ForceQuotes>
<HeaderRow>True</HeaderRow>
<CodePage>28591</CodePage>
<WriteBOM>True</WriteBOM>
</FormatSpecificOptions>
<MultiFile value="False" />
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText>Products_before_join.csv</DefaultAnnotationText>
<Left value="False" />
</Annotation>
<Dependencies>
<Implicit />
</Dependencies>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
</Node>
<Node ToolID="10">
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
<Position x="774" y="414" />
</GuiSettings>
<Properties>
<Configuration>
<File MaxRecords="" FileFormat="0">Z:\Pyteryx\Alteryx_TestWorkflows\JoinTesting\Output\Join_out_R.csv</File>
<Passwords />
<Disable>False</Disable>
<FormatSpecificOptions>
<LineEndStyle>CRLF</LineEndStyle>
<Delimeter>,</Delimeter>
<ForceQuotes>False</ForceQuotes>
<HeaderRow>True</HeaderRow>
<CodePage>28591</CodePage>
<WriteBOM>True</WriteBOM>
</FormatSpecificOptions>
<MultiFile value="False" />
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText>Join_out_R.csv</DefaultAnnotationText>
<Left value="False" />
</Annotation>
<Dependencies>
<Implicit />
</Dependencies>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
</Node>
<Node ToolID="11">
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
<Position x="774" y="282" />
</GuiSettings>
<Properties>
<Configuration>
<File MaxRecords="" FileFormat="0">Z:\Pyteryx\Alteryx_TestWorkflows\JoinTesting\Output\Join_out_J.csv</File>
<Passwords />
<Disable>False</Disable>
<FormatSpecificOptions>
<LineEndStyle>CRLF</LineEndStyle>
<Delimeter>,</Delimeter>
<ForceQuotes>False</ForceQuotes>
<HeaderRow>True</HeaderRow>
<CodePage>28591</CodePage>
<WriteBOM>True</WriteBOM>
</FormatSpecificOptions>
<MultiFile value="False" />
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText>Join_out_J.csv</DefaultAnnotationText>
<Left value="False" />
</Annotation>
<Dependencies>
<Implicit />
</Dependencies>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
</Node>
<Node ToolID="12">
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
<Position x="774" y="138" />
</GuiSettings>
<Properties>
<Configuration>
<File MaxRecords="" FileFormat="0">Z:\Pyteryx\Alteryx_TestWorkflows\JoinTesting\Output\Join_out_L.csv</File>
<Passwords />
<Disable>False</Disable>
<FormatSpecificOptions>
<LineEndStyle>CRLF</LineEndStyle>
<Delimeter>,</Delimeter>
<ForceQuotes>False</ForceQuotes>
<HeaderRow>True</HeaderRow>
<CodePage>28591</CodePage>
<WriteBOM>True</WriteBOM>
</FormatSpecificOptions>
<MultiFile value="False" />
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText>Join_out_L.csv</DefaultAnnotationText>
<Left value="False" />
</Annotation>
<Dependencies>
<Implicit />
</Dependencies>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
</Node>
</Nodes>
<Connections>
<Connection>
<Origin ToolID="2" Connection="Output" />
<Destination ToolID="7" Connection="Input" />
</Connection>
<Connection>
<Origin ToolID="3" Connection="Right" />
<Destination ToolID="10" Connection="Input" />
</Connection>
<Connection>
<Origin ToolID="3" Connection="Join" />
<Destination ToolID="11" Connection="Input" />
</Connection>
<Connection>
<Origin ToolID="3" Connection="Left" />
<Destination ToolID="12" Connection="Input" />
</Connection>
<Connection>
<Origin ToolID="4" Connection="Output" />
<Destination ToolID="3" Connection="Left" />
</Connection>
<Connection>
<Origin ToolID="6" Connection="Output" />
<Destination ToolID="3" Connection="Right" />
</Connection>
<Connection>
<Origin ToolID="6" Connection="Output" />
<Destination ToolID="9" Connection="Input" />
</Connection>
<Connection name="#1">
<Origin ToolID="7" Connection="Output" />
<Destination ToolID="6" Connection="Input" />
</Connection>
<Connection name="#2">
<Origin ToolID="8" Connection="Output" />
<Destination ToolID="6" Connection="Input" />
</Connection>
</Connections>
<Properties>
<Memory default="True" />
<GlobalRecordLimit value="0" />
<TempFiles default="True" />
<Annotation on="True" includeToolName="False" />
<ConvErrorLimit value="10" />
<ConvErrorLimit_Stop value="False" />
<CancelOnError value="False" />
<DisableBrowse value="False" />
<EnablePerformanceProfiling value="False" />
<RunWithE2 value="True" />
<PredictiveToolsCodePage value="1252" />
<DisableAllOutput value="False" />
<ShowAllMacroMessages value="False" />
<ShowConnectionStatusIsOn value="True" />
<ShowConnectionStatusOnlyWhenRunning value="True" />
<ZoomLevel value="0" />
<LayoutType>Horizontal</LayoutType>
<MetaInfo>
<NameIsFileName value="True" />
<Name>JoinTesting</Name>
<Description />
<RootToolName />
<ToolVersion />
<ToolInDb value="False" />
<CategoryName />
<SearchTags />
<Author />
<Company />
<Copyright />
<DescriptionLink actual="" displayed="" />
<Example>
<Description />
<File />
</Example>
<WorkflowId value="b5723901-81a1-4ff2-8b26-42700d914cb2" />
<Telemetry>
<PreviousWorkflowId value="ac869796-4ff0-4398-9782-f8fa79f30c4a" />
<OriginWorkflowId value="ac869796-4ff0-4398-9782-f8fa79f30c4a" />
</Telemetry>
</MetaInfo>
<Events>
<Enabled value="True" />
</Events>
</Properties>
</AlteryxDocument>

View File

@ -0,0 +1,388 @@
<?xml version="1.0"?>
<AlteryxDocument yxmdVer="2022.3">
<Nodes>
<Node ToolID="2">
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileInput.DbFileInput">
<Position x="162" y="378" />
</GuiSettings>
<Properties>
<Configuration>
<Passwords />
<File RecordLimit="" SearchSubDirs="False" FileFormat="0" OutputFileName="">..\Maven Toys Data\products.csv</File>
<FormatSpecificOptions>
<HeaderRow>True</HeaderRow>
<IgnoreErrors>False</IgnoreErrors>
<AllowShareWrite>False</AllowShareWrite>
<ImportLine>1</ImportLine>
<FieldLen>254</FieldLen>
<SingleThreadRead>False</SingleThreadRead>
<IgnoreQuotes>DoubleQuotes</IgnoreQuotes>
<Delimeter>,</Delimeter>
<QuoteRecordBreak>False</QuoteRecordBreak>
<CodePage>28591</CodePage>
</FormatSpecificOptions>
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText>products.csv</DefaultAnnotationText>
<Left value="False" />
</Annotation>
<Dependencies>
<Implicit />
</Dependencies>
<MetaInfo connection="Output">
<RecordInfo>
<Field name="Product_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
<Field name="Product_Name" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
<Field name="Product_Category" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
<Field name="Product_Cost" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
<Field name="Product_Price" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
</RecordInfo>
</MetaInfo>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileInput" />
</Node>
<Node ToolID="3">
<GuiSettings Plugin="AlteryxBasePluginsGui.Join.Join">
<Position x="558" y="282" />
</GuiSettings>
<Properties>
<Configuration joinByRecordPos="False">
<JoinInfo connection="Left">
<Field field="Product_ID" />
</JoinInfo>
<JoinInfo connection="Right">
<Field field="Product_ID" />
</JoinInfo>
<SelectConfiguration>
<Configuration outputConnection="Join">
<OrderChanged value="False" />
<CommaDecimal value="False" />
<SelectFields>
<SelectField field="Right_Product_ID" selected="True" rename="Right_Product_ID" input="Right_" />
<SelectField field="*Unknown" selected="True" />
</SelectFields>
</Configuration>
</SelectConfiguration>
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText />
<Left value="False" />
</Annotation>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxJoin" />
</Node>
<Node ToolID="4">
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileInput.DbFileInput">
<Position x="162" y="270" />
</GuiSettings>
<Properties>
<Configuration>
<Passwords />
<File RecordLimit="" SearchSubDirs="False" FileFormat="0" OutputFileName="">..\Maven Toys Data\inventory.csv</File>
<FormatSpecificOptions>
<HeaderRow>True</HeaderRow>
<IgnoreErrors>False</IgnoreErrors>
<AllowShareWrite>False</AllowShareWrite>
<ImportLine>1</ImportLine>
<FieldLen>254</FieldLen>
<SingleThreadRead>False</SingleThreadRead>
<IgnoreQuotes>DoubleQuotes</IgnoreQuotes>
<Delimeter>,</Delimeter>
<QuoteRecordBreak>False</QuoteRecordBreak>
<CodePage>28591</CodePage>
</FormatSpecificOptions>
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText>inventory.csv</DefaultAnnotationText>
<Left value="False" />
</Annotation>
<Dependencies>
<Implicit />
</Dependencies>
<MetaInfo connection="Output">
<RecordInfo>
<Field name="Store_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
<Field name="Product_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
<Field name="Stock_On_Hand" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
</RecordInfo>
</MetaInfo>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileInput" />
</Node>
<Node ToolID="6">
<GuiSettings Plugin="AlteryxBasePluginsGui.Union.Union">
<Position x="402" y="378" />
</GuiSettings>
<Properties>
<Configuration>
<ByName_ErrorMode>Warning</ByName_ErrorMode>
<ByName_OutputMode>All</ByName_OutputMode>
<Mode>ByName</Mode>
<SetOutputOrder value="False" />
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText />
<Left value="False" />
</Annotation>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxUnion" />
</Node>
<Node ToolID="7">
<GuiSettings Plugin="AlteryxBasePluginsGui.Sample.Sample">
<Position x="258" y="378" />
</GuiSettings>
<Properties>
<Configuration>
<Mode>First</Mode>
<N>30</N>
<GroupFields orderChanged="False" />
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText>First 30 rows</DefaultAnnotationText>
<Left value="False" />
</Annotation>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxSample" />
</Node>
<Node ToolID="8">
<GuiSettings Plugin="AlteryxBasePluginsGui.TextInput.TextInput">
<Position x="258" y="474" />
</GuiSettings>
<Properties>
<Configuration>
<NumRows value="1" />
<Fields>
<Field name="Product_ID" />
<Field name="Product_Name" />
<Field name="Product_Category" />
<Field name="Product_Cost" />
<Field name="Product_Price" />
</Fields>
<Data>
<r>
<c>100</c>
<c>Non-product</c>
<c>NoCat</c>
<c>$1</c>
<c>$1</c>
</r>
</Data>
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText />
<Left value="False" />
</Annotation>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxTextInput" />
</Node>
<Node ToolID="9">
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
<Position x="510" y="438" />
</GuiSettings>
<Properties>
<Configuration>
<File MaxRecords="" FileFormat="0">.\Output\Products_before_join.csv</File>
<Passwords />
<Disable>False</Disable>
<FormatSpecificOptions>
<LineEndStyle>CRLF</LineEndStyle>
<Delimeter>,</Delimeter>
<ForceQuotes>False</ForceQuotes>
<HeaderRow>True</HeaderRow>
<CodePage>28591</CodePage>
<WriteBOM>True</WriteBOM>
</FormatSpecificOptions>
<MultiFile value="False" />
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText>Products_before_join.csv</DefaultAnnotationText>
<Left value="False" />
</Annotation>
<Dependencies>
<Implicit />
</Dependencies>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
</Node>
<Node ToolID="10">
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
<Position x="774" y="414" />
</GuiSettings>
<Properties>
<Configuration>
<File MaxRecords="" FileFormat="0">.\Output\Join_out_R.csv</File>
<Passwords />
<Disable>False</Disable>
<FormatSpecificOptions>
<LineEndStyle>CRLF</LineEndStyle>
<Delimeter>,</Delimeter>
<ForceQuotes>False</ForceQuotes>
<HeaderRow>True</HeaderRow>
<CodePage>28591</CodePage>
<WriteBOM>True</WriteBOM>
</FormatSpecificOptions>
<MultiFile value="False" />
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText>Join_out_R.csv</DefaultAnnotationText>
<Left value="False" />
</Annotation>
<Dependencies>
<Implicit />
</Dependencies>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
</Node>
<Node ToolID="11">
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
<Position x="774" y="282" />
</GuiSettings>
<Properties>
<Configuration>
<File MaxRecords="" FileFormat="0">.\Output\Join_out_J.csv</File>
<Passwords />
<Disable>False</Disable>
<FormatSpecificOptions>
<LineEndStyle>CRLF</LineEndStyle>
<Delimeter>,</Delimeter>
<ForceQuotes>False</ForceQuotes>
<HeaderRow>True</HeaderRow>
<CodePage>28591</CodePage>
<WriteBOM>True</WriteBOM>
</FormatSpecificOptions>
<MultiFile value="False" />
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText>Join_out_J.csv</DefaultAnnotationText>
<Left value="False" />
</Annotation>
<Dependencies>
<Implicit />
</Dependencies>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
</Node>
<Node ToolID="12">
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
<Position x="774" y="138" />
</GuiSettings>
<Properties>
<Configuration>
<File MaxRecords="" FileFormat="0">.\Output\Join_out_L.csv</File>
<Passwords />
<Disable>False</Disable>
<FormatSpecificOptions>
<LineEndStyle>CRLF</LineEndStyle>
<Delimeter>,</Delimeter>
<ForceQuotes>False</ForceQuotes>
<HeaderRow>True</HeaderRow>
<CodePage>28591</CodePage>
<WriteBOM>True</WriteBOM>
</FormatSpecificOptions>
<MultiFile value="False" />
</Configuration>
<Annotation DisplayMode="0">
<Name />
<DefaultAnnotationText>Join_out_L.csv</DefaultAnnotationText>
<Left value="False" />
</Annotation>
<Dependencies>
<Implicit />
</Dependencies>
</Properties>
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
</Node>
</Nodes>
<Connections>
<Connection>
<Origin ToolID="2" Connection="Output" />
<Destination ToolID="7" Connection="Input" />
</Connection>
<Connection>
<Origin ToolID="3" Connection="Right" />
<Destination ToolID="10" Connection="Input" />
</Connection>
<Connection>
<Origin ToolID="3" Connection="Join" />
<Destination ToolID="11" Connection="Input" />
</Connection>
<Connection>
<Origin ToolID="3" Connection="Left" />
<Destination ToolID="12" Connection="Input" />
</Connection>
<Connection>
<Origin ToolID="4" Connection="Output" />
<Destination ToolID="3" Connection="Left" />
</Connection>
<Connection>
<Origin ToolID="6" Connection="Output" />
<Destination ToolID="3" Connection="Right" />
</Connection>
<Connection>
<Origin ToolID="6" Connection="Output" />
<Destination ToolID="9" Connection="Input" />
</Connection>
<Connection name="#1">
<Origin ToolID="7" Connection="Output" />
<Destination ToolID="6" Connection="Input" />
</Connection>
<Connection name="#2">
<Origin ToolID="8" Connection="Output" />
<Destination ToolID="6" Connection="Input" />
</Connection>
</Connections>
<Properties>
<Memory default="True" />
<GlobalRecordLimit value="0" />
<TempFiles default="True" />
<Annotation on="True" includeToolName="False" />
<ConvErrorLimit value="10" />
<ConvErrorLimit_Stop value="False" />
<CancelOnError value="False" />
<DisableBrowse value="False" />
<EnablePerformanceProfiling value="False" />
<RunWithE2 value="True" />
<PredictiveToolsCodePage value="1252" />
<DisableAllOutput value="False" />
<ShowAllMacroMessages value="False" />
<ShowConnectionStatusIsOn value="True" />
<ShowConnectionStatusOnlyWhenRunning value="True" />
<ZoomLevel value="0" />
<LayoutType>Horizontal</LayoutType>
<MetaInfo>
<NameIsFileName value="True" />
<Name>JoinTesting</Name>
<Description />
<RootToolName />
<ToolVersion />
<ToolInDb value="False" />
<CategoryName />
<SearchTags />
<Author />
<Company />
<Copyright />
<DescriptionLink actual="" displayed="" />
<Example>
<Description />
<File />
</Example>
<WorkflowId value="b5723901-81a1-4ff2-8b26-42700d914cb2" />
<Telemetry>
<PreviousWorkflowId value="ac869796-4ff0-4398-9782-f8fa79f30c4a" />
<OriginWorkflowId value="ac869796-4ff0-4398-9782-f8fa79f30c4a" />
</Telemetry>
</MetaInfo>
<Events>
<Enabled value="True" />
</Events>
</Properties>
</AlteryxDocument>

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,226 @@
Left_Store_ID,Product_ID,Left_Stock_On_Hand
15,31,4
15,32,16
15,33,8
15,34,7
16,31,14
16,32,7
16,33,6
16,34,2
16,35,6
17,31,20
17,32,15
17,33,27
17,34,11
18,31,4
18,32,9
18,33,9
18,34,8
18,35,10
19,31,4
19,32,5
19,33,0
19,34,15
19,35,14
20,31,10
20,32,9
20,33,28
20,34,19
21,31,19
21,32,3
21,33,16
21,34,16
22,31,34
22,32,38
22,33,8
22,34,6
22,35,2
23,31,19
23,32,11
23,33,6
23,34,18
23,35,4
24,31,10
24,32,10
24,33,4
24,34,17
24,35,19
25,31,0
25,32,10
25,33,4
25,34,23
26,31,4
26,32,2
26,33,2
26,34,17
26,35,8
27,31,13
27,32,6
27,33,7
27,34,9
28,31,18
28,32,3
28,33,9
28,34,19
29,31,3
29,32,7
29,33,6
29,34,16
30,31,20
30,32,13
30,33,10
30,34,18
31,31,39
31,32,12
31,33,20
31,34,20
32,31,4
32,32,8
32,33,13
32,34,20
33,31,7
33,32,15
33,33,9
33,34,14
33,35,18
34,31,30
34,32,19
34,33,9
34,34,17
34,35,20
35,31,74
35,32,20
35,33,14
35,34,9
36,31,6
36,32,7
36,33,21
36,34,2
36,35,12
37,31,14
37,32,0
37,33,10
37,34,13
37,35,14
38,31,17
38,32,20
38,33,9
38,34,18
38,35,2
39,31,15
39,32,5
39,33,14
39,34,4
40,31,5
40,32,7
40,33,16
40,34,5
41,31,18
41,32,29
41,33,13
41,34,15
41,35,10
1,31,7
1,32,4
1,33,2
1,34,0
1,35,12
2,31,18
2,32,10
2,33,11
2,34,18
3,31,29
3,32,4
3,33,4
3,34,7
4,31,35
4,32,6
4,33,2
4,34,0
4,35,4
5,31,31
5,32,10
5,33,17
5,34,10
6,31,17
6,32,7
6,33,7
6,34,8
6,35,3
7,31,15
7,32,3
7,33,18
7,34,2
7,35,17
8,31,27
8,32,7
8,33,17
8,34,18
8,35,8
9,31,6
9,32,3
9,33,9
9,34,5
9,35,4
10,31,7
10,32,13
10,33,12
10,34,16
10,35,2
11,31,20
11,32,4
11,33,6
11,34,9
12,31,13
12,32,9
12,33,5
12,34,9
12,35,9
13,31,24
13,32,7
13,33,3
13,34,3
14,31,5
14,32,2
14,33,2
14,34,8
42,31,11
42,32,4
42,33,18
42,34,34
42,35,13
43,31,18
43,32,38
43,33,5
43,34,7
44,31,8
44,32,29
44,33,0
44,34,22
45,31,6
45,32,6
45,33,7
45,34,3
46,31,13
46,32,8
46,33,11
46,34,24
47,31,48
47,32,6
47,33,13
47,34,3
48,31,41
48,32,7
48,33,0
48,34,39
48,35,3
49,31,51
49,32,11
49,33,15
49,34,2
49,35,19
50,31,18
50,32,9
50,33,1
50,34,17
50,35,8
1 Left_Store_ID Product_ID Left_Stock_On_Hand
2 15 31 4
3 15 32 16
4 15 33 8
5 15 34 7
6 16 31 14
7 16 32 7
8 16 33 6
9 16 34 2
10 16 35 6
11 17 31 20
12 17 32 15
13 17 33 27
14 17 34 11
15 18 31 4
16 18 32 9
17 18 33 9
18 18 34 8
19 18 35 10
20 19 31 4
21 19 32 5
22 19 33 0
23 19 34 15
24 19 35 14
25 20 31 10
26 20 32 9
27 20 33 28
28 20 34 19
29 21 31 19
30 21 32 3
31 21 33 16
32 21 34 16
33 22 31 34
34 22 32 38
35 22 33 8
36 22 34 6
37 22 35 2
38 23 31 19
39 23 32 11
40 23 33 6
41 23 34 18
42 23 35 4
43 24 31 10
44 24 32 10
45 24 33 4
46 24 34 17
47 24 35 19
48 25 31 0
49 25 32 10
50 25 33 4
51 25 34 23
52 26 31 4
53 26 32 2
54 26 33 2
55 26 34 17
56 26 35 8
57 27 31 13
58 27 32 6
59 27 33 7
60 27 34 9
61 28 31 18
62 28 32 3
63 28 33 9
64 28 34 19
65 29 31 3
66 29 32 7
67 29 33 6
68 29 34 16
69 30 31 20
70 30 32 13
71 30 33 10
72 30 34 18
73 31 31 39
74 31 32 12
75 31 33 20
76 31 34 20
77 32 31 4
78 32 32 8
79 32 33 13
80 32 34 20
81 33 31 7
82 33 32 15
83 33 33 9
84 33 34 14
85 33 35 18
86 34 31 30
87 34 32 19
88 34 33 9
89 34 34 17
90 34 35 20
91 35 31 74
92 35 32 20
93 35 33 14
94 35 34 9
95 36 31 6
96 36 32 7
97 36 33 21
98 36 34 2
99 36 35 12
100 37 31 14
101 37 32 0
102 37 33 10
103 37 34 13
104 37 35 14
105 38 31 17
106 38 32 20
107 38 33 9
108 38 34 18
109 38 35 2
110 39 31 15
111 39 32 5
112 39 33 14
113 39 34 4
114 40 31 5
115 40 32 7
116 40 33 16
117 40 34 5
118 41 31 18
119 41 32 29
120 41 33 13
121 41 34 15
122 41 35 10
123 1 31 7
124 1 32 4
125 1 33 2
126 1 34 0
127 1 35 12
128 2 31 18
129 2 32 10
130 2 33 11
131 2 34 18
132 3 31 29
133 3 32 4
134 3 33 4
135 3 34 7
136 4 31 35
137 4 32 6
138 4 33 2
139 4 34 0
140 4 35 4
141 5 31 31
142 5 32 10
143 5 33 17
144 5 34 10
145 6 31 17
146 6 32 7
147 6 33 7
148 6 34 8
149 6 35 3
150 7 31 15
151 7 32 3
152 7 33 18
153 7 34 2
154 7 35 17
155 8 31 27
156 8 32 7
157 8 33 17
158 8 34 18
159 8 35 8
160 9 31 6
161 9 32 3
162 9 33 9
163 9 34 5
164 9 35 4
165 10 31 7
166 10 32 13
167 10 33 12
168 10 34 16
169 10 35 2
170 11 31 20
171 11 32 4
172 11 33 6
173 11 34 9
174 12 31 13
175 12 32 9
176 12 33 5
177 12 34 9
178 12 35 9
179 13 31 24
180 13 32 7
181 13 33 3
182 13 34 3
183 14 31 5
184 14 32 2
185 14 33 2
186 14 34 8
187 42 31 11
188 42 32 4
189 42 33 18
190 42 34 34
191 42 35 13
192 43 31 18
193 43 32 38
194 43 33 5
195 43 34 7
196 44 31 8
197 44 32 29
198 44 33 0
199 44 34 22
200 45 31 6
201 45 32 6
202 45 33 7
203 45 34 3
204 46 31 13
205 46 32 8
206 46 33 11
207 46 34 24
208 47 31 48
209 47 32 6
210 47 33 13
211 47 34 3
212 48 31 41
213 48 32 7
214 48 33 0
215 48 34 39
216 48 35 3
217 49 31 51
218 49 32 11
219 49 33 15
220 49 34 2
221 49 35 19
222 50 31 18
223 50 32 9
224 50 33 1
225 50 34 17
226 50 35 8

View File

@ -0,0 +1,2 @@
Product_ID,Right_Product_Name,Right_Product_Category,Right_Product_Cost,Right_Product_Price
100,Non-product,NoCat,$1,$1
1 Product_ID Right_Product_Name Right_Product_Category Right_Product_Cost Right_Product_Price
2 100 Non-product NoCat $1 $1

View File

@ -0,0 +1,32 @@
Product_ID,Product_Name,Product_Category,Product_Cost,Product_Price
1,Action Figure,Toys,$9.99,$15.99
2,Animal Figures,Toys,$9.99,$12.99
3,Barrel O' Slime,Art & Crafts,$1.99,$3.99
4,Chutes & Ladders,Games,$9.99,$12.99
5,Classic Dominoes,Games,$7.99,$9.99
6,Colorbuds,Electronics,$6.99,$14.99
7,Dart Gun,Sports & Outdoors,$11.99,$15.99
8,Deck Of Cards,Games,$3.99,$6.99
9,Dino Egg,Toys,$9.99,$10.99
10,Dinosaur Figures,Toys,$10.99,$14.99
11,Etch A Sketch,Art & Crafts,$10.99,$20.99
12,Foam Disk Launcher,Sports & Outdoors,$8.99,$11.99
13,Gamer Headphones,Electronics,$14.99,$20.99
14,Glass Marbles,Games,$5.99,$10.99
15,Hot Wheels 5-Pack,Toys,$3.99,$5.99
16,Jenga,Games,$2.99,$9.99
17,Kids Makeup Kit,Art & Crafts,$13.99,$19.99
18,Lego Bricks,Toys,$34.99,$39.99
19,Magic Sand,Art & Crafts,$13.99,$15.99
20,Mini Basketball Hoop,Sports & Outdoors,$8.99,$24.99
21,Mini Ping Pong Set,Sports & Outdoors,$6.99,$9.99
22,Monopoly,Games,$13.99,$19.99
23,Mr. Potatohead,Toys,$4.99,$9.99
24,Nerf Gun,Sports & Outdoors,$14.99,$19.99
25,PlayDoh Can,Art & Crafts,$1.99,$2.99
26,PlayDoh Playset,Art & Crafts,$20.99,$24.99
27,PlayDoh Toolkit,Art & Crafts,$3.99,$4.99
28,Playfoam,Art & Crafts,$3.99,$10.99
29,Plush Pony,Toys,$8.99,$19.99
30,Rubik's Cube,Games,$17.99,$19.99
100,Non-product,NoCat,$1,$1
1 Product_ID Product_Name Product_Category Product_Cost Product_Price
2 1 Action Figure Toys $9.99 $15.99
3 2 Animal Figures Toys $9.99 $12.99
4 3 Barrel O' Slime Art & Crafts $1.99 $3.99
5 4 Chutes & Ladders Games $9.99 $12.99
6 5 Classic Dominoes Games $7.99 $9.99
7 6 Colorbuds Electronics $6.99 $14.99
8 7 Dart Gun Sports & Outdoors $11.99 $15.99
9 8 Deck Of Cards Games $3.99 $6.99
10 9 Dino Egg Toys $9.99 $10.99
11 10 Dinosaur Figures Toys $10.99 $14.99
12 11 Etch A Sketch Art & Crafts $10.99 $20.99
13 12 Foam Disk Launcher Sports & Outdoors $8.99 $11.99
14 13 Gamer Headphones Electronics $14.99 $20.99
15 14 Glass Marbles Games $5.99 $10.99
16 15 Hot Wheels 5-Pack Toys $3.99 $5.99
17 16 Jenga Games $2.99 $9.99
18 17 Kids Makeup Kit Art & Crafts $13.99 $19.99
19 18 Lego Bricks Toys $34.99 $39.99
20 19 Magic Sand Art & Crafts $13.99 $15.99
21 20 Mini Basketball Hoop Sports & Outdoors $8.99 $24.99
22 21 Mini Ping Pong Set Sports & Outdoors $6.99 $9.99
23 22 Monopoly Games $13.99 $19.99
24 23 Mr. Potatohead Toys $4.99 $9.99
25 24 Nerf Gun Sports & Outdoors $14.99 $19.99
26 25 PlayDoh Can Art & Crafts $1.99 $2.99
27 26 PlayDoh Playset Art & Crafts $20.99 $24.99
28 27 PlayDoh Toolkit Art & Crafts $3.99 $4.99
29 28 Playfoam Art & Crafts $3.99 $10.99
30 29 Plush Pony Toys $8.99 $19.99
31 30 Rubik's Cube Games $17.99 $19.99
32 100 Non-product NoCat $1 $1

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,226 @@
Store_ID,Product_ID,Stock_On_Hand
1,32,4
2,32,10
3,32,4
4,32,6
5,32,10
6,32,7
7,32,3
8,32,7
9,32,3
10,32,13
11,32,4
12,32,9
13,32,7
14,32,2
15,32,16
16,32,7
17,32,15
18,32,9
19,32,5
20,32,9
21,32,3
22,32,38
23,32,11
24,32,10
25,32,10
26,32,2
27,32,6
28,32,3
29,32,7
30,32,13
31,32,12
32,32,8
33,32,15
34,32,19
35,32,20
36,32,7
37,32,0
38,32,20
39,32,5
40,32,7
41,32,29
42,32,4
43,32,38
44,32,29
45,32,6
46,32,8
47,32,6
48,32,7
49,32,11
50,32,9
1,31,7
2,31,18
3,31,29
4,31,35
5,31,31
6,31,17
7,31,15
8,31,27
9,31,6
10,31,7
11,31,20
12,31,13
13,31,24
14,31,5
15,31,4
16,31,14
17,31,20
18,31,4
19,31,4
20,31,10
21,31,19
22,31,34
23,31,19
24,31,10
25,31,0
26,31,4
27,31,13
28,31,18
29,31,3
30,31,20
31,31,39
32,31,4
33,31,7
34,31,30
35,31,74
36,31,6
37,31,14
38,31,17
39,31,15
40,31,5
41,31,18
42,31,11
43,31,18
44,31,8
45,31,6
46,31,13
47,31,48
48,31,41
49,31,51
50,31,18
1,35,12
4,35,4
6,35,3
7,35,17
8,35,8
9,35,4
10,35,2
12,35,9
16,35,6
18,35,10
19,35,14
22,35,2
23,35,4
24,35,19
26,35,8
33,35,18
34,35,20
36,35,12
37,35,14
38,35,2
41,35,10
42,35,13
48,35,3
49,35,19
50,35,8
1,34,0
2,34,18
3,34,7
4,34,0
5,34,10
6,34,8
7,34,2
8,34,18
9,34,5
10,34,16
11,34,9
12,34,9
13,34,3
14,34,8
15,34,7
16,34,2
17,34,11
18,34,8
19,34,15
20,34,19
21,34,16
22,34,6
23,34,18
24,34,17
25,34,23
26,34,17
27,34,9
28,34,19
29,34,16
30,34,18
31,34,20
32,34,20
33,34,14
34,34,17
35,34,9
36,34,2
37,34,13
38,34,18
39,34,4
40,34,5
41,34,15
42,34,34
43,34,7
44,34,22
45,34,3
46,34,24
47,34,3
48,34,39
49,34,2
50,34,17
1,33,2
2,33,11
3,33,4
4,33,2
5,33,17
6,33,7
7,33,18
8,33,17
9,33,9
10,33,12
11,33,6
12,33,5
13,33,3
14,33,2
15,33,8
16,33,6
17,33,27
18,33,9
19,33,0
20,33,28
21,33,16
22,33,8
23,33,6
24,33,4
25,33,4
26,33,2
27,33,7
28,33,9
29,33,6
30,33,10
31,33,20
32,33,13
33,33,9
34,33,9
35,33,14
36,33,21
37,33,10
38,33,9
39,33,14
40,33,16
41,33,13
42,33,18
43,33,5
44,33,0
45,33,7
46,33,11
47,33,13
48,33,0
49,33,15
50,33,1
1 Store_ID Product_ID Stock_On_Hand
2 1 32 4
3 2 32 10
4 3 32 4
5 4 32 6
6 5 32 10
7 6 32 7
8 7 32 3
9 8 32 7
10 9 32 3
11 10 32 13
12 11 32 4
13 12 32 9
14 13 32 7
15 14 32 2
16 15 32 16
17 16 32 7
18 17 32 15
19 18 32 9
20 19 32 5
21 20 32 9
22 21 32 3
23 22 32 38
24 23 32 11
25 24 32 10
26 25 32 10
27 26 32 2
28 27 32 6
29 28 32 3
30 29 32 7
31 30 32 13
32 31 32 12
33 32 32 8
34 33 32 15
35 34 32 19
36 35 32 20
37 36 32 7
38 37 32 0
39 38 32 20
40 39 32 5
41 40 32 7
42 41 32 29
43 42 32 4
44 43 32 38
45 44 32 29
46 45 32 6
47 46 32 8
48 47 32 6
49 48 32 7
50 49 32 11
51 50 32 9
52 1 31 7
53 2 31 18
54 3 31 29
55 4 31 35
56 5 31 31
57 6 31 17
58 7 31 15
59 8 31 27
60 9 31 6
61 10 31 7
62 11 31 20
63 12 31 13
64 13 31 24
65 14 31 5
66 15 31 4
67 16 31 14
68 17 31 20
69 18 31 4
70 19 31 4
71 20 31 10
72 21 31 19
73 22 31 34
74 23 31 19
75 24 31 10
76 25 31 0
77 26 31 4
78 27 31 13
79 28 31 18
80 29 31 3
81 30 31 20
82 31 31 39
83 32 31 4
84 33 31 7
85 34 31 30
86 35 31 74
87 36 31 6
88 37 31 14
89 38 31 17
90 39 31 15
91 40 31 5
92 41 31 18
93 42 31 11
94 43 31 18
95 44 31 8
96 45 31 6
97 46 31 13
98 47 31 48
99 48 31 41
100 49 31 51
101 50 31 18
102 1 35 12
103 4 35 4
104 6 35 3
105 7 35 17
106 8 35 8
107 9 35 4
108 10 35 2
109 12 35 9
110 16 35 6
111 18 35 10
112 19 35 14
113 22 35 2
114 23 35 4
115 24 35 19
116 26 35 8
117 33 35 18
118 34 35 20
119 36 35 12
120 37 35 14
121 38 35 2
122 41 35 10
123 42 35 13
124 48 35 3
125 49 35 19
126 50 35 8
127 1 34 0
128 2 34 18
129 3 34 7
130 4 34 0
131 5 34 10
132 6 34 8
133 7 34 2
134 8 34 18
135 9 34 5
136 10 34 16
137 11 34 9
138 12 34 9
139 13 34 3
140 14 34 8
141 15 34 7
142 16 34 2
143 17 34 11
144 18 34 8
145 19 34 15
146 20 34 19
147 21 34 16
148 22 34 6
149 23 34 18
150 24 34 17
151 25 34 23
152 26 34 17
153 27 34 9
154 28 34 19
155 29 34 16
156 30 34 18
157 31 34 20
158 32 34 20
159 33 34 14
160 34 34 17
161 35 34 9
162 36 34 2
163 37 34 13
164 38 34 18
165 39 34 4
166 40 34 5
167 41 34 15
168 42 34 34
169 43 34 7
170 44 34 22
171 45 34 3
172 46 34 24
173 47 34 3
174 48 34 39
175 49 34 2
176 50 34 17
177 1 33 2
178 2 33 11
179 3 33 4
180 4 33 2
181 5 33 17
182 6 33 7
183 7 33 18
184 8 33 17
185 9 33 9
186 10 33 12
187 11 33 6
188 12 33 5
189 13 33 3
190 14 33 2
191 15 33 8
192 16 33 6
193 17 33 27
194 18 33 9
195 19 33 0
196 20 33 28
197 21 33 16
198 22 33 8
199 23 33 6
200 24 33 4
201 25 33 4
202 26 33 2
203 27 33 7
204 28 33 9
205 29 33 6
206 30 33 10
207 31 33 20
208 32 33 13
209 33 33 9
210 34 33 9
211 35 33 14
212 36 33 21
213 37 33 10
214 38 33 9
215 39 33 14
216 40 33 16
217 41 33 13
218 42 33 18
219 43 33 5
220 44 33 0
221 45 33 7
222 46 33 11
223 47 33 13
224 48 33 0
225 49 33 15
226 50 33 1

View File

@ -0,0 +1,2 @@
Product_ID,Product_Name,Product_Category,Product_Cost,Product_Price
100,Non-product,NoCat,$1,$1
1 Product_ID Product_Name Product_Category Product_Cost Product_Price
2 100 Non-product NoCat $1 $1

View File

@ -0,0 +1,2 @@
Product_ID,Product_Name,Product_Category,Product_Cost,Product_Price
100,Non-product,NoCat,$1,$1
1 Product_ID Product_Name Product_Category Product_Cost Product_Price
2 100 Non-product NoCat $1 $1

View File

@ -0,0 +1,100 @@
# 📚 Maven Toys Dataset Schema Relationship Guide
This document outlines the schema relationships and foreign key connections between all CSV files in this directory, suggesting how they can be joined for comprehensive data analysis.
## 🧩 Entity/Dimension Tables (The "Who" and "What")
These tables define core entities and are typically used as lookup tables.
1. **`stores`**: Information about the physical retail locations.
* **Primary Key (PK):** `Store_ID`
2. **`products`**: Master list of all items sold.
* **Primary Key (PK):** `Product_ID`
3. **`calendar`**: Time dimension data for the business.
* **Primary Key (PK):** `Date` (Assuming unique dates are recorded)
## 📊 Fact/Snapshot Tables (The "When" and "How Much")
These tables record events, measurements, or snapshots in time that link the dimensions together.
1. **`sales`**: The core transaction log. *This is the most frequently joined table.*
* **Foreign Keys (FKs):** `Store_ID` (references `stores`), `Product_ID` (references `products`), `Date` (references `calendar`).
2. **`inventory`**: Snapshot of stock levels at a point in time.
* **Composite Key/FKs:** (`Store_ID`, `Product_ID`) $\to$ Links to both `stores` and `products`.
3. **`data_dictionary`**: A metadata table rather than a fact table; it describes the fields of the other tables. It is not used for joins, but it is crucial for understanding column definitions.
## 🗓️ Time Dimension
* The **`calendar`** table provides temporal context, which can be joined with `sales` records to analyze performance around holidays or specific periods.
---
# 🔗 Relationship Map and Join Paths
The following sections show the explicit paths you can use for joining data in SQL or Python (Pandas/DuckDB).
### 1. Sales Analysis Path
* **Goal:** Analyzing a transaction's details, location, and item description.
* **Join Chain:** `sales` $\to$ (`stores`, `products`)
* **Example Join:** `FROM sales s JOIN stores st ON s.Store_ID = st.Store_ID JOIN products p ON s.Product_ID = p.Product_ID;`
### 2. Inventory Valuation Path
* **Goal:** Calculating the total value of current stock across all stores.
* **Join Chain:** `inventory` $\to$ (`stores`, `products`)
* **Example Join:** `FROM inventory i JOIN stores st ON i.Store_ID = st.Store_ID JOIN products p ON i.Product_ID = p.Product_ID;`
### 3. Comprehensive Performance Path (The Full Picture)
* **Goal:** Linking sales performance to store location details and calendar dates.
* **Join Chain:** `sales` $\to$ (`stores`, `products`, `calendar`)
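* **Notes:** Join on the `Date` column, which exists in both `sales` and `calendar` (e.g., `s.Date = c.Date`). Make sure both columns use the same date type or format before joining; the raw `calendar` file stores dates as `M/D/YYYY` text.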
---
# 💡 Example Queries (Ready for Use)
These queries demonstrate how to combine the tables.
### 1. Total Revenue Over Time
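Calculate the total revenue generated month-by-month, showing store performance over time. Note: in the raw `products` CSV, `Product_Price` is stored as text with a leading `$` (e.g., `$15.99`), so unless your loader already converts it, cast it to a numeric type before relying on the totals from this query.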
```sql
SELECT
strftime('%Y-%m', s.Date) AS sales_month, -- Grouping by Year and Month
st.Store_Name,
COUNT(DISTINCT p.Product_ID) AS distinct_products_sold,
SUM(s.Units * p.Product_Price) AS total_monthly_revenue
FROM sales s
JOIN stores st ON s.Store_ID = st.Store_ID
JOIN products p ON s.Product_ID = p.Product_ID
GROUP BY 1, 2
ORDER BY 1 DESC, total_monthly_revenue DESC;
```
### 2. Top Performing Product/Category Analysis
Identify the top 5 products by units sold, together with the category each product belongs to.
```sql
SELECT
p.Product_Name,
p.Product_Category,
SUM(s.Units) AS total_units_sold
FROM sales s
JOIN products p ON s.Product_ID = p.Product_ID
GROUP BY 1, 2
ORDER BY total_units_sold DESC
LIMIT 5;
```
### 3. Low Stock Alerts (Inventory Management)
List all stores and products where the current stock is below a specified threshold (e.g., < 50 units).
```sql
SELECT
st.Store_Name,
p.Product_Name,
i.Stock_On_Hand
FROM inventory i
JOIN stores st ON i.Store_ID = st.Store_ID
JOIN products p ON i.Product_ID = p.Product_ID
WHERE i.Stock_On_Hand < 50;
```

View File

@ -0,0 +1,639 @@
Date
1/1/2022
1/2/2022
1/3/2022
1/4/2022
1/5/2022
1/6/2022
1/7/2022
1/8/2022
1/9/2022
1/10/2022
1/11/2022
1/12/2022
1/13/2022
1/14/2022
1/15/2022
1/16/2022
1/17/2022
1/18/2022
1/19/2022
1/20/2022
1/21/2022
1/22/2022
1/23/2022
1/24/2022
1/25/2022
1/26/2022
1/27/2022
1/28/2022
1/29/2022
1/30/2022
1/31/2022
2/1/2022
2/2/2022
2/3/2022
2/4/2022
2/5/2022
2/6/2022
2/7/2022
2/8/2022
2/9/2022
2/10/2022
2/11/2022
2/12/2022
2/13/2022
2/14/2022
2/15/2022
2/16/2022
2/17/2022
2/18/2022
2/19/2022
2/20/2022
2/21/2022
2/22/2022
2/23/2022
2/24/2022
2/25/2022
2/26/2022
2/27/2022
2/28/2022
3/1/2022
3/2/2022
3/3/2022
3/4/2022
3/5/2022
3/6/2022
3/7/2022
3/8/2022
3/9/2022
3/10/2022
3/11/2022
3/12/2022
3/13/2022
3/14/2022
3/15/2022
3/16/2022
3/17/2022
3/18/2022
3/19/2022
3/20/2022
3/21/2022
3/22/2022
3/23/2022
3/24/2022
3/25/2022
3/26/2022
3/27/2022
3/28/2022
3/29/2022
3/30/2022
3/31/2022
4/1/2022
4/2/2022
4/3/2022
4/4/2022
4/5/2022
4/6/2022
4/7/2022
4/8/2022
4/9/2022
4/10/2022
4/11/2022
4/12/2022
4/13/2022
4/14/2022
4/15/2022
4/16/2022
4/17/2022
4/18/2022
4/19/2022
4/20/2022
4/21/2022
4/22/2022
4/23/2022
4/24/2022
4/25/2022
4/26/2022
4/27/2022
4/28/2022
4/29/2022
4/30/2022
5/1/2022
5/2/2022
5/3/2022
5/4/2022
5/5/2022
5/6/2022
5/7/2022
5/8/2022
5/9/2022
5/10/2022
5/11/2022
5/12/2022
5/13/2022
5/14/2022
5/15/2022
5/16/2022
5/17/2022
5/18/2022
5/19/2022
5/20/2022
5/21/2022
5/22/2022
5/23/2022
5/24/2022
5/25/2022
5/26/2022
5/27/2022
5/28/2022
5/29/2022
5/30/2022
5/31/2022
6/1/2022
6/2/2022
6/3/2022
6/4/2022
6/5/2022
6/6/2022
6/7/2022
6/8/2022
6/9/2022
6/10/2022
6/11/2022
6/12/2022
6/13/2022
6/14/2022
6/15/2022
6/16/2022
6/17/2022
6/18/2022
6/19/2022
6/20/2022
6/21/2022
6/22/2022
6/23/2022
6/24/2022
6/25/2022
6/26/2022
6/27/2022
6/28/2022
6/29/2022
6/30/2022
7/1/2022
7/2/2022
7/3/2022
7/4/2022
7/5/2022
7/6/2022
7/7/2022
7/8/2022
7/9/2022
7/10/2022
7/11/2022
7/12/2022
7/13/2022
7/14/2022
7/15/2022
7/16/2022
7/17/2022
7/18/2022
7/19/2022
7/20/2022
7/21/2022
7/22/2022
7/23/2022
7/24/2022
7/25/2022
7/26/2022
7/27/2022
7/28/2022
7/29/2022
7/30/2022
7/31/2022
8/1/2022
8/2/2022
8/3/2022
8/4/2022
8/5/2022
8/6/2022
8/7/2022
8/8/2022
8/9/2022
8/10/2022
8/11/2022
8/12/2022
8/13/2022
8/14/2022
8/15/2022
8/16/2022
8/17/2022
8/18/2022
8/19/2022
8/20/2022
8/21/2022
8/22/2022
8/23/2022
8/24/2022
8/25/2022
8/26/2022
8/27/2022
8/28/2022
8/29/2022
8/30/2022
8/31/2022
9/1/2022
9/2/2022
9/3/2022
9/4/2022
9/5/2022
9/6/2022
9/7/2022
9/8/2022
9/9/2022
9/10/2022
9/11/2022
9/12/2022
9/13/2022
9/14/2022
9/15/2022
9/16/2022
9/17/2022
9/18/2022
9/19/2022
9/20/2022
9/21/2022
9/22/2022
9/23/2022
9/24/2022
9/25/2022
9/26/2022
9/27/2022
9/28/2022
9/29/2022
9/30/2022
10/1/2022
10/2/2022
10/3/2022
10/4/2022
10/5/2022
10/6/2022
10/7/2022
10/8/2022
10/9/2022
10/10/2022
10/11/2022
10/12/2022
10/13/2022
10/14/2022
10/15/2022
10/16/2022
10/17/2022
10/18/2022
10/19/2022
10/20/2022
10/21/2022
10/22/2022
10/23/2022
10/24/2022
10/25/2022
10/26/2022
10/27/2022
10/28/2022
10/29/2022
10/30/2022
10/31/2022
11/1/2022
11/2/2022
11/3/2022
11/4/2022
11/5/2022
11/6/2022
11/7/2022
11/8/2022
11/9/2022
11/10/2022
11/11/2022
11/12/2022
11/13/2022
11/14/2022
11/15/2022
11/16/2022
11/17/2022
11/18/2022
11/19/2022
11/20/2022
11/21/2022
11/22/2022
11/23/2022
11/24/2022
11/25/2022
11/26/2022
11/27/2022
11/28/2022
11/29/2022
11/30/2022
12/1/2022
12/2/2022
12/3/2022
12/4/2022
12/5/2022
12/6/2022
12/7/2022
12/8/2022
12/9/2022
12/10/2022
12/11/2022
12/12/2022
12/13/2022
12/14/2022
12/15/2022
12/16/2022
12/17/2022
12/18/2022
12/19/2022
12/20/2022
12/21/2022
12/22/2022
12/23/2022
12/24/2022
12/25/2022
12/26/2022
12/27/2022
12/28/2022
12/29/2022
12/30/2022
12/31/2022
1/1/2023
1/2/2023
1/3/2023
1/4/2023
1/5/2023
1/6/2023
1/7/2023
1/8/2023
1/9/2023
1/10/2023
1/11/2023
1/12/2023
1/13/2023
1/14/2023
1/15/2023
1/16/2023
1/17/2023
1/18/2023
1/19/2023
1/20/2023
1/21/2023
1/22/2023
1/23/2023
1/24/2023
1/25/2023
1/26/2023
1/27/2023
1/28/2023
1/29/2023
1/30/2023
1/31/2023
2/1/2023
2/2/2023
2/3/2023
2/4/2023
2/5/2023
2/6/2023
2/7/2023
2/8/2023
2/9/2023
2/10/2023
2/11/2023
2/12/2023
2/13/2023
2/14/2023
2/15/2023
2/16/2023
2/17/2023
2/18/2023
2/19/2023
2/20/2023
2/21/2023
2/22/2023
2/23/2023
2/24/2023
2/25/2023
2/26/2023
2/27/2023
2/28/2023
3/1/2023
3/2/2023
3/3/2023
3/4/2023
3/5/2023
3/6/2023
3/7/2023
3/8/2023
3/9/2023
3/10/2023
3/11/2023
3/12/2023
3/13/2023
3/14/2023
3/15/2023
3/16/2023
3/17/2023
3/18/2023
3/19/2023
3/20/2023
3/21/2023
3/22/2023
3/23/2023
3/24/2023
3/25/2023
3/26/2023
3/27/2023
3/28/2023
3/29/2023
3/30/2023
3/31/2023
4/1/2023
4/2/2023
4/3/2023
4/4/2023
4/5/2023
4/6/2023
4/7/2023
4/8/2023
4/9/2023
4/10/2023
4/11/2023
4/12/2023
4/13/2023
4/14/2023
4/15/2023
4/16/2023
4/17/2023
4/18/2023
4/19/2023
4/20/2023
4/21/2023
4/22/2023
4/23/2023
4/24/2023
4/25/2023
4/26/2023
4/27/2023
4/28/2023
4/29/2023
4/30/2023
5/1/2023
5/2/2023
5/3/2023
5/4/2023
5/5/2023
5/6/2023
5/7/2023
5/8/2023
5/9/2023
5/10/2023
5/11/2023
5/12/2023
5/13/2023
5/14/2023
5/15/2023
5/16/2023
5/17/2023
5/18/2023
5/19/2023
5/20/2023
5/21/2023
5/22/2023
5/23/2023
5/24/2023
5/25/2023
5/26/2023
5/27/2023
5/28/2023
5/29/2023
5/30/2023
5/31/2023
6/1/2023
6/2/2023
6/3/2023
6/4/2023
6/5/2023
6/6/2023
6/7/2023
6/8/2023
6/9/2023
6/10/2023
6/11/2023
6/12/2023
6/13/2023
6/14/2023
6/15/2023
6/16/2023
6/17/2023
6/18/2023
6/19/2023
6/20/2023
6/21/2023
6/22/2023
6/23/2023
6/24/2023
6/25/2023
6/26/2023
6/27/2023
6/28/2023
6/29/2023
6/30/2023
7/1/2023
7/2/2023
7/3/2023
7/4/2023
7/5/2023
7/6/2023
7/7/2023
7/8/2023
7/9/2023
7/10/2023
7/11/2023
7/12/2023
7/13/2023
7/14/2023
7/15/2023
7/16/2023
7/17/2023
7/18/2023
7/19/2023
7/20/2023
7/21/2023
7/22/2023
7/23/2023
7/24/2023
7/25/2023
7/26/2023
7/27/2023
7/28/2023
7/29/2023
7/30/2023
7/31/2023
8/1/2023
8/2/2023
8/3/2023
8/4/2023
8/5/2023
8/6/2023
8/7/2023
8/8/2023
8/9/2023
8/10/2023
8/11/2023
8/12/2023
8/13/2023
8/14/2023
8/15/2023
8/16/2023
8/17/2023
8/18/2023
8/19/2023
8/20/2023
8/21/2023
8/22/2023
8/23/2023
8/24/2023
8/25/2023
8/26/2023
8/27/2023
8/28/2023
8/29/2023
8/30/2023
8/31/2023
9/1/2023
9/2/2023
9/3/2023
9/4/2023
9/5/2023
9/6/2023
9/7/2023
9/8/2023
9/9/2023
9/10/2023
9/11/2023
9/12/2023
9/13/2023
9/14/2023
9/15/2023
9/16/2023
9/17/2023
9/18/2023
9/19/2023
9/20/2023
9/21/2023
9/22/2023
9/23/2023
9/24/2023
9/25/2023
9/26/2023
9/27/2023
9/28/2023
9/29/2023
9/30/2023
1 Date
2 1/1/2022
3 1/2/2022
4 1/3/2022
5 1/4/2022
6 1/5/2022
7 1/6/2022
8 1/7/2022
9 1/8/2022
10 1/9/2022
11 1/10/2022
12 1/11/2022
13 1/12/2022
14 1/13/2022
15 1/14/2022
16 1/15/2022
17 1/16/2022
18 1/17/2022
19 1/18/2022
20 1/19/2022
21 1/20/2022
22 1/21/2022
23 1/22/2022
24 1/23/2022
25 1/24/2022
26 1/25/2022
27 1/26/2022
28 1/27/2022
29 1/28/2022
30 1/29/2022
31 1/30/2022
32 1/31/2022
33 2/1/2022
34 2/2/2022
35 2/3/2022
36 2/4/2022
37 2/5/2022
38 2/6/2022
39 2/7/2022
40 2/8/2022
41 2/9/2022
42 2/10/2022
43 2/11/2022
44 2/12/2022
45 2/13/2022
46 2/14/2022
47 2/15/2022
48 2/16/2022
49 2/17/2022
50 2/18/2022
51 2/19/2022
52 2/20/2022
53 2/21/2022
54 2/22/2022
55 2/23/2022
56 2/24/2022
57 2/25/2022
58 2/26/2022
59 2/27/2022
60 2/28/2022
61 3/1/2022
62 3/2/2022
63 3/3/2022
64 3/4/2022
65 3/5/2022
66 3/6/2022
67 3/7/2022
68 3/8/2022
69 3/9/2022
70 3/10/2022
71 3/11/2022
72 3/12/2022
73 3/13/2022
74 3/14/2022
75 3/15/2022
76 3/16/2022
77 3/17/2022
78 3/18/2022
79 3/19/2022
80 3/20/2022
81 3/21/2022
82 3/22/2022
83 3/23/2022
84 3/24/2022
85 3/25/2022
86 3/26/2022
87 3/27/2022
88 3/28/2022
89 3/29/2022
90 3/30/2022
91 3/31/2022
92 4/1/2022
93 4/2/2022
94 4/3/2022
95 4/4/2022
96 4/5/2022
97 4/6/2022
98 4/7/2022
99 4/8/2022
100 4/9/2022
101 4/10/2022
102 4/11/2022
103 4/12/2022
104 4/13/2022
105 4/14/2022
106 4/15/2022
107 4/16/2022
108 4/17/2022
109 4/18/2022
110 4/19/2022
111 4/20/2022
112 4/21/2022
113 4/22/2022
114 4/23/2022
115 4/24/2022
116 4/25/2022
117 4/26/2022
118 4/27/2022
119 4/28/2022
120 4/29/2022
121 4/30/2022
122 5/1/2022
123 5/2/2022
124 5/3/2022
125 5/4/2022
126 5/5/2022
127 5/6/2022
128 5/7/2022
129 5/8/2022
130 5/9/2022
131 5/10/2022
132 5/11/2022
133 5/12/2022
134 5/13/2022
135 5/14/2022
136 5/15/2022
137 5/16/2022
138 5/17/2022
139 5/18/2022
140 5/19/2022
141 5/20/2022
142 5/21/2022
143 5/22/2022
144 5/23/2022
145 5/24/2022
146 5/25/2022
147 5/26/2022
148 5/27/2022
149 5/28/2022
150 5/29/2022
151 5/30/2022
152 5/31/2022
153 6/1/2022
154 6/2/2022
155 6/3/2022
156 6/4/2022
157 6/5/2022
158 6/6/2022
159 6/7/2022
160 6/8/2022
161 6/9/2022
162 6/10/2022
163 6/11/2022
164 6/12/2022
165 6/13/2022
166 6/14/2022
167 6/15/2022
168 6/16/2022
169 6/17/2022
170 6/18/2022
171 6/19/2022
172 6/20/2022
173 6/21/2022
174 6/22/2022
175 6/23/2022
176 6/24/2022
177 6/25/2022
178 6/26/2022
179 6/27/2022
180 6/28/2022
181 6/29/2022
182 6/30/2022
183 7/1/2022
184 7/2/2022
185 7/3/2022
186 7/4/2022
187 7/5/2022
188 7/6/2022
189 7/7/2022
190 7/8/2022
191 7/9/2022
192 7/10/2022
193 7/11/2022
194 7/12/2022
195 7/13/2022
196 7/14/2022
197 7/15/2022
198 7/16/2022
199 7/17/2022
200 7/18/2022
201 7/19/2022
202 7/20/2022
203 7/21/2022
204 7/22/2022
205 7/23/2022
206 7/24/2022
207 7/25/2022
208 7/26/2022
209 7/27/2022
210 7/28/2022
211 7/29/2022
212 7/30/2022
213 7/31/2022
214 8/1/2022
215 8/2/2022
216 8/3/2022
217 8/4/2022
218 8/5/2022
219 8/6/2022
220 8/7/2022
221 8/8/2022
222 8/9/2022
223 8/10/2022
224 8/11/2022
225 8/12/2022
226 8/13/2022
227 8/14/2022
228 8/15/2022
229 8/16/2022
230 8/17/2022
231 8/18/2022
232 8/19/2022
233 8/20/2022
234 8/21/2022
235 8/22/2022
236 8/23/2022
237 8/24/2022
238 8/25/2022
239 8/26/2022
240 8/27/2022
241 8/28/2022
242 8/29/2022
243 8/30/2022
244 8/31/2022
245 9/1/2022
246 9/2/2022
247 9/3/2022
248 9/4/2022
249 9/5/2022
250 9/6/2022
251 9/7/2022
252 9/8/2022
253 9/9/2022
254 9/10/2022
255 9/11/2022
256 9/12/2022
257 9/13/2022
258 9/14/2022
259 9/15/2022
260 9/16/2022
261 9/17/2022
262 9/18/2022
263 9/19/2022
264 9/20/2022
265 9/21/2022
266 9/22/2022
267 9/23/2022
268 9/24/2022
269 9/25/2022
270 9/26/2022
271 9/27/2022
272 9/28/2022
273 9/29/2022
274 9/30/2022
275 10/1/2022
276 10/2/2022
277 10/3/2022
278 10/4/2022
279 10/5/2022
280 10/6/2022
281 10/7/2022
282 10/8/2022
283 10/9/2022
284 10/10/2022
285 10/11/2022
286 10/12/2022
287 10/13/2022
288 10/14/2022
289 10/15/2022
290 10/16/2022
291 10/17/2022
292 10/18/2022
293 10/19/2022
294 10/20/2022
295 10/21/2022
296 10/22/2022
297 10/23/2022
298 10/24/2022
299 10/25/2022
300 10/26/2022
301 10/27/2022
302 10/28/2022
303 10/29/2022
304 10/30/2022
305 10/31/2022
306 11/1/2022
307 11/2/2022
308 11/3/2022
309 11/4/2022
310 11/5/2022
311 11/6/2022
312 11/7/2022
313 11/8/2022
314 11/9/2022
315 11/10/2022
316 11/11/2022
317 11/12/2022
318 11/13/2022
319 11/14/2022
320 11/15/2022
321 11/16/2022
322 11/17/2022
323 11/18/2022
324 11/19/2022
325 11/20/2022
326 11/21/2022
327 11/22/2022
328 11/23/2022
329 11/24/2022
330 11/25/2022
331 11/26/2022
332 11/27/2022
333 11/28/2022
334 11/29/2022
335 11/30/2022
336 12/1/2022
337 12/2/2022
338 12/3/2022
339 12/4/2022
340 12/5/2022
341 12/6/2022
342 12/7/2022
343 12/8/2022
344 12/9/2022
345 12/10/2022
346 12/11/2022
347 12/12/2022
348 12/13/2022
349 12/14/2022
350 12/15/2022
351 12/16/2022
352 12/17/2022
353 12/18/2022
354 12/19/2022
355 12/20/2022
356 12/21/2022
357 12/22/2022
358 12/23/2022
359 12/24/2022
360 12/25/2022
361 12/26/2022
362 12/27/2022
363 12/28/2022
364 12/29/2022
365 12/30/2022
366 12/31/2022
367 1/1/2023
368 1/2/2023
369 1/3/2023
370 1/4/2023
371 1/5/2023
372 1/6/2023
373 1/7/2023
374 1/8/2023
375 1/9/2023
376 1/10/2023
377 1/11/2023
378 1/12/2023
379 1/13/2023
380 1/14/2023
381 1/15/2023
382 1/16/2023
383 1/17/2023
384 1/18/2023
385 1/19/2023
386 1/20/2023
387 1/21/2023
388 1/22/2023
389 1/23/2023
390 1/24/2023
391 1/25/2023
392 1/26/2023
393 1/27/2023
394 1/28/2023
395 1/29/2023
396 1/30/2023
397 1/31/2023
398 2/1/2023
399 2/2/2023
400 2/3/2023
401 2/4/2023
402 2/5/2023
403 2/6/2023
404 2/7/2023
405 2/8/2023
406 2/9/2023
407 2/10/2023
408 2/11/2023
409 2/12/2023
410 2/13/2023
411 2/14/2023
412 2/15/2023
413 2/16/2023
414 2/17/2023
415 2/18/2023
416 2/19/2023
417 2/20/2023
418 2/21/2023
419 2/22/2023
420 2/23/2023
421 2/24/2023
422 2/25/2023
423 2/26/2023
424 2/27/2023
425 2/28/2023
426 3/1/2023
427 3/2/2023
428 3/3/2023
429 3/4/2023
430 3/5/2023
431 3/6/2023
432 3/7/2023
433 3/8/2023
434 3/9/2023
435 3/10/2023
436 3/11/2023
437 3/12/2023
438 3/13/2023
439 3/14/2023
440 3/15/2023
441 3/16/2023
442 3/17/2023
443 3/18/2023
444 3/19/2023
445 3/20/2023
446 3/21/2023
447 3/22/2023
448 3/23/2023
449 3/24/2023
450 3/25/2023
451 3/26/2023
452 3/27/2023
453 3/28/2023
454 3/29/2023
455 3/30/2023
456 3/31/2023
457 4/1/2023
458 4/2/2023
459 4/3/2023
460 4/4/2023
461 4/5/2023
462 4/6/2023
463 4/7/2023
464 4/8/2023
465 4/9/2023
466 4/10/2023
467 4/11/2023
468 4/12/2023
469 4/13/2023
470 4/14/2023
471 4/15/2023
472 4/16/2023
473 4/17/2023
474 4/18/2023
475 4/19/2023
476 4/20/2023
477 4/21/2023
478 4/22/2023
479 4/23/2023
480 4/24/2023
481 4/25/2023
482 4/26/2023
483 4/27/2023
484 4/28/2023
485 4/29/2023
486 4/30/2023
487 5/1/2023
488 5/2/2023
489 5/3/2023
490 5/4/2023
491 5/5/2023
492 5/6/2023
493 5/7/2023
494 5/8/2023
495 5/9/2023
496 5/10/2023
497 5/11/2023
498 5/12/2023
499 5/13/2023
500 5/14/2023
501 5/15/2023
502 5/16/2023
503 5/17/2023
504 5/18/2023
505 5/19/2023
506 5/20/2023
507 5/21/2023
508 5/22/2023
509 5/23/2023
510 5/24/2023
511 5/25/2023
512 5/26/2023
513 5/27/2023
514 5/28/2023
515 5/29/2023
516 5/30/2023
517 5/31/2023
518 6/1/2023
519 6/2/2023
520 6/3/2023
521 6/4/2023
522 6/5/2023
523 6/6/2023
524 6/7/2023
525 6/8/2023
526 6/9/2023
527 6/10/2023
528 6/11/2023
529 6/12/2023
530 6/13/2023
531 6/14/2023
532 6/15/2023
533 6/16/2023
534 6/17/2023
535 6/18/2023
536 6/19/2023
537 6/20/2023
538 6/21/2023
539 6/22/2023
540 6/23/2023
541 6/24/2023
542 6/25/2023
543 6/26/2023
544 6/27/2023
545 6/28/2023
546 6/29/2023
547 6/30/2023
548 7/1/2023
549 7/2/2023
550 7/3/2023
551 7/4/2023
552 7/5/2023
553 7/6/2023
554 7/7/2023
555 7/8/2023
556 7/9/2023
557 7/10/2023
558 7/11/2023
559 7/12/2023
560 7/13/2023
561 7/14/2023
562 7/15/2023
563 7/16/2023
564 7/17/2023
565 7/18/2023
566 7/19/2023
567 7/20/2023
568 7/21/2023
569 7/22/2023
570 7/23/2023
571 7/24/2023
572 7/25/2023
573 7/26/2023
574 7/27/2023
575 7/28/2023
576 7/29/2023
577 7/30/2023
578 7/31/2023
579 8/1/2023
580 8/2/2023
581 8/3/2023
582 8/4/2023
583 8/5/2023
584 8/6/2023
585 8/7/2023
586 8/8/2023
587 8/9/2023
588 8/10/2023
589 8/11/2023
590 8/12/2023
591 8/13/2023
592 8/14/2023
593 8/15/2023
594 8/16/2023
595 8/17/2023
596 8/18/2023
597 8/19/2023
598 8/20/2023
599 8/21/2023
600 8/22/2023
601 8/23/2023
602 8/24/2023
603 8/25/2023
604 8/26/2023
605 8/27/2023
606 8/28/2023
607 8/29/2023
608 8/30/2023
609 8/31/2023
610 9/1/2023
611 9/2/2023
612 9/3/2023
613 9/4/2023
614 9/5/2023
615 9/6/2023
616 9/7/2023
617 9/8/2023
618 9/9/2023
619 9/10/2023
620 9/11/2023
621 9/12/2023
622 9/13/2023
623 9/14/2023
624 9/15/2023
625 9/16/2023
626 9/17/2023
627 9/18/2023
628 9/19/2023
629 9/20/2023
630 9/21/2023
631 9/22/2023
632 9/23/2023
633 9/24/2023
634 9/25/2023
635 9/26/2023
636 9/27/2023
637 9/28/2023
638 9/29/2023
639 9/30/2023

View File

@ -0,0 +1,20 @@
Table,Field,Description
Products,Product_ID,Product ID
Products,Product_Name,Product name
Products,Product_Category,Product Category
Products,Product_Cost,Product cost ($USD)
Products,Product_Price,Product retail price ($USD)
Inventory,Store_ID,Store ID
Inventory,Product_ID,Product ID
Inventory,Stock_On_Hand,Stock quantity of the product in the store (inventory)
Stores,Store_ID,Store ID
Stores,Store_Name,Store name
Stores,Store_City,City in Mexico where the store is located
Stores,Store_Location,Location in the city where the store is located
Stores,Store_Open_Date,Date when the store was opened
Sales,Sale_ID,Sale ID
Sales,Date,Date of the transaction
Sales,Store_ID,Store ID
Sales,Product_ID,Product ID
Sales,Units,Units sold
Calendar,Date,Calendar date
1 Table Field Description
2 Products Product_ID Product ID
3 Products Product_Name Product name
4 Products Product_Category Product Category
5 Products Product_Cost Product cost ($USD)
6 Products Product_Price Product retail price ($USD)
7 Inventory Store_ID Store ID
8 Inventory Product_ID Product ID
9 Inventory Stock_On_Hand Stock quantity of the product in the store (inventory)
10 Stores Store_ID Store ID
11 Stores Store_Name Store name
12 Stores Store_City City in Mexico where the store is located
13 Stores Store_Location Location in the city where the store is located
14 Stores Store_Open_Date Date when the store was opened
15 Sales Sale_ID Sale ID
16 Sales Date Date of the transaction
17 Sales Store_ID Store ID
18 Sales Product_ID Product ID
19 Sales Units Units sold
20 Calendar Date Calendar date

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,36 @@
Product_ID,Product_Name,Product_Category,Product_Cost,Product_Price
1,Action Figure,Toys,$9.99 ,$15.99
2,Animal Figures,Toys,$9.99 ,$12.99
3,Barrel O' Slime,Art & Crafts,$1.99 ,$3.99
4,Chutes & Ladders,Games,$9.99 ,$12.99
5,Classic Dominoes,Games,$7.99 ,$9.99
6,Colorbuds,Electronics,$6.99 ,$14.99
7,Dart Gun,Sports & Outdoors,$11.99 ,$15.99
8,Deck Of Cards,Games,$3.99 ,$6.99
9,Dino Egg,Toys,$9.99 ,$10.99
10,Dinosaur Figures,Toys,$10.99 ,$14.99
11,Etch A Sketch,Art & Crafts,$10.99 ,$20.99
12,Foam Disk Launcher,Sports & Outdoors,$8.99 ,$11.99
13,Gamer Headphones,Electronics,$14.99 ,$20.99
14,Glass Marbles,Games,$5.99 ,$10.99
15,Hot Wheels 5-Pack,Toys,$3.99 ,$5.99
16,Jenga,Games,$2.99 ,$9.99
17,Kids Makeup Kit,Art & Crafts,$13.99 ,$19.99
18,Lego Bricks,Toys,$34.99 ,$39.99
19,Magic Sand,Art & Crafts,$13.99 ,$15.99
20,Mini Basketball Hoop,Sports & Outdoors,$8.99 ,$24.99
21,Mini Ping Pong Set,Sports & Outdoors,$6.99 ,$9.99
22,Monopoly,Games,$13.99 ,$19.99
23,Mr. Potatohead,Toys,$4.99 ,$9.99
24,Nerf Gun,Sports & Outdoors,$14.99 ,$19.99
25,PlayDoh Can,Art & Crafts,$1.99 ,$2.99
26,PlayDoh Playset,Art & Crafts,$20.99 ,$24.99
27,PlayDoh Toolkit,Art & Crafts,$3.99 ,$4.99
28,Playfoam,Art & Crafts,$3.99 ,$10.99
29,Plush Pony,Toys,$8.99 ,$19.99
30,Rubik's Cube,Games,$17.99 ,$19.99
31,Splash Balls,Sports & Outdoors,$7.99 ,$8.99
32,Supersoaker Water Gun,Sports & Outdoors,$11.99 ,$14.99
33,Teddy Bear,Toys,$10.99 ,$12.99
34,Toy Robot,Electronics,$20.99 ,$25.99
35,Uno Card Game,Games,$3.99 ,$7.99
1 Product_ID Product_Name Product_Category Product_Cost Product_Price
2 1 Action Figure Toys $9.99 $15.99
3 2 Animal Figures Toys $9.99 $12.99
4 3 Barrel O' Slime Art & Crafts $1.99 $3.99
5 4 Chutes & Ladders Games $9.99 $12.99
6 5 Classic Dominoes Games $7.99 $9.99
7 6 Colorbuds Electronics $6.99 $14.99
8 7 Dart Gun Sports & Outdoors $11.99 $15.99
9 8 Deck Of Cards Games $3.99 $6.99
10 9 Dino Egg Toys $9.99 $10.99
11 10 Dinosaur Figures Toys $10.99 $14.99
12 11 Etch A Sketch Art & Crafts $10.99 $20.99
13 12 Foam Disk Launcher Sports & Outdoors $8.99 $11.99
14 13 Gamer Headphones Electronics $14.99 $20.99
15 14 Glass Marbles Games $5.99 $10.99
16 15 Hot Wheels 5-Pack Toys $3.99 $5.99
17 16 Jenga Games $2.99 $9.99
18 17 Kids Makeup Kit Art & Crafts $13.99 $19.99
19 18 Lego Bricks Toys $34.99 $39.99
20 19 Magic Sand Art & Crafts $13.99 $15.99
21 20 Mini Basketball Hoop Sports & Outdoors $8.99 $24.99
22 21 Mini Ping Pong Set Sports & Outdoors $6.99 $9.99
23 22 Monopoly Games $13.99 $19.99
24 23 Mr. Potatohead Toys $4.99 $9.99
25 24 Nerf Gun Sports & Outdoors $14.99 $19.99
26 25 PlayDoh Can Art & Crafts $1.99 $2.99
27 26 PlayDoh Playset Art & Crafts $20.99 $24.99
28 27 PlayDoh Toolkit Art & Crafts $3.99 $4.99
29 28 Playfoam Art & Crafts $3.99 $10.99
30 29 Plush Pony Toys $8.99 $19.99
31 30 Rubik's Cube Games $17.99 $19.99
32 31 Splash Balls Sports & Outdoors $7.99 $8.99
33 32 Supersoaker Water Gun Sports & Outdoors $11.99 $14.99
34 33 Teddy Bear Toys $10.99 $12.99
35 34 Toy Robot Electronics $20.99 $25.99
36 35 Uno Card Game Games $3.99 $7.99

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,51 @@
Store_ID,Store_Name,Store_City,Store_Location,Store_Open_Date
1,Maven Toys Guadalajara 1,Guadalajara,Residential,1992-09-18
2,Maven Toys Monterrey 1,Monterrey,Residential,1995-04-27
3,Maven Toys Guadalajara 2,Guadalajara,Commercial,1999-12-27
4,Maven Toys Saltillo 1,Saltillo,Downtown,2000-01-01
5,Maven Toys La Paz 1,La Paz,Downtown,2001-05-31
6,Maven Toys Mexicali 1,Mexicali,Commercial,2003-12-13
7,Maven Toys Monterrey 2,Monterrey,Downtown,2003-12-25
8,Maven Toys Pachuca 1,Pachuca,Downtown,2004-10-14
9,Maven Toys Ciudad de Mexico 1,Cuidad de Mexico,Downtown,2004-10-15
10,Maven Toys Campeche 1,Campeche,Downtown,2005-01-14
11,Maven Toys Cuernavaca 1,Cuernavaca,Downtown,2005-04-19
12,Maven Toys Chetumal 1,Chetumal,Downtown,2006-05-05
13,Maven Toys Mexicali 2,Mexicali,Downtown,2006-08-30
14,Maven Toys Guanajuato 1,Guanajuato,Downtown,2007-01-31
15,Maven Toys Tuxtla Gutierrez 1,Tuxtla Gutierrez,Downtown,2007-03-05
16,Maven Toys San Luis Potosi 1,San Luis Potosi,Downtown,2007-05-19
17,Maven Toys Toluca 1,Toluca,Downtown,2007-12-09
18,Maven Toys Merida 1,Merida,Downtown,2008-08-22
19,Maven Toys Puebla 1,Puebla,Commercial,2008-12-16
20,Maven Toys Zacatecas 1,Zacatecas,Downtown,2009-05-29
21,Maven Toys Santiago 1,Santiago,Downtown,2009-11-23
22,Maven Toys Guanajuato 2,Guanajuato,Commercial,2010-03-29
23,Maven Toys Chihuahua 1,Chihuahua,Commercial,2010-06-12
24,Maven Toys Aguascalientes 1,Aguascalientes,Downtown,2010-07-31
25,Maven Toys Ciudad Victoria 1,Ciudad Victoria,Downtown,2010-09-08
26,Maven Toys Campeche 2,Campeche,Commercial,2010-09-15
27,Maven Toys Oaxaca 1,Oaxaca,Downtown,2010-10-02
28,Maven Toys Puebla 2,Puebla,Downtown,2011-04-01
29,Maven Toys Xalapa 1,Xalapa,Commercial,2011-06-21
30,Maven Toys Guadalajara 3,Guadalajara,Airport,2011-10-20
31,Maven Toys Ciudad de Mexico 2,Cuidad de Mexico,Airport,2012-05-04
32,Maven Toys Hermosillo 1,Hermosillo,Residential,2012-08-31
33,Maven Toys Monterrey 3,Monterrey,Airport,2013-03-17
34,Maven Toys Villahermosa 1,Villahermosa,Downtown,2013-06-07
35,Maven Toys Chilpancingo 1,Chilpancingo,Downtown,2013-06-11
36,Maven Toys Morelia 1,Morelia,Downtown,2013-07-01
37,Maven Toys Ciudad de Mexico 3,Cuidad de Mexico,Residential,2013-11-28
38,Maven Toys Chihuahua 2,Chihuahua,Downtown,2014-03-18
39,Maven Toys Xalapa 2,Xalapa,Downtown,2014-04-21
40,Maven Toys Toluca 2,Toluca,Commercial,2014-05-27
41,Maven Toys Hermosillo 2,Hermosillo,Downtown,2014-06-01
42,Maven Toys Hermosillo 3,Hermosillo,Commercial,2014-06-27
43,Maven Toys Durango 1,Durango,Downtown,2014-06-30
44,Maven Toys Puebla 3,Puebla,Residential,2014-12-27
45,Maven Toys Ciudad de Mexico 4,Cuidad de Mexico,Commercial,2015-06-21
46,Maven Toys Guadalajara 4,Guadalajara,Downtown,2015-10-31
47,Maven Toys Monterrey 4,Monterrey,Commercial,2015-11-21
48,Maven Toys Saltillo 2,Saltillo,Commercial,2016-03-23
49,Maven Toys Culiacan 1,Culiacan,Downtown,2016-05-10
50,Maven Toys Guanajuato 3,Guanajuato,Residential,2016-05-18
1 Store_ID Store_Name Store_City Store_Location Store_Open_Date
2 1 Maven Toys Guadalajara 1 Guadalajara Residential 1992-09-18
3 2 Maven Toys Monterrey 1 Monterrey Residential 1995-04-27
4 3 Maven Toys Guadalajara 2 Guadalajara Commercial 1999-12-27
5 4 Maven Toys Saltillo 1 Saltillo Downtown 2000-01-01
6 5 Maven Toys La Paz 1 La Paz Downtown 2001-05-31
7 6 Maven Toys Mexicali 1 Mexicali Commercial 2003-12-13
8 7 Maven Toys Monterrey 2 Monterrey Downtown 2003-12-25
9 8 Maven Toys Pachuca 1 Pachuca Downtown 2004-10-14
10 9 Maven Toys Ciudad de Mexico 1 Cuidad de Mexico Downtown 2004-10-15
11 10 Maven Toys Campeche 1 Campeche Downtown 2005-01-14
12 11 Maven Toys Cuernavaca 1 Cuernavaca Downtown 2005-04-19
13 12 Maven Toys Chetumal 1 Chetumal Downtown 2006-05-05
14 13 Maven Toys Mexicali 2 Mexicali Downtown 2006-08-30
15 14 Maven Toys Guanajuato 1 Guanajuato Downtown 2007-01-31
16 15 Maven Toys Tuxtla Gutierrez 1 Tuxtla Gutierrez Downtown 2007-03-05
17 16 Maven Toys San Luis Potosi 1 San Luis Potosi Downtown 2007-05-19
18 17 Maven Toys Toluca 1 Toluca Downtown 2007-12-09
19 18 Maven Toys Merida 1 Merida Downtown 2008-08-22
20 19 Maven Toys Puebla 1 Puebla Commercial 2008-12-16
21 20 Maven Toys Zacatecas 1 Zacatecas Downtown 2009-05-29
22 21 Maven Toys Santiago 1 Santiago Downtown 2009-11-23
23 22 Maven Toys Guanajuato 2 Guanajuato Commercial 2010-03-29
24 23 Maven Toys Chihuahua 1 Chihuahua Commercial 2010-06-12
25 24 Maven Toys Aguascalientes 1 Aguascalientes Downtown 2010-07-31
26 25 Maven Toys Ciudad Victoria 1 Ciudad Victoria Downtown 2010-09-08
27 26 Maven Toys Campeche 2 Campeche Commercial 2010-09-15
28 27 Maven Toys Oaxaca 1 Oaxaca Downtown 2010-10-02
29 28 Maven Toys Puebla 2 Puebla Downtown 2011-04-01
30 29 Maven Toys Xalapa 1 Xalapa Commercial 2011-06-21
31 30 Maven Toys Guadalajara 3 Guadalajara Airport 2011-10-20
32 31 Maven Toys Ciudad de Mexico 2 Cuidad de Mexico Airport 2012-05-04
33 32 Maven Toys Hermosillo 1 Hermosillo Residential 2012-08-31
34 33 Maven Toys Monterrey 3 Monterrey Airport 2013-03-17
35 34 Maven Toys Villahermosa 1 Villahermosa Downtown 2013-06-07
36 35 Maven Toys Chilpancingo 1 Chilpancingo Downtown 2013-06-11
37 36 Maven Toys Morelia 1 Morelia Downtown 2013-07-01
38 37 Maven Toys Ciudad de Mexico 3 Cuidad de Mexico Residential 2013-11-28
39 38 Maven Toys Chihuahua 2 Chihuahua Downtown 2014-03-18
40 39 Maven Toys Xalapa 2 Xalapa Downtown 2014-04-21
41 40 Maven Toys Toluca 2 Toluca Commercial 2014-05-27
42 41 Maven Toys Hermosillo 2 Hermosillo Downtown 2014-06-01
43 42 Maven Toys Hermosillo 3 Hermosillo Commercial 2014-06-27
44 43 Maven Toys Durango 1 Durango Downtown 2014-06-30
45 44 Maven Toys Puebla 3 Puebla Residential 2014-12-27
46 45 Maven Toys Ciudad de Mexico 4 Cuidad de Mexico Commercial 2015-06-21
47 46 Maven Toys Guadalajara 4 Guadalajara Downtown 2015-10-31
48 47 Maven Toys Monterrey 4 Monterrey Commercial 2015-11-21
49 48 Maven Toys Saltillo 2 Saltillo Commercial 2016-03-23
50 49 Maven Toys Culiacan 1 Culiacan Downtown 2016-05-10
51 50 Maven Toys Guanajuato 3 Guanajuato Residential 2016-05-18

BIN
alteryx_runner/.DS_Store vendored Normal file

Binary file not shown.

View File

@ -0,0 +1,2 @@
"""Alteryx workflow runner — Python-native .yxmd executor."""
__version__ = "0.1.0"

View File

@ -0,0 +1,13 @@
import sys
from pathlib import Path

# The sibling modules are imported by their bare names (``cli`` here), so the
# package directory itself has to be importable when this file is started with
# `python -m alteryx_runner` from the project root.
_pkg_dir = Path(__file__).parent  # alteryx_runner/
_pkg_path = str(_pkg_dir)
if _pkg_path not in sys.path:
    sys.path.insert(0, _pkg_path)

from cli import main  # noqa: E402  (must follow the sys.path tweak above)

if __name__ == "__main__":
    main()

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

Binary file not shown.

94
alteryx_runner/cli.py Normal file
View File

@ -0,0 +1,94 @@
"""CLI entry point: python -m alteryx_runner run workflow.yxmd [options]"""
from __future__ import annotations
import sys
from pathlib import Path
import click
import polars as pl
@click.group()
def main() -> None:
    """Alteryx workflow runner — execute .yxmd files without Alteryx."""
    # Intentionally empty: this is only the click group the sub-commands
    # attach to; the docstring above is what click shows as help text.
@main.command()
@click.argument("workflow", type=click.Path(exists=True, path_type=Path))
@click.option(
    "--output-dir",
    default=None,
    type=click.Path(path_type=Path),
    help="Write output files to this directory.",
)
@click.option(
    "--param",
    multiple=True,
    metavar="KEY=VALUE",
    help="Set workflow constant (repeatable).",
)
@click.option(
    "--verbose",
    is_flag=True,
    default=False,
    help="Print Browse results and execution log.",
)
@click.option(
    "--dry-run",
    is_flag=True,
    default=False,
    help="Parse and validate only; do not execute.",
)
@click.option(
    "--format",
    "fmt",
    type=click.Choice(["json", "csv", "parquet"]),
    default="csv",
    help="Default output format for Browse nodes.",
)
def run(
    workflow: Path,
    output_dir: Path | None,
    param: tuple[str, ...],
    verbose: bool,
    dry_run: bool,
    fmt: str,
) -> None:
    """Execute WORKFLOW (.yxmd file)."""
    # Engine modules are imported lazily, inside the command.
    from engine.parser import parse_workflow
    from engine.executor import execute
    from engine.context import RunContext

    # NOTE(review): `fmt` is accepted but not used anywhere in this function.

    # Turn the repeated KEY=VALUE options into a dict; entries without '='
    # are reported on stderr and skipped.
    params: dict[str, str] = {}
    for item in param:
        key, sep, value = item.partition("=")
        if not sep:
            click.echo(f"Warning: --param {item!r} ignored (no '=' found)", err=True)
            continue
        params[key.strip()] = value.strip()

    click.echo(f"Parsing {workflow}")
    try:
        graph = parse_workflow(str(workflow))
    except Exception as exc:
        click.echo(f"Parse error: {exc}", err=True)
        sys.exit(1)
    click.echo(
        f" {len(graph.nodes)} nodes, {len(graph.connections)} connections"
    )

    if dry_run:
        click.echo("Dry run complete — no execution.")
        return

    ctx = RunContext(
        workflow_dir=str(workflow.parent),
        verbose=verbose,
        output_dir=None if not output_dir else str(output_dir),
        params=params,
    )

    click.echo("Executing …")
    try:
        outputs = execute(graph, ctx)
    except Exception as exc:
        click.echo(f"Execution error: {exc}", err=True)
        if verbose:
            import traceback

            traceback.print_exc()
        sys.exit(1)

    # Only polars DataFrames with at least one row count as "non-empty".
    non_empty = [
        frame
        for frame in outputs.values()
        if isinstance(frame, pl.DataFrame) and len(frame) > 0
    ]
    n_frames = len(non_empty)
    click.echo(f"Done. {n_frames} non-empty output frames produced.")
@main.command("list-tools")
def list_tools() -> None:
    """List all registered tool Plugin strings."""
    from tools import _REGISTRY

    # One line per plugin string (left-aligned, padded to 70 columns),
    # followed by the name of the class registered for it.
    for plugin in sorted(_REGISTRY):
        tool_cls = _REGISTRY[plugin]
        click.echo(f" {plugin:<70}{tool_cls.__name__}")
if __name__ == "__main__":
main()

View File

@ -0,0 +1,10 @@
from .parser import parse_workflow
from .executor import execute
from .context import RunContext
from .graph import WorkflowGraph, NodeDef, ConnectionDef, FieldDef
from .type_mapper import TypeMapper
__all__ = [
"parse_workflow", "execute", "RunContext",
"WorkflowGraph", "NodeDef", "ConnectionDef", "FieldDef", "TypeMapper",
]

View File

@ -0,0 +1,52 @@
from __future__ import annotations
import tempfile
from pathlib import Path
import duckdb
from .type_mapper import TypeMapper
from expression.transpiler import ExpressionTranspiler
class RunContext:
    """State shared by all tools during a single workflow run.

    Holds the workflow directory, the verbosity flag, an optional output
    directory override, an in-memory DuckDB connection, a scratch directory,
    the type mapper, the expression transpiler and the workflow constants.
    """

    def __init__(
        self,
        workflow_dir: str,
        verbose: bool = False,
        output_dir: str | None = None,
        params: dict | None = None,
    ):
        """Create the context.

        Args:
            workflow_dir: Directory relative paths are resolved against.
            verbose: Stored as ``self.verbose`` for tools/executor to consult.
            output_dir: If given, ``resolve_output_path`` redirects files here.
            params: Workflow constants, substituted as ``%KEY%`` in paths.
        """
        self.workflow_dir = Path(workflow_dir)
        self.verbose = verbose
        self.output_dir: Path | None = Path(output_dir) if output_dir else None
        self.duckdb_con = duckdb.connect(":memory:")
        self.temp_dir = Path(tempfile.mkdtemp(prefix="alteryx_runner_"))
        self.type_mapper = TypeMapper()
        self.transpiler = ExpressionTranspiler(self.duckdb_con)
        self.constants: dict = params or {}

    def resolve_path(self, path: str) -> Path:
        """Expand placeholders in *path* and anchor it at the workflow dir.

        Backslashes become forward slashes, ``%temp%`` and ``%Desktop%`` are
        expanded, every ``%KEY%`` workflow constant is substituted, and a
        relative result is joined onto ``self.workflow_dir``.
        """
        # Normalise Windows backslashes so relative segments like .. work on
        # POSIX platforms (workflow XMLs are authored on Windows).
        path = path.replace("\\", "/")
        path = path.replace("%temp%", str(self.temp_dir) + "/")
        path = path.replace("%Desktop%", str(Path.home() / "Desktop") + "/")
        # Substitute workflow constants. Values are coerced with str() because
        # ``params`` is an untyped dict: a numeric constant would otherwise
        # make str.replace raise TypeError.
        for key, value in self.constants.items():
            path = path.replace(f"%{key}%", str(value))
        resolved = Path(path)
        if not resolved.is_absolute():
            resolved = self.workflow_dir / resolved
        return resolved

    def resolve_output_path(self, path: str) -> Path:
        """Resolve a file-write destination.

        Same as ``resolve_path``, except that when an output directory
        override is set only the file name is kept and placed inside it.
        """
        resolved = self.resolve_path(path)
        if self.output_dir is not None:
            return self.output_dir / resolved.name
        return resolved

    def __del__(self):
        # Best-effort cleanup. The attribute is missing when __init__ failed
        # before the connection was opened, so look it up defensively.
        con = getattr(self, "duckdb_con", None)
        if con is None:
            return
        try:
            con.close()
        except Exception:
            # Deliberately ignored: nothing useful can be done in a finaliser.
            pass

View File

@ -0,0 +1,95 @@
from __future__ import annotations
from collections import defaultdict, deque
import polars as pl
from .graph import WorkflowGraph, ConnectionDef
from .context import RunContext
from tools import get_tool_class
def execute(graph: WorkflowGraph, ctx: RunContext) -> dict[tuple, pl.DataFrame]:
    """Execute a WorkflowGraph in topological (BFS) order.

    A node runs once all of its incoming connections have been satisfied.
    Nodes whose plugin has no registered tool class are passed through via
    ``_passthrough``. Returns a mapping of ``(tool_id, anchor)`` to the
    DataFrame produced on that anchor.
    """
    # pending[tool_id] = number of incoming connections not yet satisfied.
    pending: dict[int, int] = defaultdict(int)
    outgoing: dict[int, list[ConnectionDef]] = defaultdict(list)
    incoming: dict[int, list[ConnectionDef]] = defaultdict(list)
    for conn in graph.connections:
        pending[conn.dest_id] += 1
        outgoing[conn.origin_id].append(conn)
        incoming[conn.dest_id].append(conn)
    # Nodes with no incoming connection still need an entry so they can start.
    for tool_id in graph.nodes:
        pending.setdefault(tool_id, 0)

    # (tool_id, anchor) → DataFrame
    outputs: dict[tuple[int, str], pl.DataFrame] = {}
    ready: deque[int] = deque(
        tool_id for tool_id, count in pending.items() if count == 0
    )

    while ready:
        tool_id = ready.popleft()
        node = graph.nodes[tool_id]
        tool_cls = get_tool_class(node.plugin)

        if tool_cls is None:
            if ctx.verbose:
                print(f"[SKIP] ToolID={tool_id} plugin={node.plugin!r} (unsupported)")
            _passthrough(tool_id, incoming, outputs, outgoing, pending, ready)
            continue

        tool = tool_cls(node, ctx)

        # Count connections per destination anchor so that anchors fed by
        # several connections (e.g. Union) can be told apart below.
        feeds = incoming[tool_id]
        per_anchor: dict[str, int] = defaultdict(int)
        for conn in feeds:
            per_anchor[conn.dest_anchor] += 1

        inputs: dict[str, pl.DataFrame] = {}
        for conn in feeds:
            frame = outputs.get((conn.origin_id, conn.origin_anchor))
            if frame is None:
                continue
            # A shared anchor is keyed by the connection name ('#1', '#2', …)
            # when one exists; otherwise the anchor name is the key.
            shared = per_anchor[conn.dest_anchor] > 1
            key = conn.name if (shared and conn.name) else conn.dest_anchor
            inputs[key] = frame

        if ctx.verbose:
            print(f"[RUN ] ToolID={tool_id} plugin={node.plugin!r}")

        produced = tool.execute(inputs)
        for anchor, frame in produced.items():
            outputs[(tool_id, anchor)] = frame

        # Release downstream nodes whose last outstanding input this was.
        for conn in outgoing[tool_id]:
            pending[conn.dest_id] -= 1
            if pending[conn.dest_id] == 0:
                ready.append(conn.dest_id)

    return outputs
def _passthrough(
    tid: int,
    predecessors: dict[int, list[ConnectionDef]],
    outputs: dict[tuple[int, str], pl.DataFrame],
    successors: dict[int, list[ConnectionDef]],
    in_degree: dict[int, int],
    queue: deque[int],
) -> None:
    """Propagate a single upstream output through a no-op node.

    The frame of the first incoming connection (or an empty DataFrame when
    there is none, or when it produced nothing) is stored under the node's
    "Output" anchor, then successors are released exactly as after a real run.
    """
    incoming = predecessors.get(tid, [])
    if incoming:
        source = incoming[0]
        frame = outputs.get((source.origin_id, source.origin_anchor), pl.DataFrame())
    else:
        frame = pl.DataFrame()
    outputs[(tid, "Output")] = frame

    for conn in successors.get(tid, []):
        in_degree[conn.dest_id] -= 1
        if in_degree[conn.dest_id] == 0:
            queue.append(conn.dest_id)

View File

@ -0,0 +1,38 @@
from __future__ import annotations
from dataclasses import dataclass, field
from typing import Dict, List, Optional
import xml.etree.ElementTree as ET
@dataclass
class FieldDef:
    """One field (column) of a node's declared output schema."""

    # Field name and type string, stored exactly as given.
    name: str
    type: str
    # Optional extras; None when not provided.
    size: Optional[int] = None
    source: Optional[str] = None
@dataclass
class NodeDef:
    """A single tool node of a workflow."""

    tool_id: int
    # Plugin identifier string used to look up the tool implementation.
    plugin: str
    # The node's configuration XML element, or None when absent.
    config: Optional[ET.Element]
    # Fresh list per instance (default_factory), never shared between nodes.
    output_schema: List[FieldDef] = field(default_factory=list)
    # (x, y) pair; defaults to the origin.
    position: tuple = (0, 0)
@dataclass
class ConnectionDef:
    """A directed connection from one tool's output anchor to another's input."""

    # Source side.
    origin_id: int
    origin_anchor: str
    # Destination side.
    dest_id: int
    dest_anchor: str
    # Optional connection name (e.g. '#1', '#2' on multi-input anchors).
    name: Optional[str] = None
    wireless: bool = False
@dataclass
class WorkflowGraph:
    """A parsed workflow: its nodes, its connections and its properties."""

    # Nodes keyed by tool id.
    nodes: Dict[int, NodeDef]
    connections: List[ConnectionDef]
    # The workflow-level properties XML element, or None when absent.
    properties: Optional[ET.Element]

View File

@ -0,0 +1,82 @@
from __future__ import annotations
import xml.etree.ElementTree as ET
from typing import Dict, List, Optional
from .graph import FieldDef, NodeDef, ConnectionDef, WorkflowGraph
def parse_workflow(path: str) -> WorkflowGraph:
    """Parse a .yxmd XML file into a WorkflowGraph.

    Nodes are gathered (including those nested in ChildNodes) via
    ``_collect_nodes``; connections are read from ``Connections/Connection``
    under the root. A connection lacking an Origin or Destination element is
    skipped.
    """
    root = ET.parse(path).getroot()

    nodes: Dict[int, NodeDef] = {}
    _collect_nodes(root, nodes)

    connections: List[ConnectionDef] = []
    for conn_el in root.findall("Connections/Connection"):
        origin_el = conn_el.find("Origin")
        dest_el = conn_el.find("Destination")
        if origin_el is None or dest_el is None:
            continue
        conn = ConnectionDef(
            origin_id=int(origin_el.attrib["ToolID"]),
            # Anchor names fall back to "Output"/"Input" when not specified.
            origin_anchor=origin_el.attrib.get("Connection", "Output"),
            dest_id=int(dest_el.attrib["ToolID"]),
            dest_anchor=dest_el.attrib.get("Connection", "Input"),
            name=conn_el.attrib.get("name"),
            wireless=conn_el.attrib.get("Wireless", "False") == "True",
        )
        connections.append(conn)

    return WorkflowGraph(
        nodes=nodes,
        connections=connections,
        properties=root.find("Properties"),
    )
def _collect_nodes(parent: ET.Element, nodes: Dict[int, NodeDef]) -> None:
    """Recursively collect Node elements, flattening ChildNodes containers.

    Handles the ``Nodes/Node`` level under *parent*; anything nested in a
    node's ChildNodes element is gathered depth-first, parent before children.
    """
    for node_el in parent.findall("Nodes/Node"):
        _parse_node(node_el, nodes)
        # Tool containers keep their members in a ChildNodes element.
        container = node_el.find("ChildNodes")
        if container is not None:
            _collect_nodes_flat(container, nodes)
def _collect_nodes_flat(parent: ET.Element, nodes: Dict[int, NodeDef]) -> None:
    """Collect the direct ``Node`` children of *parent*, descending into ChildNodes.

    Each node is parsed before its own nested nodes (pre-order).
    """
    for member in parent.findall("Node"):
        _parse_node(member, nodes)
        nested = member.find("ChildNodes")
        if nested is None:
            continue
        _collect_nodes_flat(nested, nodes)
def _parse_node(node_el: ET.Element, nodes: Dict[int, NodeDef]) -> None:
    """Build a NodeDef from one Node element and store it in *nodes* by ToolID.

    An existing entry with the same ToolID is overwritten. Plugin falls back
    to "" and position to (0, 0) when GuiSettings / Position are missing.
    """
    tool_id = int(node_el.attrib["ToolID"])

    gui = node_el.find("GuiSettings")
    if gui is None:
        plugin = ""
        pos_el = None
    else:
        plugin = gui.attrib.get("Plugin", "")
        pos_el = gui.find("Position")

    config = node_el.find("Properties/Configuration")

    if pos_el is None:
        position = (0, 0)
    else:
        position = (
            int(pos_el.attrib.get("x", 0)),
            int(pos_el.attrib.get("y", 0)),
        )

    nodes[tool_id] = NodeDef(
        tool_id=tool_id,
        plugin=plugin,
        config=config,
        output_schema=_parse_schema(node_el),
        position=position,
    )
def _parse_schema(node_el: ET.Element) -> List[FieldDef]:
    """Return a FieldDef for every ``MetaInfo/RecordInfo/Field`` below *node_el*.

    The search uses ``.//``, so Field elements at any depth under the node are
    included. A missing type defaults to "V_String"; size is truncated to an
    int (``int(float(...))``) or None when absent/empty.
    """

    def _to_field(field_el: ET.Element) -> FieldDef:
        raw_size = field_el.attrib.get("size")
        size = None if not raw_size else int(float(raw_size))
        return FieldDef(
            name=field_el.attrib["name"],
            type=field_el.attrib.get("type", "V_String"),
            size=size,
            source=field_el.attrib.get("source"),
        )

    return [
        _to_field(field_el)
        for field_el in node_el.findall(".//MetaInfo/RecordInfo/Field")
    ]

View File

@ -0,0 +1,66 @@
from __future__ import annotations
from typing import Optional
import polars as pl
class TypeMapper:
    """Maps Alteryx field types to Polars dtypes and DuckDB type strings.

    Unknown type names fall back to a string type in both mappings.
    ``FixedDecimal`` is handled separately because its precision and scale
    come from the field's size ("precision.scale").
    """

    # Precision/scale used for FixedDecimal when no size is supplied.
    _DEFAULT_DECIMAL: tuple[int, int] = (19, 2)

    _POLARS: dict[str, pl.PolarsDataType] = {
        "Bool": pl.Boolean,
        "Byte": pl.UInt8,
        "Int16": pl.Int16,
        "Int32": pl.Int32,
        "Int64": pl.Int64,
        "Float": pl.Float32,
        "Double": pl.Float64,
        "String": pl.String,
        "V_String": pl.String,
        "WString": pl.String,
        "V_WString": pl.String,
        "Date": pl.Date,
        "Time": pl.Time,
        "DateTime": pl.Datetime,
        "SpatialObj": pl.String,
        "Blob": pl.Binary,
    }

    _DUCKDB: dict[str, str] = {
        "Bool": "BOOLEAN",
        "Byte": "UTINYINT",
        "Int16": "SMALLINT",
        "Int32": "INTEGER",
        "Int64": "BIGINT",
        "Float": "FLOAT",
        "Double": "DOUBLE",
        "String": "VARCHAR",
        "V_String": "VARCHAR",
        "WString": "VARCHAR",
        "V_WString": "VARCHAR",
        "Date": "DATE",
        "Time": "TIME",
        "DateTime": "TIMESTAMP",
        "SpatialObj": "VARCHAR",
        "Blob": "BLOB",
        "FixedDecimal": "DECIMAL",
    }

    @classmethod
    def _decimal_params(cls, size: str | int | float | None) -> tuple[int, int]:
        """Split a FixedDecimal size into ``(precision, scale)``.

        *size* may be a "precision.scale" string or a number; it is converted
        with ``str()`` first so an int size (as stored on ``FieldDef.size``)
        no longer fails on ``.split``. A falsy size yields the default.
        """
        if not size:
            return cls._DEFAULT_DECIMAL
        parts = str(size).split(".")
        precision = int(parts[0])
        scale = int(parts[1]) if len(parts) > 1 else 0
        return precision, scale

    def map(
        self, alteryx_type: str, size: str | int | float | None = None
    ) -> pl.PolarsDataType:
        """Return the Polars dtype for *alteryx_type* (``pl.String`` if unknown)."""
        if alteryx_type == "FixedDecimal":
            precision, scale = self._decimal_params(size)
            return pl.Decimal(precision=precision, scale=scale)
        return self._POLARS.get(alteryx_type, pl.String)

    def map_duckdb(
        self, alteryx_type: str, size: str | int | float | None = None
    ) -> str:
        """Return the DuckDB type string for *alteryx_type* ("VARCHAR" if unknown)."""
        if alteryx_type == "FixedDecimal":
            precision, scale = self._decimal_params(size)
            return f"DECIMAL({precision},{scale})"
        return self._DUCKDB.get(alteryx_type, "VARCHAR")

View File

@ -0,0 +1,4 @@
from .transpiler import ExpressionTranspiler, transpile, UnsupportedExpressionError
from .functions import get_function_sql
__all__ = ["ExpressionTranspiler", "transpile", "UnsupportedExpressionError", "get_function_sql"]

View File

@ -0,0 +1,152 @@
"""Mapping of Alteryx built-in functions to DuckDB SQL equivalents."""
from __future__ import annotations
# Function translations: Alteryx function name → DuckDB SQL template.
# Templates use positional placeholders {0}, {1}, … for the already-rendered
# argument SQL. A value of None marks a function that has no template (it is
# either rendered by dedicated code or is not a scalar function).
FUNCTION_MAP: dict[str, str | None] = {
    # String
    "Uppercase": "UPPER({0})",
    "Lowercase": "LOWER({0})",
    "Trim": "TRIM({0})",
    "LTrim": "LTRIM({0})",
    "RTrim": "RTRIM({0})",
    "Length": "LENGTH({0})",
    "Left": "LEFT({0}, {1})",
    "Right": "RIGHT({0}, {1})",
    # Alteryx positions are 0-based, DuckDB's SUBSTR/INSTR are 1-based
    # (INSTR returns 0 for "not found", Alteryx FindString returns -1).
    "Substring": "SUBSTR({0}, ({1}) + 1, {2})",
    "FindString": "(INSTR({0}, {1}) - 1)",
    "ReplaceChar": "REPLACE({0}, {1}, {2})",
    "StringToDate": "STRPTIME({0}, {1})",
    "ToString": "PRINTF('%.' || {1} || 'f', {0})",
    "Contains": "CONTAINS({0}, {1})",
    "StartsWith": "STARTS_WITH({0}, {1})",
    "EndsWith": "ENDS_WITH({0}, {1})",
    "REGEX_Match": "REGEXP_MATCHES({0}, {1})",
    "REGEX_Replace": "REGEXP_REPLACE({0}, {1}, {2})",
    "PadLeft": "LPAD({0}, {1}, {2})",
    "PadRight": "RPAD({0}, {1}, {2})",
    # DuckDB does not process backslash escapes in plain '...' literals, so
    # the SQL text must contain a single backslash: \s+
    "GetWord": "list_extract(str_split_regex({0}, '\\s+'), {1} + 1)",
    "CountWords": "array_length(str_split_regex(TRIM({0}), '\\s+'))",
    "CharFromInt": "chr({0}::INTEGER)",
    "IntFromChar": "ascii({0})",
    "ConvertFromCodePage": "{0}",
    "ReverseString": "reverse({0})",
    "DecomposeUnicodeForMatch": "strip_accents(UPPER({0}))",
    # Math
    "ABS": "ABS({0})",
    "Abs": "ABS({0})",
    "CEIL": "CEIL({0})",
    "Ceil": "CEIL({0})",
    "FLOOR": "FLOOR({0})",
    "Floor": "FLOOR({0})",
    # Alteryx Round(x, mult) rounds to the nearest multiple of mult, whereas
    # DuckDB ROUND(x, n) rounds to n decimal places.
    "ROUND": "(ROUND(({0}) / ({1})) * ({1}))",
    "Round": "(ROUND(({0}) / ({1})) * ({1}))",
    "SQRT": "SQRT({0})",
    "Sqrt": "SQRT({0})",
    "POW": "POWER({0}, {1})",
    "Pow": "POWER({0}, {1})",
    "LOG": "LN({0})",
    "Log": "LN({0})",
    "LOG10": "LOG10({0})",
    "Log10": "LOG10({0})",
    "MOD": "({0} % {1})",
    "Mod": "({0} % {1})",
    "MIN": "LEAST({0}, {1})",
    "Max": "GREATEST({0}, {1})",
    "MAX": "GREATEST({0}, {1})",
    "Min": "LEAST({0}, {1})",
    "RandInt": "FLOOR(RANDOM() * {0})::BIGINT",
    "Random": "RANDOM()",
    "PI": "PI()",
    "SIN": "SIN({0})",
    "COS": "COS({0})",
    "TAN": "TAN({0})",
    "ASIN": "ASIN({0})",
    "ACOS": "ACOS({0})",
    "ATAN": "ATAN({0})",
    "ATAN2": "ATAN2({0}, {1})",
    "EXP": "EXP({0})",
    "Sign": "SIGN({0})",
    # Null handling
    "IsNull": "({0} IS NULL)",
    "IsEmpty": "({0} IS NULL OR {0} = '')",
    "NullConvert": "NULLIF({0}, '')",
    "Null": "NULL",
    # Type conversion
    "ToNumber": "TRY_CAST({0} AS DOUBLE)",
    "ToString_num": "CAST({0} AS VARCHAR)",
    "TOBOOL": "CAST({0} AS BOOLEAN)",
    # Date/Time
    "DateTimeNow": "NOW()",
    "DateTimeToday": "CURRENT_DATE",
    "DateTimeAdd": "({0} + INTERVAL ({1}) {2})",
    "DateTimeDiff": "DATEDIFF({2}, {1}, {0})",
    "DateTimeFormat": "STRFTIME({0}, {1})",
    "ToDate": "CAST({0} AS DATE)",
    "DateTimeYear": "YEAR({0})",
    "DateTimeMonth": "MONTH({0})",
    "DateTimeDay": "DAY({0})",
    "DateTimeHour": "HOUR({0})",
    "DateTimeMinute": "MINUTE({0})",
    "DateTimeSecond": "SECOND({0})",
    "DateTimeFirstOfMonth": "DATE_TRUNC('month', {0})",
    "DateTimeLastOfMonth": "(DATE_TRUNC('month', {0}) + INTERVAL '1 month' - INTERVAL '1 day')::DATE",
    "DateTimeFirstOfYear": "DATE_TRUNC('year', {0})",
    "DateTimeQuarter": "QUARTER({0})",
    "DateTimeTrim": "DATE_TRUNC({1}, {0})",
    # Conditional
    "IIF": "(CASE WHEN {0} THEN {1} ELSE {2} END)",
    "Switch": None,  # handled separately
    # Misc
    "TOPN": None,  # not a scalar function
}
def get_function_sql(name: str, args: list[str]) -> str:
    """Render a function call to DuckDB SQL given evaluated argument SQL strings.

    Args:
        name: Alteryx function name; matched exactly first, then
            case-insensitively against ``FUNCTION_MAP``.
        args: SQL text of each argument, already rendered.

    Returns:
        The template from ``FUNCTION_MAP`` with ``{0}``, ``{1}``, … replaced
        by the arguments; a CASE expression for ``Switch``; or
        ``name(arg, …)`` unchanged when there is no template for the name.
        Placeholders with no corresponding argument are left in place.
    """
    import re  # local import: this module has no top-level `re` import

    # Switch must be checked before the template lookup: its FUNCTION_MAP
    # entry is None, which would otherwise take the pass-through branch and
    # make this rendering unreachable.
    if name.lower() == "switch":
        return _render_switch(args)

    template = FUNCTION_MAP.get(name)
    if template is None:
        # Case-insensitive lookup; the first matching key wins.
        canon = name.lower()
        template = next(
            (tpl for key, tpl in FUNCTION_MAP.items() if key.lower() == canon),
            None,
        )

    if template is None:
        # Unknown function — pass through as-is (may work in DuckDB natively)
        return f"{name}({', '.join(args)})"

    def _fill(match: "re.Match[str]") -> str:
        index = int(match.group(1))
        return args[index] if index < len(args) else match.group(0)

    # Substitute in a single pass so that placeholder-looking text inside an
    # argument (e.g. the string literal 'a{1}b') is never substituted again.
    return re.sub(r"\{(\d+)\}", _fill, template)
def _render_switch(args: list[str]) -> str:
    """Switch(val, default, v1, r1, v2, r2, ...) → CASE val WHEN v1 THEN r1 ... ELSE default END

    Args:
        args: Rendered SQL for the Switch arguments, in call order.

    Returns:
        "NULL" when fewer than two arguments are given; the default alone when
        there is no complete value/result pair (a CASE needs at least one
        WHEN clause to be valid SQL); otherwise the CASE expression. A
        trailing value without a result is ignored.
    """
    if len(args) < 2:
        return "NULL"
    val, default, *pairs = args
    # zip() over the even/odd slices drops an unpaired trailing value.
    cases = [
        f"WHEN {when} THEN {then}"
        for when, then in zip(pairs[0::2], pairs[1::2])
    ]
    if not cases:
        return default
    cases_sql = " ".join(cases)
    return f"CASE {val} {cases_sql} ELSE {default} END"
def titlecase_sql(col: str) -> str:
    """Approximate Titlecase via DuckDB: capitalise first letter of each word.

    *col* is SQL text; it is split on single spaces, each piece gets an
    upper-cased first character and lower-cased remainder, and the pieces are
    joined back with single spaces.
    """
    words = f"str_split({col}, ' ')"
    capitalise = "x -> UPPER(LEFT(x,1)) || LOWER(SUBSTR(x,2))"
    return f"array_to_string(list_transform({words}, {capitalise}), ' ')"

View File

@ -0,0 +1,630 @@
"""
Alteryx expression → DuckDB SQL transpiler.
Handles:
[ColumnName] → "ColumnName"
"string" → 'string' (embedded single quotes are doubled)
IF...THEN...ENDIF → CASE WHEN...END
IIF(c,t,f) → CASE WHEN c THEN t ELSE f END
IsNull/IsEmpty → IS NULL checks
NULL() → NULL
AND/OR/NOT → AND/OR/NOT
== / != → = / <>
Row references [Row-N:Field] / [Row+N:Field] → LAG / LEAD("Field", N) OVER ()
All functions in expression/functions.py
"""
from __future__ import annotations
import re
from enum import Enum, auto
from typing import Optional
import polars as pl
import duckdb
from .functions import get_function_sql, titlecase_sql
class UnsupportedExpressionError(Exception):
    """Raised when an expression cannot be tokenised or parsed."""
# ---------------------------------------------------------------------------
# Tokeniser
# ---------------------------------------------------------------------------
class TT(Enum):
    """Token types produced by the tokeniser.

    Values come from ``auto()``, so the member order below is significant
    and must not be changed.
    """

    # --- punctuation ---
    LBRACKET = auto()  # [
    RBRACKET = auto()  # ]
    LPAREN = auto()  # (
    RPAREN = auto()  # )
    COMMA = auto()  # ,

    # --- arithmetic ---
    PLUS = auto()  # +
    MINUS = auto()  # -
    STAR = auto()  # *
    SLASH = auto()  # /
    PERCENT = auto()  # %
    CONCAT = auto()  # + (string, same as PLUS — resolved by context)

    # --- comparison ---
    EQ = auto()  # == or =
    NEQ = auto()  # != or <>
    LT = auto()  # <
    LE = auto()  # <=
    GT = auto()  # >
    GE = auto()  # >=

    # --- boolean keywords ---
    AND = auto()
    OR = auto()
    NOT = auto()

    # --- conditional keywords ---
    IF = auto()
    THEN = auto()
    ELSEIF = auto()
    ELSE = auto()
    ENDIF = auto()
    IIF = auto()

    # --- null helpers ---
    NULL_FUNC = auto()  # NULL()
    ISNULL = auto()
    ISEMPTY = auto()

    # --- literals and names ---
    NUMBER = auto()
    STRING = auto()  # double-quoted string literal
    IDENT = auto()  # function name or keyword
    COLUMN = auto()  # [ColName] — after stripping brackets

    # --- end marker and late additions ---
    EOF = auto()
    BANG = auto()  # ! (prefix not)
    PIPE2 = auto()  # || (string concat in SQL)
    POWER = auto()  # ^
# Upper-cased identifier text → token type. Identifiers not listed here are
# plain TT.IDENT tokens.
_KEYWORDS = {
    # Keywords whose token type carries the same name.
    **{
        word: TT[word]
        for word in (
            "AND",
            "OR",
            "NOT",
            "IF",
            "THEN",
            "ELSEIF",
            "ELSE",
            "ENDIF",
            "IIF",
            "ISNULL",
            "ISEMPTY",
        )
    },
    "NULL": TT.NULL_FUNC,
    # Kept as IDENT on purpose: these are resolved when a primary is parsed.
    "ISNUMBER": TT.IDENT,
    "TRUE": TT.IDENT,
    "FALSE": TT.IDENT,
}
class Token:
    """A lexical token: its kind plus an optional payload value."""

    # Slots only — tokens are created in bulk and need no instance __dict__.
    __slots__ = ("type", "value")

    def __init__(self, type_: TT, value: object = None):
        self.type, self.value = type_, value

    def __repr__(self):
        kind, payload = self.type, self.value
        return f"Token({kind}, {payload!r})"
# Master tokeniser pattern: one named group per token kind, tried in order.
# Two-character operators (<=, >=, !=, <>, ==, ||) come before their
# one-character prefixes.
#
# NUMBER is deliberately unsigned. A leading '-' is always lexed as MINUS and
# the parser's unary-minus rule handles negation; folding the sign into the
# number made "5-3" or "[a]-1" lex as two adjacent operands with no operator.
#
# STRING accepts both double-quoted and single-quoted literals, each with
# backslash escapes; the delimiters are the first and last character of the
# match in both cases.
_TOKEN_RE = re.compile(
    r"""
    (?P<SPACE>\s+)
  | (?P<COLUMN>\[[^\]]*\])
  | (?P<NUMBER>\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)
  | (?P<STRING>"(?:[^"\\]|\\.)*"|'(?:[^'\\]|\\.)*')
  | (?P<LE><=)
  | (?P<GE>>=)
  | (?P<NEQ>!=|<>)
  | (?P<EQ>==|=)
  | (?P<LT><)
  | (?P<GT>>)
  | (?P<PIPE2>\|\|)
  | (?P<CONCAT>\+)
  | (?P<MINUS>-)
  | (?P<STAR>\*)
  | (?P<SLASH>/)
  | (?P<PERCENT>%)
  | (?P<POWER>\^)
  | (?P<BANG>!)
  | (?P<LPAREN>\()
  | (?P<RPAREN>\))
  | (?P<COMMA>,)
  | (?P<IDENT>[A-Za-z_]\w*)
    """,
    re.VERBOSE,
)
def tokenise(text: str) -> list[Token]:
    """Split an expression string into tokens, ending with a ``TT.EOF`` token.

    Whitespace is dropped. COLUMN tokens carry the name without brackets,
    NUMBER tokens the raw text, STRING tokens the literal body prepared for a
    single-quoted SQL literal, and IDENT/keyword tokens the original spelling.

    Raises:
        UnsupportedExpressionError: if no token pattern matches at some
            position, or a matched kind has no handling here.
    """
    # Kinds that become a value-less token of a fixed type. Note that the
    # regex group CONCAT ('+') is emitted as TT.PLUS.
    valueless = {
        "LE": TT.LE,
        "GE": TT.GE,
        "NEQ": TT.NEQ,
        "EQ": TT.EQ,
        "LT": TT.LT,
        "GT": TT.GT,
        "PIPE2": TT.PIPE2,
        "CONCAT": TT.PLUS,
        "MINUS": TT.MINUS,
        "STAR": TT.STAR,
        "SLASH": TT.SLASH,
        "PERCENT": TT.PERCENT,
        "POWER": TT.POWER,
        "BANG": TT.BANG,
        "LPAREN": TT.LPAREN,
        "RPAREN": TT.RPAREN,
        "COMMA": TT.COMMA,
    }

    tokens: list[Token] = []
    pos = 0
    end = len(text)
    while pos < end:
        m = _TOKEN_RE.match(text, pos)
        if not m:
            raise UnsupportedExpressionError(
                f"Unexpected character {text[pos]!r} at pos {pos} in: {text!r}"
            )
        pos = m.end()
        kind = m.lastgroup
        raw = m.group()

        if kind == "SPACE":
            continue

        if kind == "COLUMN":
            token = Token(TT.COLUMN, raw[1:-1])  # strip [ ]
        elif kind == "NUMBER":
            token = Token(TT.NUMBER, raw)
        elif kind == "STRING":
            # Prepare the literal body for a single-quoted SQL string:
            # unescape \' , double every single quote, then unescape \" .
            body = raw[1:-1]
            body = body.replace("\\'", "'")
            body = body.replace("'", "''")
            body = body.replace('\\"', '"')
            token = Token(TT.STRING, body)
        elif kind == "IDENT":
            # Keywords are recognised case-insensitively; the original
            # spelling is kept as the token value.
            token = Token(_KEYWORDS.get(raw.upper(), TT.IDENT), raw)
        elif kind in valueless:
            token = Token(valueless[kind])
        else:
            raise UnsupportedExpressionError(f"Unhandled token kind {kind}")
        tokens.append(token)

    tokens.append(Token(TT.EOF))
    return tokens
# ---------------------------------------------------------------------------
# Parser / code generator (recursive descent → DuckDB SQL string)
# ---------------------------------------------------------------------------
class _Parser:
def __init__(self, tokens: list[Token]):
self._tokens = tokens
self._pos = 0
@property
def _cur(self) -> Token:
return self._tokens[self._pos]
def _peek(self, offset: int = 1) -> Token:
idx = self._pos + offset
if idx >= len(self._tokens):
return Token(TT.EOF)
return self._tokens[idx]
def _advance(self) -> Token:
tok = self._tokens[self._pos]
self._pos += 1
return tok
def _expect(self, tt: TT) -> Token:
tok = self._advance()
if tok.type != tt:
raise UnsupportedExpressionError(
f"Expected {tt}, got {tok.type} ({tok.value!r})"
)
return tok
# ------------------------------------------------------------------ #
def parse(self) -> str:
sql = self._parse_expr()
if self._cur.type != TT.EOF:
raise UnsupportedExpressionError(
f"Unexpected token at end: {self._cur}"
)
return sql
def _parse_expr(self) -> str:
return self._parse_or()
def _parse_or(self) -> str:
left = self._parse_and()
while self._cur.type == TT.OR:
self._advance()
right = self._parse_and()
left = f"({left} OR {right})"
return left
def _parse_and(self) -> str:
left = self._parse_not()
while self._cur.type == TT.AND:
self._advance()
right = self._parse_not()
left = f"({left} AND {right})"
return left
def _parse_not(self) -> str:
if self._cur.type in (TT.NOT, TT.BANG):
self._advance()
operand = self._parse_not()
return f"(NOT {operand})"
return self._parse_comparison()
def _parse_comparison(self) -> str:
left = self._parse_additive()
cmp_map = {
TT.EQ: "=",
TT.NEQ: "<>",
TT.LT: "<",
TT.LE: "<=",
TT.GT: ">",
TT.GE: ">=",
}
if self._cur.type in cmp_map:
op = cmp_map[self._advance().type]
right = self._parse_additive()
return f"({left} {op} {right})"
return left
def _parse_additive(self) -> str:
left = self._parse_multiplicative()
while self._cur.type in (TT.PLUS, TT.MINUS, TT.PIPE2):
op = self._advance()
right = self._parse_multiplicative()
if op.type == TT.PIPE2:
left = f"({left} || {right})"
elif op.type == TT.MINUS:
left = f"({left} - {right})"
else:
left = f"({left} + {right})"
return left
def _parse_multiplicative(self) -> str:
left = self._parse_unary()
while self._cur.type in (TT.STAR, TT.SLASH, TT.PERCENT, TT.POWER):
op = self._advance()
right = self._parse_unary()
if op.type == TT.POWER:
left = f"POWER({left}, {right})"
elif op.type == TT.PERCENT:
left = f"({left} % {right})"
elif op.type == TT.SLASH:
left = f"({left} / {right})"
else:
left = f"({left} * {right})"
return left
def _parse_unary(self) -> str:
if self._cur.type == TT.MINUS:
self._advance()
return f"(-{self._parse_primary()})"
if self._cur.type == TT.PLUS:
self._advance()
return self._parse_primary()
return self._parse_primary()
def _parse_primary(self) -> str:  # noqa: C901 (complexity ok for parser)
    """Parse one primary expression and return its SQL rendering.

    Handles, keyed on the current token's type: a parenthesised
    sub-expression, a bracketed column reference (including ``Row±N:Field``
    references), numeric and string literals, ``IF`` blocks, ``NULL`` /
    ``NULL()``, the ``IsNull`` / ``IsEmpty`` / ``IIF`` keyword tokens, and
    identifiers (boolean/null literals, specially-rendered built-ins,
    generic function calls, bare names).

    Raises:
        UnsupportedExpressionError: if the current token cannot start a
            primary expression.
    """
    tok = self._cur
    kind = tok.type

    # Parenthesised sub-expression
    if kind == TT.LPAREN:
        self._advance()
        inner = self._parse_expr()
        self._expect(TT.RPAREN)
        return f"({inner})"

    # Column reference (plain or Row±N:Field)
    if kind == TT.COLUMN:
        self._advance()
        return self._render_column_ref(tok.value)

    # Numeric literal
    if kind == TT.NUMBER:
        self._advance()
        return tok.value

    # String literal (already converted to single-quoted)
    if kind == TT.STRING:
        self._advance()
        return f"'{tok.value}'"

    # IF … THEN … [ELSEIF … THEN …]* [ELSE …] ENDIF
    if kind == TT.IF:
        return self._parse_if()

    # NULL() or bare NULL keyword
    if kind == TT.NULL_FUNC:
        self._advance()
        self._skip_empty_parens()
        return "NULL"

    # Keyword-token forms share their rendering with the identifier forms.
    if kind == TT.ISNULL:
        self._advance()
        return self._parse_builtin_call("ISNULL")
    if kind == TT.ISEMPTY:
        self._advance()
        return self._parse_builtin_call("ISEMPTY")
    if kind == TT.IIF:
        self._advance()
        return self._parse_builtin_call("IIF")

    # Function call or bare identifier
    if kind == TT.IDENT:
        name = tok.value
        upper = name.upper()
        self._advance()

        # Bare boolean/null literals
        if upper == "TRUE":
            return "TRUE"
        if upper == "FALSE":
            return "FALSE"
        if upper == "NULL":
            self._skip_empty_parens()
            return "NULL"

        # Built-ins that need special SQL rendering (matched case-insensitively)
        special = self._parse_builtin_call(upper)
        if special is not None:
            return special

        if self._cur.type == TT.LPAREN:
            # Generic function call: comma-separated argument list
            self._advance()
            args: list[str] = []
            if self._cur.type != TT.RPAREN:
                args.append(self._parse_expr())
                while self._cur.type == TT.COMMA:
                    self._advance()
                    args.append(self._parse_expr())
            self._expect(TT.RPAREN)
            return get_function_sql(name, args)

        # Bare identifier (e.g. a column name without brackets — unusual)
        return self._quote_ident(name)

    raise UnsupportedExpressionError(f"Unexpected token: {tok}")

@staticmethod
def _quote_ident(name: str) -> str:
    """Return *name* as a double-quoted SQL identifier, doubling embedded quotes."""
    escaped = name.replace('"', '""')
    return f'"{escaped}"'

def _render_column_ref(self, col: str) -> str:
    """Render a bracketed column reference.

    ``Row-N:Field`` becomes ``LAG("Field", N) OVER ()`` and ``Row+N:Field``
    becomes ``LEAD("Field", N) OVER ()``; anything else is a quoted
    identifier.
    """
    row_m = re.match(r"^Row([+-]\d+):(.+)$", col, re.IGNORECASE)
    if row_m:
        offset = int(row_m.group(1))
        field = row_m.group(2)
        func = "LAG" if offset < 0 else "LEAD"
        return f"{func}({self._quote_ident(field)}, {abs(offset)}) OVER ()"
    return self._quote_ident(col)

def _skip_empty_parens(self) -> None:
    """Consume an optional ``()`` pair following a NULL token."""
    if self._cur.type == TT.LPAREN:
        self._advance()
        self._expect(TT.RPAREN)

def _parse_fixed_args(self, count: int) -> list:
    """Parse ``( expr , expr , ... )`` with exactly *count* arguments."""
    self._expect(TT.LPAREN)
    args = [self._parse_expr()]
    for _ in range(count - 1):
        self._expect(TT.COMMA)
        args.append(self._parse_expr())
    self._expect(TT.RPAREN)
    return args

def _parse_builtin_call(self, upper: str) -> "str | None":
    """Parse the argument list of a specially-rendered built-in.

    *upper* is the upper-cased function name whose name token has already
    been consumed.  Returns the SQL text, or ``None`` (consuming nothing)
    when *upper* is not one of the special built-ins.
    """
    if upper == "ISNULL":
        (inner,) = self._parse_fixed_args(1)
        return f"({inner} IS NULL)"
    if upper == "ISEMPTY":
        (inner,) = self._parse_fixed_args(1)
        return f"({inner} IS NULL OR {inner} = '')"
    if upper == "ISNUMBER":
        (inner,) = self._parse_fixed_args(1)
        return f"(TRY_CAST({inner} AS DOUBLE) IS NOT NULL)"
    # Titlecase — special SQL rendering
    if upper == "TITLECASE":
        (inner,) = self._parse_fixed_args(1)
        return titlecase_sql(inner)
    # DateTimeAdd / DateTimeDiff need the unit string unquoted / singular
    if upper == "DATETIMEADD":
        d_arg, n_arg, unit_arg = self._parse_fixed_args(3)
        # unit_arg is a SQL string like 'days' — strip quotes for INTERVAL keyword
        unit = unit_arg.strip("'").rstrip("s").upper()
        return f"({d_arg} + INTERVAL ({n_arg}) {unit})"
    if upper == "DATETIMEDIFF":
        d1, d2, unit_arg = self._parse_fixed_args(3)
        unit = unit_arg.strip("'").rstrip("s").upper()
        # Argument order is swapped in the emitted call: (unit, d2, d1).
        return f"DATEDIFF('{unit}', {d2}, {d1})"
    if upper == "IIF":
        cond, true_val, false_val = self._parse_fixed_args(3)
        return f"(CASE WHEN {cond} THEN {true_val} ELSE {false_val} END)"
    return None
def _parse_if(self) -> str:
    """Parse ``IF … THEN … [ELSEIF … THEN …]* [ELSE …] ENDIF`` into a CASE.

    The result is ``CASE WHEN c1 THEN v1 [WHEN c2 THEN v2 …] [ELSE e] END``
    joined with single spaces and not wrapped in parentheses.
    """
    self._expect(TT.IF)

    pieces = ["CASE"]
    # The first branch is mandatory; every ELSEIF contributes one more.
    while True:
        condition = self._parse_expr()
        self._expect(TT.THEN)
        outcome = self._parse_expr()
        pieces.append(f"WHEN {condition} THEN {outcome}")
        if self._cur.type != TT.ELSEIF:
            break
        self._advance()

    fallback = None
    if self._cur.type == TT.ELSE:
        self._advance()
        fallback = self._parse_expr()
    self._expect(TT.ENDIF)

    if fallback is not None:
        pieces.append(f"ELSE {fallback}")
    pieces.append("END")
    return " ".join(pieces)
# ---------------------------------------------------------------------------
# Public API
# ---------------------------------------------------------------------------
def transpile(expression: str) -> str:
    """Convert an Alteryx expression string to a DuckDB SQL fragment.

    Surrounding whitespace is ignored.  An empty (or whitespace-only)
    expression yields the SQL literal ``NULL`` without being tokenised.
    """
    source = expression.strip()
    if source == "":
        return "NULL"
    return _Parser(tokenise(source)).parse()
def _coerce_numeric_strings(df: pl.DataFrame) -> pl.DataFrame:
    """Cast string columns that contain only numeric data to Int64 or Float64.

    Alteryx implicitly coerces TextInput strings to numbers when the expression
    treats them numerically. This mirrors that behaviour.

    A column is converted only when every non-null value survives the cast;
    Int64 is tried before Float64.  Columns that are not strings, or that
    hold nothing but nulls, are left untouched.
    """
    numeric_casts: list[pl.Expr] = []
    for name in df.columns:
        column = df[name]
        if column.dtype != pl.String:
            continue
        present = column.drop_nulls()
        if len(present) == 0:
            continue
        # Integer first so integer-looking strings do not end up as floats.
        for target in (pl.Int64, pl.Float64):
            # A non-strict cast turns unparsable values into nulls.
            if present.cast(target, strict=False).null_count() == 0:
                numeric_casts.append(pl.col(name).cast(target, strict=False))
                break
    if not numeric_casts:
        return df
    return df.with_columns(numeric_casts)
class ExpressionTranspiler:
    """Stateful transpiler bound to a DuckDB connection for evaluation.

    Each evaluation registers the input frame (as Arrow) on the connection
    under a unique view name, runs one ``SELECT`` over it and unregisters
    the view again, whether or not the query succeeded.
    """

    def __init__(self, con: duckdb.DuckDBPyConnection):
        self._con = con
        # Monotonic suffix so every registered frame gets a distinct view name.
        self._view_counter = 0

    def _register(self, df: pl.DataFrame) -> str:
        """Register *df* on the connection under a fresh name; return the name."""
        name = f"_expr_df_{self._view_counter}"
        self._view_counter += 1
        self._con.register(name, df.to_arrow())
        return name

    def _select(self, df: pl.DataFrame, sql_expr: str, alias: str) -> pl.DataFrame:
        """Run ``SELECT (sql_expr) AS "alias"`` over *df* and return the result.

        The view is released with ``unregister`` (the counterpart of
        ``register``) rather than a raw ``DROP VIEW``.
        """
        view = self._register(df)
        # Double embedded quotes so the alias is always a valid identifier.
        quoted_alias = alias.replace('"', '""')
        try:
            return self._con.execute(
                f'SELECT ({sql_expr}) AS "{quoted_alias}" FROM "{view}"'
            ).pl()
        finally:
            self._con.unregister(view)

    def _select_with_retry(
        self, df: pl.DataFrame, sql_expr: str, alias: str
    ) -> pl.DataFrame:
        """Run the query; on a binder error retry once with coerced columns."""
        try:
            return self._select(df, sql_expr, alias)
        except duckdb.BinderException:
            # Type mismatch: retry after coercing numeric-looking string columns
            return self._select(_coerce_numeric_strings(df), sql_expr, alias)

    def eval_mask(self, df: pl.DataFrame, expression: str) -> pl.Series:
        """Evaluate a boolean Alteryx expression against df, return bool Series."""
        sql_expr = transpile(expression)
        return self._select_with_retry(df, sql_expr, "_mask")["_mask"]

    def eval_series(
        self,
        df: pl.DataFrame,
        expression: str,
        field: str,
        dtype: pl.PolarsDataType,
    ) -> pl.Series:
        """Evaluate a scalar Alteryx expression against df, return a Series.

        The result column is named *field*.  It is cast to *dtype* when the
        cast succeeds; otherwise the series is returned with whatever type
        DuckDB produced.
        """
        sql_expr = transpile(expression)
        series = self._select_with_retry(df, sql_expr, field)[field]
        try:
            return series.cast(dtype)
        except Exception:
            # Deliberate best effort: an impossible cast keeps the raw result.
            return series

    def eval_scalar(self, expression: str) -> object:
        """Evaluate an expression that requires no input columns."""
        sql_expr = transpile(expression)
        result = self._con.execute(f"SELECT ({sql_expr})").fetchone()
        return result[0] if result else None

View File

View File

@ -0,0 +1,25 @@
"""Shared fixtures for tests."""
from __future__ import annotations
import sys
from pathlib import Path
import pytest
import duckdb
# Ensure the alteryx_runner package root is on sys.path
PKG = Path(__file__).parent.parent # alteryx_runner/
if str(PKG) not in sys.path:
sys.path.insert(0, str(PKG))
from engine.context import RunContext
@pytest.fixture
def ctx(tmp_path):
    """Provide a non-verbose RunContext whose workflow directory is ``tmp_path``."""
    workflow_root = str(tmp_path)
    return RunContext(workflow_dir=workflow_root, verbose=False)
@pytest.fixture
def duckdb_con():
    """Yield an in-memory DuckDB connection and close it after the test."""
    connection = duckdb.connect(":memory:")
    yield connection
    # Teardown: runs once the requesting test has finished.
    connection.close()

View File

@ -0,0 +1,89 @@
"""Tests for the XML parser."""
from __future__ import annotations
import sys
import textwrap
from pathlib import Path
import pytest
PKG = Path(__file__).parent.parent # alteryx_runner/
if str(PKG) not in sys.path:
sys.path.insert(0, str(PKG))
from engine.parser import parse_workflow
def _write_yxmd(tmp_path: Path, body: str) -> Path:
content = f'<AlteryxDocument yxmdVer="2022.1">{body}<Properties/></AlteryxDocument>'
p = tmp_path / "test.yxmd"
p.write_text(content)
return p
class TestParser:
    """parse_workflow: nodes, connections and node positions read from XML."""

    def test_simple_nodes(self, tmp_path):
        """Two nodes joined by one connection are both present in the graph."""
        xml_body = textwrap.dedent("""\
<Nodes>
<Node ToolID="1">
<GuiSettings Plugin="AlteryxBasePluginsGui.TextInput.TextInput">
<Position x="0" y="0"/>
</GuiSettings>
<Properties><Configuration/></Properties>
</Node>
<Node ToolID="2">
<GuiSettings Plugin="AlteryxBasePluginsGui.Filter.Filter">
<Position x="100" y="0"/>
</GuiSettings>
<Properties><Configuration><Expression>True</Expression></Configuration></Properties>
</Node>
</Nodes>
<Connections>
<Connection>
<Origin ToolID="1" Connection="Output"/>
<Destination ToolID="2" Connection="Input"/>
</Connection>
</Connections>
""")
        workflow_path = _write_yxmd(tmp_path, xml_body)
        workflow = parse_workflow(str(workflow_path))

        assert 1 in workflow.nodes
        assert 2 in workflow.nodes
        assert len(workflow.connections) == 1
        only_link = workflow.connections[0]
        assert only_link.origin_id == 1
        assert only_link.dest_id == 2

    def test_wireless_connection(self, tmp_path):
        """A connection marked Wireless="True" is flagged as wireless."""
        xml_body = textwrap.dedent("""\
<Nodes>
<Node ToolID="10">
<GuiSettings Plugin="AlteryxBasePluginsGui.TextInput.TextInput">
<Position x="0" y="0"/>
</GuiSettings>
<Properties><Configuration/></Properties>
</Node>
</Nodes>
<Connections>
<Connection Wireless="True">
<Origin ToolID="10" Connection="Output"/>
<Destination ToolID="20" Connection="Input"/>
</Connection>
</Connections>
""")
        workflow_path = _write_yxmd(tmp_path, xml_body)
        workflow = parse_workflow(str(workflow_path))

        first_link = workflow.connections[0]
        assert first_link.wireless is True

    def test_node_position(self, tmp_path):
        """The <Position> x/y attributes become an integer (x, y) tuple."""
        xml_body = textwrap.dedent("""\
<Nodes>
<Node ToolID="5">
<GuiSettings Plugin="AlteryxBasePluginsGui.Sort.Sort">
<Position x="42" y="99"/>
</GuiSettings>
<Properties><Configuration/></Properties>
</Node>
</Nodes>
<Connections/>
""")
        workflow_path = _write_yxmd(tmp_path, xml_body)
        workflow = parse_workflow(str(workflow_path))

        sort_node = workflow.nodes[5]
        assert sort_node.position == (42, 99)

View File

@ -0,0 +1,266 @@
"""Integration-level tool tests using in-memory DataFrames (no .yxmd required)."""
from __future__ import annotations
import sys
from pathlib import Path
import xml.etree.ElementTree as ET
import pytest
import polars as pl
PKG = Path(__file__).parent.parent # alteryx_runner/
if str(PKG) not in sys.path:
sys.path.insert(0, str(PKG))
from engine.graph import NodeDef
from engine.context import RunContext
def make_node(tool_id: int, plugin: str, config_xml: str) -> NodeDef:
    """Build a NodeDef whose config is the element parsed from *config_xml*."""
    return NodeDef(
        tool_id=tool_id,
        plugin=plugin,
        config=ET.fromstring(config_xml),
    )
@pytest.fixture
def ctx(tmp_path):
    """Provide a non-verbose RunContext whose workflow directory is ``tmp_path``."""
    workflow_root = str(tmp_path)
    return RunContext(workflow_dir=workflow_root, verbose=False)
# ---------------------------------------------------------------------------
# TextInput
# ---------------------------------------------------------------------------
class TestTextInput:
    """TextInput tool: inline <Data> rows become a DataFrame of strings."""

    def test_basic(self, ctx):
        """Two rows, two fields; an empty cell comes back as None."""
        from tools.inout.text_input import TextInputTool

        config_xml = """<Configuration>
<Fields><Field name="A"/><Field name="B"/></Fields>
<Data>
<r><c>1</c><c>hello</c></r>
<r><c>2</c><c></c></r>
</Data>
</Configuration>"""
        tool = TextInputTool(make_node(1, "", config_xml), ctx)
        table = tool.execute({})["Output"]

        assert table.shape == (2, 2)
        assert table["A"].to_list() == ["1", "2"]
        assert table["B"][1] is None  # empty → NULL
# ---------------------------------------------------------------------------
# Filter
# ---------------------------------------------------------------------------
class TestFilter:
    """Filter tool: rows are split between the "True" and "False" outputs."""

    def _df(self) -> pl.DataFrame:
        """Four rows: integer ID 1-4 and an alternating North/South Region."""
        ids = [1, 2, 3, 4]
        regions = ["North", "South", "North", "South"]
        return pl.DataFrame({"ID": ids, "Region": regions})

    def test_simple_gt(self, ctx):
        """Simple-mode config with expression ``[ID] > 2`` splits the rows 2/2."""
        from tools.preparation.filter_tool import FilterTool

        config_xml = """<Configuration>
<Mode>Simple</Mode>
<Simple>
<Operator>&gt;</Operator>
<Field>ID</Field>
<Operands><Operand>2</Operand><DateType>fixed</DateType></Operands>
</Simple>
<Expression>[ID] &gt; 2</Expression>
</Configuration>"""
        tool = FilterTool(make_node(1, "", config_xml), ctx)
        outputs = tool.execute({"Input": self._df()})

        assert len(outputs["True"]) == 2
        assert len(outputs["False"]) == 2

    def test_custom_expr(self, ctx):
        """Custom-mode string equality keeps only the "South" rows."""
        from tools.preparation.filter_tool import FilterTool

        config_xml = """<Configuration>
<Mode>Custom</Mode>
<Expression>[Region] == "South"</Expression>
</Configuration>"""
        tool = FilterTool(make_node(1, "", config_xml), ctx)
        matched = tool.execute({"Input": self._df()})["True"]

        assert len(matched) == 2
        assert all(region == "South" for region in matched["Region"].to_list())
# ---------------------------------------------------------------------------
# Select
# ---------------------------------------------------------------------------
class TestSelect:
    """Select tool: renaming, dropping and the ``*Unknown`` catch-all."""

    def test_drop_and_rename(self, ctx):
        """A is renamed to Alpha, B is dropped, C passes via ``*Unknown``."""
        from tools.preparation.select_tool import SelectTool

        source = pl.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
        config_xml = """<Configuration OrderChanged="False">
<SelectFields>
<SelectField field="A" selected="True" rename="Alpha"/>
<SelectField field="B" selected="False"/>
<SelectField field="*Unknown" selected="True"/>
</SelectFields>
</Configuration>"""
        tool = SelectTool(make_node(1, "", config_xml), ctx)
        selected = tool.execute({"Input": source})["Output"]

        assert "Alpha" in selected.columns
        assert "B" not in selected.columns
        assert "C" in selected.columns  # *Unknown passes through
# ---------------------------------------------------------------------------
# Sort
# ---------------------------------------------------------------------------
class TestSort:
    """Sort tool: single-field ascending and descending order."""

    def test_ascending(self, ctx):
        """Names come back in alphabetical order."""
        from tools.preparation.sort_tool import SortTool

        source = pl.DataFrame({"Name": ["Charlie", "Alice", "Bob"], "Score": [3, 1, 2]})
        config_xml = """<Configuration>
<SortInfo locale="0">
<Field field="Name" order="Ascending"/>
</SortInfo>
</Configuration>"""
        tool = SortTool(make_node(1, "", config_xml), ctx)
        ordered = tool.execute({"Input": source})["Output"]

        assert ordered["Name"].to_list() == ["Alice", "Bob", "Charlie"]

    def test_descending(self, ctx):
        """Scores come back from highest to lowest."""
        from tools.preparation.sort_tool import SortTool

        source = pl.DataFrame({"Score": [3, 1, 2]})
        config_xml = """<Configuration>
<SortInfo locale="0">
<Field field="Score" order="Descending"/>
</SortInfo>
</Configuration>"""
        tool = SortTool(make_node(1, "", config_xml), ctx)
        ordered = tool.execute({"Input": source})["Output"]

        assert ordered["Score"].to_list() == [3, 2, 1]
# ---------------------------------------------------------------------------
# Unique
# ---------------------------------------------------------------------------
class TestUnique:
    """Unique tool: first occurrences versus repeats."""

    def test_unique_and_duplicate(self, ctx):
        """Five names with two repeats give 3 unique rows and 2 duplicates."""
        from tools.preparation.unique_tool import UniqueTool

        source = pl.DataFrame({"Name": ["Alice", "Bob", "Alice", "Charlie", "Bob"]})
        config_xml = """<Configuration>
<UniqueFields><Field name="Name"/></UniqueFields>
</Configuration>"""
        tool = UniqueTool(make_node(1, "", config_xml), ctx)
        outputs = tool.execute({"Input": source})

        assert len(outputs["Unique"]) == 3
        assert len(outputs["Duplicate"]) == 2
# ---------------------------------------------------------------------------
# Sample
# ---------------------------------------------------------------------------
class TestSample:
    """Sample tool: taking the first or last N rows."""

    def _df(self) -> pl.DataFrame:
        """Single column N holding the integers 0 through 9."""
        values = list(range(10))
        return pl.DataFrame({"N": values})

    def test_first(self, ctx):
        """Mode First with N=3 keeps rows 0, 1, 2."""
        from tools.preparation.sample_tool import SampleTool

        config_xml = "<Configuration><Mode>First</Mode><N>3</N><GroupFields/></Configuration>"
        tool = SampleTool(make_node(1, "", config_xml), ctx)
        sampled = tool.execute({"Input": self._df()})["Output"]

        assert len(sampled) == 3
        assert sampled["N"].to_list() == [0, 1, 2]

    def test_last(self, ctx):
        """Mode Last with N=2 keeps rows 8, 9."""
        from tools.preparation.sample_tool import SampleTool

        config_xml = "<Configuration><Mode>Last</Mode><N>2</N><GroupFields/></Configuration>"
        tool = SampleTool(make_node(1, "", config_xml), ctx)
        sampled = tool.execute({"Input": self._df()})["Output"]

        assert sampled["N"].to_list() == [8, 9]
# ---------------------------------------------------------------------------
# Union
# ---------------------------------------------------------------------------
class TestUnion:
    """Union tool: stacking two inputs by column name or by position."""

    def test_by_name(self, ctx):
        """Auto mode stacks frames whose columns are in a different order."""
        from tools.join.union_tool import UnionTool

        first = pl.DataFrame({"A": [1], "B": [2]})
        second = pl.DataFrame({"B": [4], "A": [3]})
        config_xml = "<Configuration><Mode>Auto</Mode></Configuration>"
        tool = UnionTool(make_node(1, "", config_xml), ctx)
        stacked = tool.execute({"Input1": first, "Input2": second})["Output"]

        assert len(stacked) == 2

    def test_by_position(self, ctx):
        """ByPosition mode keeps the first input's column names."""
        from tools.join.union_tool import UnionTool

        first = pl.DataFrame({"X": [1], "Y": [2]})
        second = pl.DataFrame({"P": [3], "Q": [4]})
        config_xml = "<Configuration><Mode>ByPosition</Mode></Configuration>"
        tool = UnionTool(make_node(1, "", config_xml), ctx)
        stacked = tool.execute({"Input1": first, "Input2": second})["Output"]

        assert stacked.columns == ["X", "Y"]
        assert len(stacked) == 2
# ---------------------------------------------------------------------------
# Summarize
# ---------------------------------------------------------------------------
class TestSummarize:
    """Summarize tool: group-by with an aggregated, renamed column."""

    def test_group_sum(self, ctx):
        """Sales summed per Region are emitted under the name Total."""
        from tools.transform.summarize_tool import SummarizeTool

        source = pl.DataFrame(
            {
                "Region": ["N", "S", "N", "S"],
                "Sales": [100, 200, 150, 250],
            }
        )
        config_xml = """<Configuration>
<SummarizeFields>
<SummarizeField field="Region" action="GroupBy" rename="Region"/>
<SummarizeField field="Sales" action="Sum" rename="Total"/>
</SummarizeFields>
</Configuration>"""
        tool = SummarizeTool(make_node(1, "", config_xml), ctx)
        summary = tool.execute({"Input": source})["Output"]

        assert set(summary.columns) == {"Region", "Total"}
        # Row order is not asserted, so look the totals up by region.
        regions = summary["Region"].to_list()
        sums = summary["Total"].to_list()
        total_by_region = dict(zip(regions, sums))
        assert total_by_region["N"] == 250
        assert total_by_region["S"] == 450
# ---------------------------------------------------------------------------
# Transpose
# ---------------------------------------------------------------------------
class TestTranspose:
    """Transpose tool: data columns unpivoted into Name/Value pairs."""

    def test_unpivot(self, ctx):
        """Two rows times two data fields give four Name/Value rows."""
        from tools.transform.transpose_tool import TransposeTool

        source = pl.DataFrame({"ID": [1, 2], "Visits": [5, 10], "Spend": [100, 200]})
        config_xml = """<Configuration>
<KeyFields><Field name="ID"/></KeyFields>
<DataFields>
<Field name="Visits"/>
<Field name="Spend"/>
</DataFields>
</Configuration>"""
        tool = TransposeTool(make_node(1, "", config_xml), ctx)
        unpivoted = tool.execute({"Input": source})["Output"]

        assert "Name" in unpivoted.columns
        assert "Value" in unpivoted.columns
        assert len(unpivoted) == 4  # 2 rows × 2 data cols
# ---------------------------------------------------------------------------
# RecordID
# ---------------------------------------------------------------------------
class TestRecordID:
    """RecordID tool: a sequential id column placed first."""

    def test_starts_at_one(self, ctx):
        """StartValue 1 numbers three rows 1, 2, 3 in a leading ID column."""
        from tools.preparation.record_id import RecordIDTool

        source = pl.DataFrame({"Name": ["A", "B", "C"]})
        config_xml = """<Configuration>
<Field>ID</Field>
<StartValue>1</StartValue>
<FieldType>Int32</FieldType>
</Configuration>"""
        tool = RecordIDTool(make_node(1, "", config_xml), ctx)
        numbered = tool.execute({"Input": source})["Output"]

        assert numbered["ID"].to_list() == [1, 2, 3]
        assert numbered.columns[0] == "ID"

View File

@ -0,0 +1,155 @@
"""Unit tests for the Alteryx → DuckDB expression transpiler."""
from __future__ import annotations
import sys
from pathlib import Path
import pytest
PKG = Path(__file__).parent.parent # alteryx_runner/
if str(PKG) not in sys.path:
sys.path.insert(0, str(PKG))
from expression.transpiler import transpile, UnsupportedExpressionError
def t(expr: str) -> str:
    """Shorthand for the tests in this module: transpile *expr* to SQL text."""
    sql = transpile(expr)
    return sql
class TestLiterals:
    """Literal values: numbers, strings, NULL and booleans."""

    def test_number(self):
        """An integer literal passes through unchanged."""
        sql = t("42")
        assert sql == "42"

    def test_float(self):
        """A decimal literal passes through unchanged."""
        sql = t("3.14")
        assert sql == "3.14"

    def test_string(self):
        """A double-quoted string becomes a single-quoted SQL string."""
        sql = t('"hello"')
        assert sql == "'hello'"

    def test_null(self):
        """``NULL()`` becomes the SQL keyword NULL."""
        sql = t("NULL()")
        assert sql == "NULL"

    def test_true(self):
        """``True`` is upper-cased."""
        sql = t("True")
        assert sql == "TRUE"

    def test_false(self):
        """``False`` is upper-cased."""
        sql = t("False")
        assert sql == "FALSE"
class TestColumnRef:
    """Bracketed column references become double-quoted identifiers."""

    def test_simple(self):
        """A plain name is wrapped in double quotes."""
        sql = t("[CustomerID]")
        assert sql == '"CustomerID"'

    def test_spaces(self):
        """A name containing a space is preserved inside the quotes."""
        sql = t("[First Name]")
        assert sql == '"First Name"'
class TestOperators:
    """Comparison and logical operators."""

    def test_eq(self):
        """``==`` is rendered as SQL ``=`` inside parentheses."""
        sql = t("[A] == [B]")
        assert sql == '("A" = "B")'

    def test_neq(self):
        """``!=`` is rendered as SQL ``<>``."""
        sql = t("[A] != [B]")
        assert sql == '("A" <> "B")'

    def test_gt(self):
        """``>`` against a numeric literal."""
        sql = t("[Score] > 50")
        assert sql == '("Score" > 50)'

    def test_and(self):
        """AND survives into the output."""
        rendered = t('[A] > 0 AND [B] < 10')
        assert "AND" in rendered

    def test_or(self):
        """OR survives into the output."""
        rendered = t('[A] > 0 OR [B] < 0')
        assert "OR" in rendered

    def test_not(self):
        """Keyword negation produces NOT."""
        rendered = t('NOT [IsActive]')
        assert "NOT" in rendered

    def test_bang(self):
        """``!`` negation also produces NOT."""
        rendered = t('![IsActive]')
        assert "NOT" in rendered
class TestIfThenEndif:
    """IF/THEN/ELSEIF/ELSE/ENDIF blocks become CASE expressions."""

    def test_simple(self):
        """Both branch values appear as single-quoted strings."""
        rendered = t('IF [Score] > 50 THEN "Pass" ELSE "Fail" ENDIF')
        assert "CASE WHEN" in rendered
        assert "'Pass'" in rendered
        assert "'Fail'" in rendered

    def test_elseif(self):
        """One ELSEIF yields two WHEN clauses in total."""
        rendered = t('IF [Score] > 90 THEN "A" ELSEIF [Score] > 70 THEN "B" ELSE "C" ENDIF')
        when_clauses = rendered.count("WHEN")
        assert when_clauses == 2

    def test_no_else(self):
        """The ELSE branch is optional."""
        rendered = t('IF [Active] == "Y" THEN "Yes" ENDIF')
        assert "CASE WHEN" in rendered
class TestIIF:
    """IIF(cond, a, b) becomes a CASE expression."""

    def test_iif(self):
        """The three-argument form renders with CASE WHEN."""
        rendered = t('IIF([Score] > 50, "Pass", "Fail")')
        assert "CASE WHEN" in rendered
class TestIsNull:
    """IsNull(...) becomes an ``IS NULL`` test."""

    def test_isnull_keyword(self):
        """The plain call renders with IS NULL."""
        rendered = t('IsNull([Field])')
        assert "IS NULL" in rendered

    def test_not_isnull(self):
        """A ``!`` prefix adds NOT around the IS NULL test."""
        rendered = t('!IsNull([Field])')
        assert "NOT" in rendered
        assert "IS NULL" in rendered
class TestFunctions:
    """Function calls are mapped to their SQL counterparts."""

    def test_uppercase(self):
        """Uppercase maps to UPPER with the column quoted."""
        sql = t('Uppercase([Name])')
        assert sql == 'UPPER("Name")'

    def test_length(self):
        """Length maps to something containing LENGTH."""
        rendered = t('Length([Name])')
        assert "LENGTH" in rendered

    def test_left(self):
        """Left maps to something containing LEFT."""
        rendered = t('Left([Name], 3)')
        assert "LEFT" in rendered

    def test_round(self):
        """Round maps to something containing ROUND."""
        rendered = t('Round([Score], 2)')
        assert "ROUND" in rendered

    def test_abs(self):
        """ABS maps to something containing ABS."""
        rendered = t('ABS([Val])')
        assert "ABS" in rendered

    def test_trim(self):
        """Trim maps to something containing TRIM."""
        rendered = t('Trim([Name])')
        assert "TRIM" in rendered

    def test_nested(self):
        """A call inside a call keeps both functions in the output."""
        rendered = t('Uppercase(Trim([Name]))')
        assert "UPPER" in rendered
        assert "TRIM" in rendered
class TestArithmetic:
    """Arithmetic operators appear in the generated SQL."""

    def test_add(self):
        """Addition keeps its ``+``."""
        rendered = t('[A] + [B]')
        assert "+" in rendered

    def test_multiply(self):
        """Multiplication keeps its ``*``."""
        rendered = t('[A] * [B]')
        assert "*" in rendered

    def test_divide(self):
        """Division keeps its ``/``."""
        rendered = t('[A] / [B]')
        assert "/" in rendered

    def test_complex(self):
        """Arithmetic nested inside a function call still transpiles."""
        rendered = t('ROUND([Spend] / [Visits], 1)')
        assert "ROUND" in rendered
class TestDateFunctions:
    """Zero-argument date/time functions."""

    def test_datetimenow(self):
        """DateTimeNow() renders with NOW()."""
        rendered = t('DateTimeNow()')
        assert "NOW()" in rendered

    def test_datetimetoday(self):
        """DateTimeToday() renders with CURRENT_DATE."""
        rendered = t('DateTimeToday()')
        assert "CURRENT_DATE" in rendered

View File

@ -0,0 +1,87 @@
"""Tool registry: Plugin string → tool class."""
from __future__ import annotations
from typing import Optional, Type
from tools.base import BaseTool
from tools.inout import InputDataTool, OutputDataTool, TextInputTool, BrowseTool
from tools.preparation import (
FilterTool, FormulaTool, SelectTool, SortTool, SampleTool,
UniqueTool, GenerateRowsTool, MultiRowFormulaTool,
MultiFieldFormulaTool, RecordIDTool, AutoFieldTool,
)
from tools.join import JoinTool, JoinMultipleTool, UnionTool, AppendFieldsTool, FindReplaceTool
from tools.parse import DateTimeTool, RegExTool, TextToColumnsTool
from tools.transform import SummarizeTool, CrossTabTool, TransposeTool
class _PassthroughTool(BaseTool):
    """No-op tool that forwards its first input unchanged on "Output".

    With no inputs at all it emits an empty DataFrame instead.
    """

    def execute(self, inputs):
        if inputs:
            return {"Output": next(iter(inputs.values()))}
        # Imported here because this module has no top-level polars import;
        # a plain import statement replaces the earlier __import__() call.
        import polars as pl

        return {"Output": pl.DataFrame()}
class _NullTool(BaseTool):
    """Tool that ignores its inputs and produces no outputs."""

    def execute(self, inputs):
        no_outputs = {}
        return no_outputs
# Maps the Plugin string of a workflow node to the class that implements it.
# get_tool_class() consults this table (exact key first, then a looser match
# that iterates the entries in the order written here); register_tool() adds
# or replaces entries at runtime.
_REGISTRY: dict[str, Type[BaseTool]] = {
    # In/Out
    "AlteryxBasePluginsGui.DbFileInput.DbFileInput": InputDataTool,
    "AlteryxBasePluginsGui.DbFileOutput.DbFileOutput": OutputDataTool,
    "AlteryxBasePluginsGui.TextInput.TextInput": TextInputTool,
    "AlteryxBasePluginsGui.BrowseV2.BrowseV2": BrowseTool,
    # Preparation
    "AlteryxBasePluginsGui.Filter.Filter": FilterTool,
    "AlteryxBasePluginsGui.Formula.Formula": FormulaTool,
    "AlteryxBasePluginsGui.AlteryxSelect.AlteryxSelect": SelectTool,
    "AlteryxBasePluginsGui.Sort.Sort": SortTool,
    "AlteryxBasePluginsGui.Sample.Sample": SampleTool,
    "AlteryxBasePluginsGui.Unique.Unique": UniqueTool,
    "AlteryxBasePluginsGui.GenerateRows.GenerateRows": GenerateRowsTool,
    "AlteryxBasePluginsGui.MultiRowFormula.MultiRowFormula": MultiRowFormulaTool,
    "AlteryxBasePluginsGui.MultiFieldFormula.MultiFieldFormula": MultiFieldFormulaTool,
    "AlteryxBasePluginsGui.RecordID.RecordID": RecordIDTool,
    "AlteryxBasePluginsGui.AutoField.AutoField": AutoFieldTool,
    # Join
    "AlteryxBasePluginsGui.Join.Join": JoinTool,
    "AlteryxBasePluginsGui.JoinMultiple.JoinMultiple": JoinMultipleTool,
    "AlteryxBasePluginsGui.Union.Union": UnionTool,
    "AlteryxBasePluginsGui.AppendFields.AppendFields": AppendFieldsTool,
    "AlteryxBasePluginsGui.FindReplace.FindReplace": FindReplaceTool,
    # Parse
    "AlteryxBasePluginsGui.DateTime.DateTime": DateTimeTool,
    "AlteryxBasePluginsGui.RegEx.RegEx": RegExTool,
    "AlteryxBasePluginsGui.TextToColumns.TextToColumns": TextToColumnsTool,
    # Transform
    # NOTE(review): unlike its neighbours this key uses the
    # AlteryxSpatialPluginsGui prefix — presumably the id Alteryx emits for
    # Summarize; confirm against a real workflow file.
    "AlteryxSpatialPluginsGui.Summarize.Summarize": SummarizeTool,
    "AlteryxBasePluginsGui.CrossTab.CrossTab": CrossTabTool,
    "AlteryxBasePluginsGui.Transpose.Transpose": TransposeTool,
    # Documentation / no-op
    "AlteryxGuiToolkit.ToolContainer.ToolContainer": _PassthroughTool,
    "AlteryxGuiToolkit.TextBox.TextBox": _NullTool,
    "AlteryxGuiToolkit.Comment.Comment": _NullTool,
    "AlteryxBasePluginsGui.AlteryxAnnotation.AlteryxAnnotation": _NullTool,
}
def get_tool_class(plugin: str) -> Optional[Type[BaseTool]]:
    """Return the tool class for a given Plugin string, or None if unsupported.

    An exact registry key wins.  Otherwise a partial match is attempted —
    useful for minor version differences in plugin names: a registry key
    contained in *plugin*, or *plugin* ending with a key's final dotted
    segment.  When several keys match, the most specific one (longest
    matched text) is chosen, so a name ending in ``MultiRowFormula`` is no
    longer captured by the shorter ``Formula`` entry just because that
    entry comes first in the registry.  Ties keep registry order.
    """
    if not plugin:
        return None

    exact = _REGISTRY.get(plugin)
    if exact is not None:
        return exact

    best_cls: Optional[Type[BaseTool]] = None
    best_score = 0
    for key, tool_cls in _REGISTRY.items():
        tail = key.rsplit(".", 1)[-1]
        if key in plugin:
            score = len(key)
        elif plugin.endswith(tail):
            score = len(tail)
        else:
            continue
        # Strictly greater: the first of equally specific matches is kept.
        if score > best_score:
            best_cls, best_score = tool_cls, score
    return best_cls
def register_tool(plugin: str, cls: Type[BaseTool]) -> None:
    """Register a custom tool class for a given Plugin string.

    An existing entry for the same *plugin* is replaced.
    """
    _REGISTRY.update({plugin: cls})
__all__ = ["get_tool_class", "register_tool", "BaseTool"]

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,30 @@
from __future__ import annotations
from abc import ABC, abstractmethod
from typing import Dict, Optional
import polars as pl
import xml.etree.ElementTree as ET
from engine.graph import NodeDef
from engine.context import RunContext
class BaseTool(ABC):
    """Abstract base for workflow tools.

    Stores the node definition, the run context and the node's config
    element, and offers small helpers for reading values out of that
    config.  Every helper tolerates a missing config (``None``).
    """

    def __init__(self, node: NodeDef, ctx: RunContext):
        self.node = node
        self.ctx = ctx
        self.config: Optional[ET.Element] = node.config

    @abstractmethod
    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Execute the tool and return named output DataFrames."""

    def _cfg(self, xpath: str, default: Optional[str] = None) -> Optional[str]:
        """Return the text of the element at *xpath*.

        *default* is returned only when the element (or the whole config)
        is absent; an element that exists but has no text yields ``None``.
        """
        if self.config is None:
            return default
        match = self.config.find(xpath)
        if match is None:
            return default
        return match.text

    def _cfg_attr(self, xpath: str, attr: str, default: Optional[str] = None) -> Optional[str]:
        """Return attribute *attr* of the element at *xpath*, else *default*."""
        if self.config is None:
            return default
        match = self.config.find(xpath)
        if match is None:
            return default
        return match.attrib.get(attr, default)

    def _cfg_text(self, xpath: str, default: str = "") -> str:
        """Like ``_cfg`` but never returns ``None``: empty text gives *default*."""
        text = self._cfg(xpath, default)
        return default if text is None else text

View File

@ -0,0 +1,6 @@
from .input_data import InputDataTool
from .output_data import OutputDataTool
from .text_input import TextInputTool
from .browse import BrowseTool
__all__ = ["InputDataTool", "OutputDataTool", "TextInputTool", "BrowseTool"]

View File

@ -0,0 +1,14 @@
from __future__ import annotations
from typing import Dict
import polars as pl
from tools.base import BaseTool
class BrowseTool(BaseTool):
    """Pass the "Input" frame through, printing a preview when verbose."""

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Return the input on "Output" (an empty frame if there is none).

        In verbose mode the tool id, row count, column names and the first
        20 rows are printed first.
        """
        df = inputs.get("Input", pl.DataFrame())
        if not self.ctx.verbose:
            return {"Output": df}

        preview = df.head(20)
        print(f"\n[Browse ToolID={self.node.tool_id}]")
        print(f" rows={len(df)} cols={df.columns}")
        print(preview)
        return {"Output": df}

View File

@ -0,0 +1,99 @@
from __future__ import annotations
from typing import Dict, Optional
import xml.etree.ElementTree as ET
import polars as pl
from tools.base import BaseTool
class InputDataTool(BaseTool):
    """Read the file named in the config's ``<File>`` element into a DataFrame.

    The ``FileFormat`` attribute selects the reader (0/6 delimited text,
    25 Excel, 2 Parquet, 19 YXDB, 56 JSON; anything else is tried as CSV),
    ``RecordLimit`` optionally caps the number of rows, and
    ``<FormatSpecificOptions>`` supplies per-format settings.
    """

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Load the configured file and return it on "Output".

        An empty DataFrame is returned when there is no config or no file
        path.  String columns are whitespace-trimmed after loading.
        """
        if self.config is None:
            return {"Output": pl.DataFrame()}

        file_el = self.config.find("File")
        if file_el is None or not file_el.text:
            return {"Output": pl.DataFrame()}

        raw_path = file_el.text.strip()
        fmt = int(file_el.attrib.get("FileFormat", "0"))
        record_limit_str = file_el.attrib.get("RecordLimit", "").strip()
        limit = int(record_limit_str) if record_limit_str else None

        opts = self.config.find("FormatSpecificOptions")
        if opts is None:
            # Stand-in element so option lookups simply find nothing.
            opts = ET.Element("x")

        path_str, sheet = self._parse_path(raw_path)
        resolved = self.ctx.resolve_path(path_str)
        df = self._read(str(resolved), fmt, sheet, opts)

        # Trim whitespace from string columns (matches Alteryx behavior);
        # done in a single with_columns call rather than one per column.
        string_cols = [
            name for name, dtype in zip(df.columns, df.dtypes) if dtype == pl.String
        ]
        if string_cols:
            df = df.with_columns(pl.col(string_cols).str.strip_chars())

        # A limit of 0 is treated the same as no limit.
        if limit:
            df = df.head(limit)

        return {"Output": df}

    def _parse_path(self, raw: str) -> tuple[str, Optional[str]]:
        """Split ``path|||sheet`` into (path, sheet); sheet is None if absent.

        Surrounding backticks and trailing ``$`` are removed from the sheet.
        """
        if "|||" in raw:
            path, sheet = raw.split("|||", 1)
            return path.strip(), sheet.strip().strip("`").rstrip("$")
        return raw.strip(), None

    @staticmethod
    def _delimiter(opts: ET.Element) -> str:
        """Return the field separator from *opts* (default ``,``).

        Both the ``Delimeter`` and ``Delimiter`` spellings are accepted.
        The two-character escape ``\\t`` is translated to a real tab,
        because the CSV reader needs the actual separator character.
        NOTE(review): other escapes (e.g. ``\\0``) are passed through
        unchanged — confirm whether they occur in practice.
        """
        raw = opts.findtext("Delimeter") or opts.findtext("Delimiter") or ","
        return "\t" if raw == "\\t" else raw

    def _read(
        self,
        path: str,
        fmt: int,
        sheet: Optional[str],
        opts: ET.Element,
    ) -> pl.DataFrame:
        """Read *path* with the reader selected by *fmt*.

        Raises:
            NotImplementedError: for YXDB input when the ``yxdb`` package
                is not installed.
        """
        if fmt in (0, 6):  # CSV / delimited
            header_text = opts.findtext("HeaderRow") or "True"
            has_header = header_text.strip().lower() in ("true", "1", "yes")
            import_line = int(opts.findtext("ImportLine") or "1")
            skip = max(0, import_line - 1)
            return pl.read_csv(
                path,
                separator=self._delimiter(opts),
                has_header=has_header,
                skip_rows=skip,
                infer_schema_length=10000,
                ignore_errors=True,
            )

        if fmt == 25:  # Excel
            read_header = (opts.findtext("FirstRowData") or "False").lower() != "true"
            import_line = int(opts.findtext("ImportLine") or "1")
            skip = max(0, import_line - 1)
            # sheet_name must be a name or None (None selects the first
            # sheet); an integer index is not a valid sheet name.
            # NOTE(review): which read_options keys are accepted depends on
            # the Excel engine polars uses — confirm for the pinned version.
            return pl.read_excel(
                path,
                sheet_name=sheet or None,
                read_options={"has_header": read_header, "skip_rows": skip},
            )

        if fmt == 2:  # Parquet
            return pl.read_parquet(path)

        if fmt == 19:  # YXDB
            # Only the import is guarded, so an ImportError raised while
            # actually reading is not misreported as a missing package.
            try:
                import yxdb
            except ImportError as exc:
                raise NotImplementedError(
                    "YXDB format requires the 'yxdb' package: pip install yxdb"
                ) from exc
            # NOTE(review): assumes yxdb exposes open_file() returning an
            # iterable of rows — confirm against the package's API.
            reader = yxdb.open_file(path)
            rows = list(reader)
            if rows:
                return pl.DataFrame(rows)
            return pl.DataFrame()

        if fmt == 56:  # JSON
            return pl.read_json(path)

        # Fallback: try CSV
        return pl.read_csv(path, infer_schema_length=10000, ignore_errors=True)

View File

@ -0,0 +1,88 @@
from __future__ import annotations
from pathlib import Path
from typing import Dict
import xml.etree.ElementTree as ET
import polars as pl
from tools.base import BaseTool
class OutputDataTool(BaseTool):
    """Write the "Input" frame to the file named in the config's ``<File>``.

    Supports one file per distinct value of a field (``MultiFile``),
    fixed-size chunks (``MaxRecords``), or a single file.  The tool has no
    outputs and always returns an empty dict.
    """

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Write the input according to the configuration.

        Nothing is written when the config or file path is missing or the
        input frame is empty.
        """
        df = inputs.get("Input", pl.DataFrame())
        if self.config is None or df.is_empty():
            return {}

        file_el = self.config.find("File")
        if file_el is None or not file_el.text:
            return {}

        raw_path = file_el.text.strip()
        fmt = int(file_el.attrib.get("FileFormat", "0"))
        max_records_str = (file_el.attrib.get("MaxRecords") or "").strip()
        max_records = int(max_records_str) if max_records_str else None

        opts = self.config.find("FormatSpecificOptions")
        if opts is None:
            # Stand-in element so option lookups simply find nothing.
            opts = ET.Element("x")

        multi_el = self.config.find("MultiFile")
        multi_file = (multi_el.attrib.get("value", "False") if multi_el is not None else "False") == "True"
        multi_field = (self.config.findtext("MultiFileField") or "").strip()
        multi_type = (self.config.findtext("MultiFileType") or "Suffix").strip()
        keep_field = (self.config.findtext("KeepField") or "True").strip().lower() == "true"

        out_path = self.ctx.resolve_output_path(raw_path)
        out_path.parent.mkdir(parents=True, exist_ok=True)

        if multi_file and multi_field and multi_field in df.columns:
            # partition_by keeps rows whose key is null together in their
            # own group; filtering with `== value` per distinct value never
            # matches nulls, which dropped those rows and wrote an empty
            # file for them.
            for part in df.partition_by(multi_field, maintain_order=True):
                val = part[multi_field][0]
                if not keep_field:
                    part = part.drop(multi_field)
                part_path = self._multi_path(out_path, str(val), multi_type)
                self._write(part, part_path, fmt, opts)
        elif max_records:
            # First chunk keeps the configured name; later ones get _1, _2, …
            for chunk_num, start in enumerate(range(0, len(df), max_records)):
                chunk = df.slice(start, max_records)
                chunk_path = out_path if chunk_num == 0 else out_path.with_stem(
                    f"{out_path.stem}_{chunk_num}"
                )
                self._write(chunk, chunk_path, fmt, opts)
        else:
            self._write(df, out_path, fmt, opts)

        if self.ctx.verbose:
            print(f"[Output] Wrote {len(df)} rows → {out_path}")

        return {}

    def _multi_path(self, base: Path, value: str, mode: str) -> Path:
        """Derive a per-value file path from *base*.

        Characters in *value* other than alphanumerics, ``-`` and ``_`` are
        replaced by ``_``.  Mode ``"Suffix"`` appends the value to the
        stem; any other mode prepends it.
        """
        safe = "".join(c if c.isalnum() or c in "-_" else "_" for c in value)
        if mode == "Suffix":
            return base.with_stem(f"{base.stem}_{safe}")
        return base.with_stem(f"{safe}_{base.stem}")

    @staticmethod
    def _delimiter(opts: ET.Element) -> str:
        """Return the field separator from *opts* (default ``,``).

        Both the ``Delimeter`` and ``Delimiter`` spellings are accepted.
        The two-character escape ``\\t`` is translated to a real tab,
        because the CSV writer needs the actual separator character.
        """
        raw = opts.findtext("Delimeter") or opts.findtext("Delimiter") or ","
        return "\t" if raw == "\\t" else raw

    def _write(self, df: pl.DataFrame, path: Path, fmt: int, opts: ET.Element) -> None:
        """Write *df* to *path* in the format selected by *fmt*.

        0/6 delimited text, 25 Excel, 2 Parquet; 19 (YXDB) is written as
        Parquet next to the requested path; anything else is written as
        default CSV.
        """
        if fmt in (0, 6):  # CSV
            # Accept both HeaderRow and Headers element names
            header_val = (
                opts.findtext("HeaderRow")
                or opts.findtext("Headers")
                or "True"
            )
            header = header_val.lower() != "false"
            df.write_csv(str(path), separator=self._delimiter(opts), include_header=header)
        elif fmt == 25:  # Excel
            df.write_excel(str(path))
        elif fmt == 2:  # Parquet
            df.write_parquet(str(path))
        elif fmt == 19:  # YXDB — fall back to Parquet
            fallback = path.with_suffix(".parquet")
            df.write_parquet(str(fallback))
            if self.ctx.verbose:
                print(f"[Output] YXDB write not supported; wrote Parquet to {fallback}")
        else:
            df.write_csv(str(path))

View File

@ -0,0 +1,35 @@
from __future__ import annotations
from typing import Dict, Optional
import polars as pl
from tools.base import BaseTool
class TextInputTool(BaseTool):
    """Build a string-typed frame from rows embedded in the tool's XML config."""

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Materialise the configured inline table on the ``"Output"`` anchor.

        Column names are read from ``Fields/Field[@name]`` and rows from
        ``Data/r``, whose ``c`` children are matched to columns by position.
        Missing or empty cells become null; cells beyond the declared
        columns are ignored. Every column is typed as ``pl.String``.

        Args:
            inputs: Unused; this tool has no upstream input.

        Returns:
            ``{"Output": frame}``. The frame is completely empty when there
            is no config or no declared fields, and has the declared schema
            with zero rows when no data rows are present.
        """
        if self.config is None:
            return {"Output": pl.DataFrame()}

        column_names = [
            field_el.attrib["name"]
            for field_el in self.config.findall("Fields/Field")
        ]
        if not column_names:
            return {"Output": pl.DataFrame()}

        string_schema = {name: pl.String for name in column_names}
        width = len(column_names)

        records = []
        for row_el in self.config.findall("Data/r"):
            cell_texts = [cell_el.text for cell_el in row_el.findall("c")]
            # Pad short rows with None; zip() then drops any surplus cells.
            cell_texts += [None] * (width - len(cell_texts))
            # Empty text in XML → NULL
            records.append(
                {name: (text or None) for name, text in zip(column_names, cell_texts)}
            )

        if records:
            return {"Output": pl.DataFrame(records, schema=string_schema)}
        return {"Output": pl.DataFrame(schema=string_schema)}

View File

@ -0,0 +1,7 @@
from .join_tool import JoinTool
from .join_multiple import JoinMultipleTool
from .union_tool import UnionTool
from .append_fields import AppendFieldsTool
from .find_replace import FindReplaceTool
# Public API of this package: the tool classes re-exported above.
__all__ = [
    "JoinTool",
    "JoinMultipleTool",
    "UnionTool",
    "AppendFieldsTool",
    "FindReplaceTool",
]

Some files were not shown because too many files have changed in this diff Show More