initial commit with runner kinda working
commit
02e71a857c
|
|
@ -0,0 +1,157 @@
|
|||
# uv
|
||||
uv.lock
|
||||
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
*.so
|
||||
|
||||
# Distribution / packaging
|
||||
bin/
|
||||
build/
|
||||
develop-eggs/
|
||||
dist/
|
||||
downloads/
|
||||
eggs/
|
||||
.eggs/
|
||||
lib/
|
||||
lib64/
|
||||
parts/
|
||||
sdist/
|
||||
var/
|
||||
wheels/
|
||||
share/python-wheels/
|
||||
*.egg-info/
|
||||
.installed.cfg
|
||||
*.egg
|
||||
MANIFEST
|
||||
|
||||
# PyInstaller
|
||||
# Usually these files are written by a python script from a template
|
||||
# before PyInstaller builds the exe
|
||||
*.manifest
|
||||
*.spec
|
||||
|
||||
# Installer logs
|
||||
pip-log.txt
|
||||
pip-delete-this-directory.txt
|
||||
|
||||
# Unit test / coverage reports
|
||||
htmlcov/
|
||||
.toast/
|
||||
.cache/
|
||||
.pytest_cache/
|
||||
.noscript/
|
||||
.htmlcov/
|
||||
.coverage
|
||||
.coverage.*
|
||||
.cache
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
.mutmut-cache
|
||||
|
||||
# Translations
|
||||
*.mo
|
||||
*.pot
|
||||
|
||||
# Django stuff:
|
||||
*.log
|
||||
local_settings.py
|
||||
db.sqlite3
|
||||
db.sqlite3-journal
|
||||
|
||||
# Flask stuff:
|
||||
instance/
|
||||
.webassets-cache
|
||||
|
||||
# Scrapy stuff:
|
||||
.scrapy
|
||||
|
||||
# Sphinx documentation
|
||||
docs/_build/
|
||||
|
||||
# PyBuilder
|
||||
.pybuilder/
|
||||
target/
|
||||
|
||||
# Jupyter Notebook
|
||||
.ipynb_checkpoints
|
||||
|
||||
# IPython
|
||||
profile_default/
|
||||
ipython_config.py
|
||||
|
||||
# pyenv
|
||||
# For a discussion that covers why this should be excluded, see:
|
||||
# https://stackoverflow.com/questions/54315206/should-we-gitignore-the-python-version-file
|
||||
# .python-version
|
||||
|
||||
# pipenv
|
||||
# According to pypa/pipenv#118, it is recommended to include Pipfile.lock in version control.
|
||||
# However, if you are executing a library instead of an application, you might skip it.
|
||||
#Pipfile.lock
|
||||
|
||||
# poetry
|
||||
# Using Poetry requires committing poetry.lock alongside pyproject.toml
|
||||
# https://python-poetry.org
|
||||
#poetry.lock
|
||||
|
||||
# pdm
|
||||
# https://fming.dev
|
||||
.pdm-plugins/
|
||||
.pdm-build/
|
||||
|
||||
# Hatch
|
||||
.hatch/
|
||||
|
||||
# Celery stuff
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
ENV/
|
||||
env.bak/
|
||||
venv.bak/
|
||||
|
||||
# Spyder project settings
|
||||
.spyderproject
|
||||
.spyproject
|
||||
|
||||
# Rope project settings
|
||||
.ropeproject
|
||||
|
||||
# mkdocs documentation
|
||||
/site/
|
||||
|
||||
# mypy
|
||||
.mypy_cache/
|
||||
.dmypy.json
|
||||
dmypy.json
|
||||
|
||||
# Pyre type checker
|
||||
.pyre/
|
||||
|
||||
# pytype
|
||||
.pytype/
|
||||
|
||||
# Cython debug symbols
|
||||
cython_debug/
|
||||
|
||||
# IDEs and Editors (Optional but recommended)
|
||||
.vscode/
|
||||
.idea/
|
||||
*.swp
|
||||
*.swo
|
||||
.DS_Store
|
||||
|
|
@ -0,0 +1,44 @@
|
|||
# Project Overview
|
||||
Pyteryx is a Python-first reimplementation of the Alteryx tool.
|
||||
The intent is to keep the same observability features and a rich DAG-based user interface, while taking advantage of the ease of deploying Python.
|
||||
|
||||
Pyteryx should have a near-identical look and feel to Alteryx, with the added option of exporting a workflow (.yxmd) as a Python script.
|
||||
|
||||
### Alteryx Tools Documentation / UI Examples
|
||||
https://help.alteryx.com/current/en/designer/tools.html
|
||||
|
||||
### Alteryx Tool Logic / Conversion code
|
||||
./alteryx-to-python-migration-strategy-main
|
||||
This migration system helps organizations transition from Alteryx's visual workflow platform to Python-based data processing pipelines. It automatically converts Alteryx workflow XML files (.yxmd) into equivalent Python code using pandas, numpy, and other standard data science libraries.
|
||||
|
||||
./alteryx-to-python-migration-strategy-main/migration_toolkit.py
|
||||
This file contains conversion logic that could be used to create an Alteryx execution engine that can run within the Pyteryx app.
|
||||
|
||||
# Back End
|
||||
This is a Python project using `uv` for dependency management and environment setup.
|
||||
|
||||
## Agent instructions
|
||||
When interacting with this project, AI agents should adhere to the following guidelines:
|
||||
- Always use uv for dependency management and environment activation. Avoid using pip directly.
|
||||
- Do not manually edit pyproject.toml or uv.lock files. Use uv add or uv lock --upgrade for dependency changes.
|
||||
- Ensure Python scripts and commands execute inside the project's virtual environment. Use `uv run` for this purpose; it selects the project environment automatically, so no manual activation is needed.
|
||||
- Prioritize using uv commands over direct Python or system commands for package management.
|
||||
- If suggesting package installations, always recommend using `uv add <package-name>` over `pip install <package-name>`.
|
||||
|
||||
## Setup Commands
|
||||
### Install dependencies
|
||||
uv add <package-name>
|
||||
|
||||
### Running files
|
||||
uv run python <your_script.py>
|
||||
|
||||
### Clean cache
|
||||
uv cache clean
|
||||
|
||||
### Environment Variables / Auth
|
||||
The `.env` file in the project root folder contains all required auth strings, in the following format:
|
||||
name = 'string',
|
||||
|
||||
# Front End
|
||||
The front end is written in Go using the Fyne UI library (https://fyne.io/).
|
||||
|
||||
|
|
@ -0,0 +1,388 @@
|
|||
<?xml version="1.0"?>
|
||||
<AlteryxDocument yxmdVer="2022.3">
|
||||
<Nodes>
|
||||
<Node ToolID="2">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileInput.DbFileInput">
|
||||
<Position x="162" y="378" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<Passwords />
|
||||
<File RecordLimit="" SearchSubDirs="False" FileFormat="0" OutputFileName="">Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv</File>
|
||||
<FormatSpecificOptions>
|
||||
<HeaderRow>True</HeaderRow>
|
||||
<IgnoreErrors>False</IgnoreErrors>
|
||||
<AllowShareWrite>False</AllowShareWrite>
|
||||
<ImportLine>1</ImportLine>
|
||||
<FieldLen>254</FieldLen>
|
||||
<SingleThreadRead>False</SingleThreadRead>
|
||||
<IgnoreQuotes>DoubleQuotes</IgnoreQuotes>
|
||||
<Delimeter>,</Delimeter>
|
||||
<QuoteRecordBreak>False</QuoteRecordBreak>
|
||||
<CodePage>28591</CodePage>
|
||||
</FormatSpecificOptions>
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText>products.csv</DefaultAnnotationText>
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
<Dependencies>
|
||||
<Implicit />
|
||||
</Dependencies>
|
||||
<MetaInfo connection="Output">
|
||||
<RecordInfo>
|
||||
<Field name="Product_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
|
||||
<Field name="Product_Name" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
|
||||
<Field name="Product_Category" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
|
||||
<Field name="Product_Cost" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
|
||||
<Field name="Product_Price" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
|
||||
</RecordInfo>
|
||||
</MetaInfo>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileInput" />
|
||||
</Node>
|
||||
<Node ToolID="3">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.Join.Join">
|
||||
<Position x="558" y="282" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration joinByRecordPos="False">
|
||||
<JoinInfo connection="Left">
|
||||
<Field field="Product_ID" />
|
||||
</JoinInfo>
|
||||
<JoinInfo connection="Right">
|
||||
<Field field="Product_ID" />
|
||||
</JoinInfo>
|
||||
<SelectConfiguration>
|
||||
<Configuration outputConnection="Join">
|
||||
<OrderChanged value="False" />
|
||||
<CommaDecimal value="False" />
|
||||
<SelectFields>
|
||||
<SelectField field="Right_Product_ID" selected="True" rename="Right_Product_ID" input="Right_" />
|
||||
<SelectField field="*Unknown" selected="True" />
|
||||
</SelectFields>
|
||||
</Configuration>
|
||||
</SelectConfiguration>
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText />
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxJoin" />
|
||||
</Node>
|
||||
<Node ToolID="4">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileInput.DbFileInput">
|
||||
<Position x="162" y="270" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<Passwords />
|
||||
<File RecordLimit="" SearchSubDirs="False" FileFormat="0" OutputFileName="">Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv</File>
|
||||
<FormatSpecificOptions>
|
||||
<HeaderRow>True</HeaderRow>
|
||||
<IgnoreErrors>False</IgnoreErrors>
|
||||
<AllowShareWrite>False</AllowShareWrite>
|
||||
<ImportLine>1</ImportLine>
|
||||
<FieldLen>254</FieldLen>
|
||||
<SingleThreadRead>False</SingleThreadRead>
|
||||
<IgnoreQuotes>DoubleQuotes</IgnoreQuotes>
|
||||
<Delimeter>,</Delimeter>
|
||||
<QuoteRecordBreak>False</QuoteRecordBreak>
|
||||
<CodePage>28591</CodePage>
|
||||
</FormatSpecificOptions>
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText>inventory.csv</DefaultAnnotationText>
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
<Dependencies>
|
||||
<Implicit />
|
||||
</Dependencies>
|
||||
<MetaInfo connection="Output">
|
||||
<RecordInfo>
|
||||
<Field name="Store_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
|
||||
<Field name="Product_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
|
||||
<Field name="Stock_On_Hand" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
|
||||
</RecordInfo>
|
||||
</MetaInfo>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileInput" />
|
||||
</Node>
|
||||
<Node ToolID="6">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.Union.Union">
|
||||
<Position x="402" y="378" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<ByName_ErrorMode>Warning</ByName_ErrorMode>
|
||||
<ByName_OutputMode>All</ByName_OutputMode>
|
||||
<Mode>ByName</Mode>
|
||||
<SetOutputOrder value="False" />
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText />
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxUnion" />
|
||||
</Node>
|
||||
<Node ToolID="7">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.Sample.Sample">
|
||||
<Position x="258" y="378" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<Mode>First</Mode>
|
||||
<N>30</N>
|
||||
<GroupFields orderChanged="False" />
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText>First 30 rows</DefaultAnnotationText>
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxSample" />
|
||||
</Node>
|
||||
<Node ToolID="8">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.TextInput.TextInput">
|
||||
<Position x="258" y="474" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<NumRows value="1" />
|
||||
<Fields>
|
||||
<Field name="Product_ID" />
|
||||
<Field name="Product_Name" />
|
||||
<Field name="Product_Category" />
|
||||
<Field name="Product_Cost" />
|
||||
<Field name="Product_Price" />
|
||||
</Fields>
|
||||
<Data>
|
||||
<r>
|
||||
<c>100</c>
|
||||
<c>Non-product</c>
|
||||
<c>NoCat</c>
|
||||
<c>$1</c>
|
||||
<c>$1</c>
|
||||
</r>
|
||||
</Data>
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText />
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxTextInput" />
|
||||
</Node>
|
||||
<Node ToolID="9">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
|
||||
<Position x="510" y="438" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<File MaxRecords="" FileFormat="0">Z:\Pyteryx\Alteryx_TestWorkflows\JoinTesting\Output\Products_before_join.csv</File>
|
||||
<Passwords />
|
||||
<Disable>False</Disable>
|
||||
<FormatSpecificOptions>
|
||||
<LineEndStyle>CRLF</LineEndStyle>
|
||||
<Delimeter>,</Delimeter>
|
||||
<ForceQuotes>False</ForceQuotes>
|
||||
<HeaderRow>True</HeaderRow>
|
||||
<CodePage>28591</CodePage>
|
||||
<WriteBOM>True</WriteBOM>
|
||||
</FormatSpecificOptions>
|
||||
<MultiFile value="False" />
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText>Products_before_join.csv</DefaultAnnotationText>
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
<Dependencies>
|
||||
<Implicit />
|
||||
</Dependencies>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
|
||||
</Node>
|
||||
<Node ToolID="10">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
|
||||
<Position x="774" y="414" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<File MaxRecords="" FileFormat="0">Z:\Pyteryx\Alteryx_TestWorkflows\JoinTesting\Output\Join_out_R.csv</File>
|
||||
<Passwords />
|
||||
<Disable>False</Disable>
|
||||
<FormatSpecificOptions>
|
||||
<LineEndStyle>CRLF</LineEndStyle>
|
||||
<Delimeter>,</Delimeter>
|
||||
<ForceQuotes>False</ForceQuotes>
|
||||
<HeaderRow>True</HeaderRow>
|
||||
<CodePage>28591</CodePage>
|
||||
<WriteBOM>True</WriteBOM>
|
||||
</FormatSpecificOptions>
|
||||
<MultiFile value="False" />
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText>Join_out_R.csv</DefaultAnnotationText>
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
<Dependencies>
|
||||
<Implicit />
|
||||
</Dependencies>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
|
||||
</Node>
|
||||
<Node ToolID="11">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
|
||||
<Position x="774" y="282" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<File MaxRecords="" FileFormat="0">Z:\Pyteryx\Alteryx_TestWorkflows\JoinTesting\Output\Join_out_J.csv</File>
|
||||
<Passwords />
|
||||
<Disable>False</Disable>
|
||||
<FormatSpecificOptions>
|
||||
<LineEndStyle>CRLF</LineEndStyle>
|
||||
<Delimeter>,</Delimeter>
|
||||
<ForceQuotes>False</ForceQuotes>
|
||||
<HeaderRow>True</HeaderRow>
|
||||
<CodePage>28591</CodePage>
|
||||
<WriteBOM>True</WriteBOM>
|
||||
</FormatSpecificOptions>
|
||||
<MultiFile value="False" />
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText>Join_out_J.csv</DefaultAnnotationText>
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
<Dependencies>
|
||||
<Implicit />
|
||||
</Dependencies>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
|
||||
</Node>
|
||||
<Node ToolID="12">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
|
||||
<Position x="774" y="138" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<File MaxRecords="" FileFormat="0">Z:\Pyteryx\Alteryx_TestWorkflows\JoinTesting\Output\Join_out_L.csv</File>
|
||||
<Passwords />
|
||||
<Disable>False</Disable>
|
||||
<FormatSpecificOptions>
|
||||
<LineEndStyle>CRLF</LineEndStyle>
|
||||
<Delimeter>,</Delimeter>
|
||||
<ForceQuotes>False</ForceQuotes>
|
||||
<HeaderRow>True</HeaderRow>
|
||||
<CodePage>28591</CodePage>
|
||||
<WriteBOM>True</WriteBOM>
|
||||
</FormatSpecificOptions>
|
||||
<MultiFile value="False" />
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText>Join_out_L.csv</DefaultAnnotationText>
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
<Dependencies>
|
||||
<Implicit />
|
||||
</Dependencies>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
|
||||
</Node>
|
||||
</Nodes>
|
||||
<Connections>
|
||||
<Connection>
|
||||
<Origin ToolID="2" Connection="Output" />
|
||||
<Destination ToolID="7" Connection="Input" />
|
||||
</Connection>
|
||||
<Connection>
|
||||
<Origin ToolID="3" Connection="Right" />
|
||||
<Destination ToolID="10" Connection="Input" />
|
||||
</Connection>
|
||||
<Connection>
|
||||
<Origin ToolID="3" Connection="Join" />
|
||||
<Destination ToolID="11" Connection="Input" />
|
||||
</Connection>
|
||||
<Connection>
|
||||
<Origin ToolID="3" Connection="Left" />
|
||||
<Destination ToolID="12" Connection="Input" />
|
||||
</Connection>
|
||||
<Connection>
|
||||
<Origin ToolID="4" Connection="Output" />
|
||||
<Destination ToolID="3" Connection="Left" />
|
||||
</Connection>
|
||||
<Connection>
|
||||
<Origin ToolID="6" Connection="Output" />
|
||||
<Destination ToolID="3" Connection="Right" />
|
||||
</Connection>
|
||||
<Connection>
|
||||
<Origin ToolID="6" Connection="Output" />
|
||||
<Destination ToolID="9" Connection="Input" />
|
||||
</Connection>
|
||||
<Connection name="#1">
|
||||
<Origin ToolID="7" Connection="Output" />
|
||||
<Destination ToolID="6" Connection="Input" />
|
||||
</Connection>
|
||||
<Connection name="#2">
|
||||
<Origin ToolID="8" Connection="Output" />
|
||||
<Destination ToolID="6" Connection="Input" />
|
||||
</Connection>
|
||||
</Connections>
|
||||
<Properties>
|
||||
<Memory default="True" />
|
||||
<GlobalRecordLimit value="0" />
|
||||
<TempFiles default="True" />
|
||||
<Annotation on="True" includeToolName="False" />
|
||||
<ConvErrorLimit value="10" />
|
||||
<ConvErrorLimit_Stop value="False" />
|
||||
<CancelOnError value="False" />
|
||||
<DisableBrowse value="False" />
|
||||
<EnablePerformanceProfiling value="False" />
|
||||
<RunWithE2 value="True" />
|
||||
<PredictiveToolsCodePage value="1252" />
|
||||
<DisableAllOutput value="False" />
|
||||
<ShowAllMacroMessages value="False" />
|
||||
<ShowConnectionStatusIsOn value="True" />
|
||||
<ShowConnectionStatusOnlyWhenRunning value="True" />
|
||||
<ZoomLevel value="0" />
|
||||
<LayoutType>Horizontal</LayoutType>
|
||||
<MetaInfo>
|
||||
<NameIsFileName value="True" />
|
||||
<Name>JoinTesting</Name>
|
||||
<Description />
|
||||
<RootToolName />
|
||||
<ToolVersion />
|
||||
<ToolInDb value="False" />
|
||||
<CategoryName />
|
||||
<SearchTags />
|
||||
<Author />
|
||||
<Company />
|
||||
<Copyright />
|
||||
<DescriptionLink actual="" displayed="" />
|
||||
<Example>
|
||||
<Description />
|
||||
<File />
|
||||
</Example>
|
||||
<WorkflowId value="b5723901-81a1-4ff2-8b26-42700d914cb2" />
|
||||
<Telemetry>
|
||||
<PreviousWorkflowId value="ac869796-4ff0-4398-9782-f8fa79f30c4a" />
|
||||
<OriginWorkflowId value="ac869796-4ff0-4398-9782-f8fa79f30c4a" />
|
||||
</Telemetry>
|
||||
</MetaInfo>
|
||||
<Events>
|
||||
<Enabled value="True" />
|
||||
</Events>
|
||||
</Properties>
|
||||
</AlteryxDocument>
|
||||
|
|
@ -0,0 +1,388 @@
|
|||
<?xml version="1.0"?>
|
||||
<AlteryxDocument yxmdVer="2022.3">
|
||||
<Nodes>
|
||||
<Node ToolID="2">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileInput.DbFileInput">
|
||||
<Position x="162" y="378" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<Passwords />
|
||||
<File RecordLimit="" SearchSubDirs="False" FileFormat="0" OutputFileName="">..\Maven Toys Data\products.csv</File>
|
||||
<FormatSpecificOptions>
|
||||
<HeaderRow>True</HeaderRow>
|
||||
<IgnoreErrors>False</IgnoreErrors>
|
||||
<AllowShareWrite>False</AllowShareWrite>
|
||||
<ImportLine>1</ImportLine>
|
||||
<FieldLen>254</FieldLen>
|
||||
<SingleThreadRead>False</SingleThreadRead>
|
||||
<IgnoreQuotes>DoubleQuotes</IgnoreQuotes>
|
||||
<Delimeter>,</Delimeter>
|
||||
<QuoteRecordBreak>False</QuoteRecordBreak>
|
||||
<CodePage>28591</CodePage>
|
||||
</FormatSpecificOptions>
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText>products.csv</DefaultAnnotationText>
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
<Dependencies>
|
||||
<Implicit />
|
||||
</Dependencies>
|
||||
<MetaInfo connection="Output">
|
||||
<RecordInfo>
|
||||
<Field name="Product_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
|
||||
<Field name="Product_Name" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
|
||||
<Field name="Product_Category" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
|
||||
<Field name="Product_Cost" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
|
||||
<Field name="Product_Price" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\products.csv" type="V_String" />
|
||||
</RecordInfo>
|
||||
</MetaInfo>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileInput" />
|
||||
</Node>
|
||||
<Node ToolID="3">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.Join.Join">
|
||||
<Position x="558" y="282" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration joinByRecordPos="False">
|
||||
<JoinInfo connection="Left">
|
||||
<Field field="Product_ID" />
|
||||
</JoinInfo>
|
||||
<JoinInfo connection="Right">
|
||||
<Field field="Product_ID" />
|
||||
</JoinInfo>
|
||||
<SelectConfiguration>
|
||||
<Configuration outputConnection="Join">
|
||||
<OrderChanged value="False" />
|
||||
<CommaDecimal value="False" />
|
||||
<SelectFields>
|
||||
<SelectField field="Right_Product_ID" selected="True" rename="Right_Product_ID" input="Right_" />
|
||||
<SelectField field="*Unknown" selected="True" />
|
||||
</SelectFields>
|
||||
</Configuration>
|
||||
</SelectConfiguration>
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText />
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxJoin" />
|
||||
</Node>
|
||||
<Node ToolID="4">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileInput.DbFileInput">
|
||||
<Position x="162" y="270" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<Passwords />
|
||||
<File RecordLimit="" SearchSubDirs="False" FileFormat="0" OutputFileName="">..\Maven Toys Data\inventory.csv</File>
|
||||
<FormatSpecificOptions>
|
||||
<HeaderRow>True</HeaderRow>
|
||||
<IgnoreErrors>False</IgnoreErrors>
|
||||
<AllowShareWrite>False</AllowShareWrite>
|
||||
<ImportLine>1</ImportLine>
|
||||
<FieldLen>254</FieldLen>
|
||||
<SingleThreadRead>False</SingleThreadRead>
|
||||
<IgnoreQuotes>DoubleQuotes</IgnoreQuotes>
|
||||
<Delimeter>,</Delimeter>
|
||||
<QuoteRecordBreak>False</QuoteRecordBreak>
|
||||
<CodePage>28591</CodePage>
|
||||
</FormatSpecificOptions>
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText>inventory.csv</DefaultAnnotationText>
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
<Dependencies>
|
||||
<Implicit />
|
||||
</Dependencies>
|
||||
<MetaInfo connection="Output">
|
||||
<RecordInfo>
|
||||
<Field name="Store_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
|
||||
<Field name="Product_ID" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
|
||||
<Field name="Stock_On_Hand" size="254" source="File: Z:\Pyteryx\Alteryx_TestWorkflows\Maven Toys Data\inventory.csv" type="V_String" />
|
||||
</RecordInfo>
|
||||
</MetaInfo>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileInput" />
|
||||
</Node>
|
||||
<Node ToolID="6">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.Union.Union">
|
||||
<Position x="402" y="378" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<ByName_ErrorMode>Warning</ByName_ErrorMode>
|
||||
<ByName_OutputMode>All</ByName_OutputMode>
|
||||
<Mode>ByName</Mode>
|
||||
<SetOutputOrder value="False" />
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText />
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxUnion" />
|
||||
</Node>
|
||||
<Node ToolID="7">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.Sample.Sample">
|
||||
<Position x="258" y="378" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<Mode>First</Mode>
|
||||
<N>30</N>
|
||||
<GroupFields orderChanged="False" />
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText>First 30 rows</DefaultAnnotationText>
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxSample" />
|
||||
</Node>
|
||||
<Node ToolID="8">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.TextInput.TextInput">
|
||||
<Position x="258" y="474" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<NumRows value="1" />
|
||||
<Fields>
|
||||
<Field name="Product_ID" />
|
||||
<Field name="Product_Name" />
|
||||
<Field name="Product_Category" />
|
||||
<Field name="Product_Cost" />
|
||||
<Field name="Product_Price" />
|
||||
</Fields>
|
||||
<Data>
|
||||
<r>
|
||||
<c>100</c>
|
||||
<c>Non-product</c>
|
||||
<c>NoCat</c>
|
||||
<c>$1</c>
|
||||
<c>$1</c>
|
||||
</r>
|
||||
</Data>
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText />
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxTextInput" />
|
||||
</Node>
|
||||
<Node ToolID="9">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
|
||||
<Position x="510" y="438" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<File MaxRecords="" FileFormat="0">.\Output\Products_before_join.csv</File>
|
||||
<Passwords />
|
||||
<Disable>False</Disable>
|
||||
<FormatSpecificOptions>
|
||||
<LineEndStyle>CRLF</LineEndStyle>
|
||||
<Delimeter>,</Delimeter>
|
||||
<ForceQuotes>False</ForceQuotes>
|
||||
<HeaderRow>True</HeaderRow>
|
||||
<CodePage>28591</CodePage>
|
||||
<WriteBOM>True</WriteBOM>
|
||||
</FormatSpecificOptions>
|
||||
<MultiFile value="False" />
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText>Products_before_join.csv</DefaultAnnotationText>
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
<Dependencies>
|
||||
<Implicit />
|
||||
</Dependencies>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
|
||||
</Node>
|
||||
<Node ToolID="10">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
|
||||
<Position x="774" y="414" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<File MaxRecords="" FileFormat="0">.\Output\Join_out_R.csv</File>
|
||||
<Passwords />
|
||||
<Disable>False</Disable>
|
||||
<FormatSpecificOptions>
|
||||
<LineEndStyle>CRLF</LineEndStyle>
|
||||
<Delimeter>,</Delimeter>
|
||||
<ForceQuotes>False</ForceQuotes>
|
||||
<HeaderRow>True</HeaderRow>
|
||||
<CodePage>28591</CodePage>
|
||||
<WriteBOM>True</WriteBOM>
|
||||
</FormatSpecificOptions>
|
||||
<MultiFile value="False" />
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText>Join_out_R.csv</DefaultAnnotationText>
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
<Dependencies>
|
||||
<Implicit />
|
||||
</Dependencies>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
|
||||
</Node>
|
||||
<Node ToolID="11">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
|
||||
<Position x="774" y="282" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<File MaxRecords="" FileFormat="0">.\Output\Join_out_J.csv</File>
|
||||
<Passwords />
|
||||
<Disable>False</Disable>
|
||||
<FormatSpecificOptions>
|
||||
<LineEndStyle>CRLF</LineEndStyle>
|
||||
<Delimeter>,</Delimeter>
|
||||
<ForceQuotes>False</ForceQuotes>
|
||||
<HeaderRow>True</HeaderRow>
|
||||
<CodePage>28591</CodePage>
|
||||
<WriteBOM>True</WriteBOM>
|
||||
</FormatSpecificOptions>
|
||||
<MultiFile value="False" />
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText>Join_out_J.csv</DefaultAnnotationText>
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
<Dependencies>
|
||||
<Implicit />
|
||||
</Dependencies>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
|
||||
</Node>
|
||||
<Node ToolID="12">
|
||||
<GuiSettings Plugin="AlteryxBasePluginsGui.DbFileOutput.DbFileOutput">
|
||||
<Position x="774" y="138" />
|
||||
</GuiSettings>
|
||||
<Properties>
|
||||
<Configuration>
|
||||
<File MaxRecords="" FileFormat="0">.\Output\Join_out_L.csv</File>
|
||||
<Passwords />
|
||||
<Disable>False</Disable>
|
||||
<FormatSpecificOptions>
|
||||
<LineEndStyle>CRLF</LineEndStyle>
|
||||
<Delimeter>,</Delimeter>
|
||||
<ForceQuotes>False</ForceQuotes>
|
||||
<HeaderRow>True</HeaderRow>
|
||||
<CodePage>28591</CodePage>
|
||||
<WriteBOM>True</WriteBOM>
|
||||
</FormatSpecificOptions>
|
||||
<MultiFile value="False" />
|
||||
</Configuration>
|
||||
<Annotation DisplayMode="0">
|
||||
<Name />
|
||||
<DefaultAnnotationText>Join_out_L.csv</DefaultAnnotationText>
|
||||
<Left value="False" />
|
||||
</Annotation>
|
||||
<Dependencies>
|
||||
<Implicit />
|
||||
</Dependencies>
|
||||
</Properties>
|
||||
<EngineSettings EngineDll="AlteryxBasePluginsEngine.dll" EngineDllEntryPoint="AlteryxDbFileOutput" />
|
||||
</Node>
|
||||
</Nodes>
|
||||
<Connections>
|
||||
<Connection>
|
||||
<Origin ToolID="2" Connection="Output" />
|
||||
<Destination ToolID="7" Connection="Input" />
|
||||
</Connection>
|
||||
<Connection>
|
||||
<Origin ToolID="3" Connection="Right" />
|
||||
<Destination ToolID="10" Connection="Input" />
|
||||
</Connection>
|
||||
<Connection>
|
||||
<Origin ToolID="3" Connection="Join" />
|
||||
<Destination ToolID="11" Connection="Input" />
|
||||
</Connection>
|
||||
<Connection>
|
||||
<Origin ToolID="3" Connection="Left" />
|
||||
<Destination ToolID="12" Connection="Input" />
|
||||
</Connection>
|
||||
<Connection>
|
||||
<Origin ToolID="4" Connection="Output" />
|
||||
<Destination ToolID="3" Connection="Left" />
|
||||
</Connection>
|
||||
<Connection>
|
||||
<Origin ToolID="6" Connection="Output" />
|
||||
<Destination ToolID="3" Connection="Right" />
|
||||
</Connection>
|
||||
<Connection>
|
||||
<Origin ToolID="6" Connection="Output" />
|
||||
<Destination ToolID="9" Connection="Input" />
|
||||
</Connection>
|
||||
<Connection name="#1">
|
||||
<Origin ToolID="7" Connection="Output" />
|
||||
<Destination ToolID="6" Connection="Input" />
|
||||
</Connection>
|
||||
<Connection name="#2">
|
||||
<Origin ToolID="8" Connection="Output" />
|
||||
<Destination ToolID="6" Connection="Input" />
|
||||
</Connection>
|
||||
</Connections>
|
||||
<Properties>
|
||||
<Memory default="True" />
|
||||
<GlobalRecordLimit value="0" />
|
||||
<TempFiles default="True" />
|
||||
<Annotation on="True" includeToolName="False" />
|
||||
<ConvErrorLimit value="10" />
|
||||
<ConvErrorLimit_Stop value="False" />
|
||||
<CancelOnError value="False" />
|
||||
<DisableBrowse value="False" />
|
||||
<EnablePerformanceProfiling value="False" />
|
||||
<RunWithE2 value="True" />
|
||||
<PredictiveToolsCodePage value="1252" />
|
||||
<DisableAllOutput value="False" />
|
||||
<ShowAllMacroMessages value="False" />
|
||||
<ShowConnectionStatusIsOn value="True" />
|
||||
<ShowConnectionStatusOnlyWhenRunning value="True" />
|
||||
<ZoomLevel value="0" />
|
||||
<LayoutType>Horizontal</LayoutType>
|
||||
<MetaInfo>
|
||||
<NameIsFileName value="True" />
|
||||
<Name>JoinTesting</Name>
|
||||
<Description />
|
||||
<RootToolName />
|
||||
<ToolVersion />
|
||||
<ToolInDb value="False" />
|
||||
<CategoryName />
|
||||
<SearchTags />
|
||||
<Author />
|
||||
<Company />
|
||||
<Copyright />
|
||||
<DescriptionLink actual="" displayed="" />
|
||||
<Example>
|
||||
<Description />
|
||||
<File />
|
||||
</Example>
|
||||
<WorkflowId value="b5723901-81a1-4ff2-8b26-42700d914cb2" />
|
||||
<Telemetry>
|
||||
<PreviousWorkflowId value="ac869796-4ff0-4398-9782-f8fa79f30c4a" />
|
||||
<OriginWorkflowId value="ac869796-4ff0-4398-9782-f8fa79f30c4a" />
|
||||
</Telemetry>
|
||||
</MetaInfo>
|
||||
<Events>
|
||||
<Enabled value="True" />
|
||||
</Events>
|
||||
</Properties>
|
||||
</AlteryxDocument>
|
||||
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,226 @@
|
|||
Left_Store_ID,Product_ID,Left_Stock_On_Hand
|
||||
15,31,4
|
||||
15,32,16
|
||||
15,33,8
|
||||
15,34,7
|
||||
16,31,14
|
||||
16,32,7
|
||||
16,33,6
|
||||
16,34,2
|
||||
16,35,6
|
||||
17,31,20
|
||||
17,32,15
|
||||
17,33,27
|
||||
17,34,11
|
||||
18,31,4
|
||||
18,32,9
|
||||
18,33,9
|
||||
18,34,8
|
||||
18,35,10
|
||||
19,31,4
|
||||
19,32,5
|
||||
19,33,0
|
||||
19,34,15
|
||||
19,35,14
|
||||
20,31,10
|
||||
20,32,9
|
||||
20,33,28
|
||||
20,34,19
|
||||
21,31,19
|
||||
21,32,3
|
||||
21,33,16
|
||||
21,34,16
|
||||
22,31,34
|
||||
22,32,38
|
||||
22,33,8
|
||||
22,34,6
|
||||
22,35,2
|
||||
23,31,19
|
||||
23,32,11
|
||||
23,33,6
|
||||
23,34,18
|
||||
23,35,4
|
||||
24,31,10
|
||||
24,32,10
|
||||
24,33,4
|
||||
24,34,17
|
||||
24,35,19
|
||||
25,31,0
|
||||
25,32,10
|
||||
25,33,4
|
||||
25,34,23
|
||||
26,31,4
|
||||
26,32,2
|
||||
26,33,2
|
||||
26,34,17
|
||||
26,35,8
|
||||
27,31,13
|
||||
27,32,6
|
||||
27,33,7
|
||||
27,34,9
|
||||
28,31,18
|
||||
28,32,3
|
||||
28,33,9
|
||||
28,34,19
|
||||
29,31,3
|
||||
29,32,7
|
||||
29,33,6
|
||||
29,34,16
|
||||
30,31,20
|
||||
30,32,13
|
||||
30,33,10
|
||||
30,34,18
|
||||
31,31,39
|
||||
31,32,12
|
||||
31,33,20
|
||||
31,34,20
|
||||
32,31,4
|
||||
32,32,8
|
||||
32,33,13
|
||||
32,34,20
|
||||
33,31,7
|
||||
33,32,15
|
||||
33,33,9
|
||||
33,34,14
|
||||
33,35,18
|
||||
34,31,30
|
||||
34,32,19
|
||||
34,33,9
|
||||
34,34,17
|
||||
34,35,20
|
||||
35,31,74
|
||||
35,32,20
|
||||
35,33,14
|
||||
35,34,9
|
||||
36,31,6
|
||||
36,32,7
|
||||
36,33,21
|
||||
36,34,2
|
||||
36,35,12
|
||||
37,31,14
|
||||
37,32,0
|
||||
37,33,10
|
||||
37,34,13
|
||||
37,35,14
|
||||
38,31,17
|
||||
38,32,20
|
||||
38,33,9
|
||||
38,34,18
|
||||
38,35,2
|
||||
39,31,15
|
||||
39,32,5
|
||||
39,33,14
|
||||
39,34,4
|
||||
40,31,5
|
||||
40,32,7
|
||||
40,33,16
|
||||
40,34,5
|
||||
41,31,18
|
||||
41,32,29
|
||||
41,33,13
|
||||
41,34,15
|
||||
41,35,10
|
||||
1,31,7
|
||||
1,32,4
|
||||
1,33,2
|
||||
1,34,0
|
||||
1,35,12
|
||||
2,31,18
|
||||
2,32,10
|
||||
2,33,11
|
||||
2,34,18
|
||||
3,31,29
|
||||
3,32,4
|
||||
3,33,4
|
||||
3,34,7
|
||||
4,31,35
|
||||
4,32,6
|
||||
4,33,2
|
||||
4,34,0
|
||||
4,35,4
|
||||
5,31,31
|
||||
5,32,10
|
||||
5,33,17
|
||||
5,34,10
|
||||
6,31,17
|
||||
6,32,7
|
||||
6,33,7
|
||||
6,34,8
|
||||
6,35,3
|
||||
7,31,15
|
||||
7,32,3
|
||||
7,33,18
|
||||
7,34,2
|
||||
7,35,17
|
||||
8,31,27
|
||||
8,32,7
|
||||
8,33,17
|
||||
8,34,18
|
||||
8,35,8
|
||||
9,31,6
|
||||
9,32,3
|
||||
9,33,9
|
||||
9,34,5
|
||||
9,35,4
|
||||
10,31,7
|
||||
10,32,13
|
||||
10,33,12
|
||||
10,34,16
|
||||
10,35,2
|
||||
11,31,20
|
||||
11,32,4
|
||||
11,33,6
|
||||
11,34,9
|
||||
12,31,13
|
||||
12,32,9
|
||||
12,33,5
|
||||
12,34,9
|
||||
12,35,9
|
||||
13,31,24
|
||||
13,32,7
|
||||
13,33,3
|
||||
13,34,3
|
||||
14,31,5
|
||||
14,32,2
|
||||
14,33,2
|
||||
14,34,8
|
||||
42,31,11
|
||||
42,32,4
|
||||
42,33,18
|
||||
42,34,34
|
||||
42,35,13
|
||||
43,31,18
|
||||
43,32,38
|
||||
43,33,5
|
||||
43,34,7
|
||||
44,31,8
|
||||
44,32,29
|
||||
44,33,0
|
||||
44,34,22
|
||||
45,31,6
|
||||
45,32,6
|
||||
45,33,7
|
||||
45,34,3
|
||||
46,31,13
|
||||
46,32,8
|
||||
46,33,11
|
||||
46,34,24
|
||||
47,31,48
|
||||
47,32,6
|
||||
47,33,13
|
||||
47,34,3
|
||||
48,31,41
|
||||
48,32,7
|
||||
48,33,0
|
||||
48,34,39
|
||||
48,35,3
|
||||
49,31,51
|
||||
49,32,11
|
||||
49,33,15
|
||||
49,34,2
|
||||
49,35,19
|
||||
50,31,18
|
||||
50,32,9
|
||||
50,33,1
|
||||
50,34,17
|
||||
50,35,8
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Product_ID,Right_Product_Name,Right_Product_Category,Right_Product_Cost,Right_Product_Price
|
||||
100,Non-product,NoCat,$1,$1
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
Product_ID,Product_Name,Product_Category,Product_Cost,Product_Price
|
||||
1,Action Figure,Toys,$9.99,$15.99
|
||||
2,Animal Figures,Toys,$9.99,$12.99
|
||||
3,Barrel O' Slime,Art & Crafts,$1.99,$3.99
|
||||
4,Chutes & Ladders,Games,$9.99,$12.99
|
||||
5,Classic Dominoes,Games,$7.99,$9.99
|
||||
6,Colorbuds,Electronics,$6.99,$14.99
|
||||
7,Dart Gun,Sports & Outdoors,$11.99,$15.99
|
||||
8,Deck Of Cards,Games,$3.99,$6.99
|
||||
9,Dino Egg,Toys,$9.99,$10.99
|
||||
10,Dinosaur Figures,Toys,$10.99,$14.99
|
||||
11,Etch A Sketch,Art & Crafts,$10.99,$20.99
|
||||
12,Foam Disk Launcher,Sports & Outdoors,$8.99,$11.99
|
||||
13,Gamer Headphones,Electronics,$14.99,$20.99
|
||||
14,Glass Marbles,Games,$5.99,$10.99
|
||||
15,Hot Wheels 5-Pack,Toys,$3.99,$5.99
|
||||
16,Jenga,Games,$2.99,$9.99
|
||||
17,Kids Makeup Kit,Art & Crafts,$13.99,$19.99
|
||||
18,Lego Bricks,Toys,$34.99,$39.99
|
||||
19,Magic Sand,Art & Crafts,$13.99,$15.99
|
||||
20,Mini Basketball Hoop,Sports & Outdoors,$8.99,$24.99
|
||||
21,Mini Ping Pong Set,Sports & Outdoors,$6.99,$9.99
|
||||
22,Monopoly,Games,$13.99,$19.99
|
||||
23,Mr. Potatohead,Toys,$4.99,$9.99
|
||||
24,Nerf Gun,Sports & Outdoors,$14.99,$19.99
|
||||
25,PlayDoh Can,Art & Crafts,$1.99,$2.99
|
||||
26,PlayDoh Playset,Art & Crafts,$20.99,$24.99
|
||||
27,PlayDoh Toolkit,Art & Crafts,$3.99,$4.99
|
||||
28,Playfoam,Art & Crafts,$3.99,$10.99
|
||||
29,Plush Pony,Toys,$8.99,$19.99
|
||||
30,Rubik's Cube,Games,$17.99,$19.99
|
||||
100,Non-product,NoCat,$1,$1
|
||||
|
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,226 @@
|
|||
Store_ID,Product_ID,Stock_On_Hand
|
||||
1,32,4
|
||||
2,32,10
|
||||
3,32,4
|
||||
4,32,6
|
||||
5,32,10
|
||||
6,32,7
|
||||
7,32,3
|
||||
8,32,7
|
||||
9,32,3
|
||||
10,32,13
|
||||
11,32,4
|
||||
12,32,9
|
||||
13,32,7
|
||||
14,32,2
|
||||
15,32,16
|
||||
16,32,7
|
||||
17,32,15
|
||||
18,32,9
|
||||
19,32,5
|
||||
20,32,9
|
||||
21,32,3
|
||||
22,32,38
|
||||
23,32,11
|
||||
24,32,10
|
||||
25,32,10
|
||||
26,32,2
|
||||
27,32,6
|
||||
28,32,3
|
||||
29,32,7
|
||||
30,32,13
|
||||
31,32,12
|
||||
32,32,8
|
||||
33,32,15
|
||||
34,32,19
|
||||
35,32,20
|
||||
36,32,7
|
||||
37,32,0
|
||||
38,32,20
|
||||
39,32,5
|
||||
40,32,7
|
||||
41,32,29
|
||||
42,32,4
|
||||
43,32,38
|
||||
44,32,29
|
||||
45,32,6
|
||||
46,32,8
|
||||
47,32,6
|
||||
48,32,7
|
||||
49,32,11
|
||||
50,32,9
|
||||
1,31,7
|
||||
2,31,18
|
||||
3,31,29
|
||||
4,31,35
|
||||
5,31,31
|
||||
6,31,17
|
||||
7,31,15
|
||||
8,31,27
|
||||
9,31,6
|
||||
10,31,7
|
||||
11,31,20
|
||||
12,31,13
|
||||
13,31,24
|
||||
14,31,5
|
||||
15,31,4
|
||||
16,31,14
|
||||
17,31,20
|
||||
18,31,4
|
||||
19,31,4
|
||||
20,31,10
|
||||
21,31,19
|
||||
22,31,34
|
||||
23,31,19
|
||||
24,31,10
|
||||
25,31,0
|
||||
26,31,4
|
||||
27,31,13
|
||||
28,31,18
|
||||
29,31,3
|
||||
30,31,20
|
||||
31,31,39
|
||||
32,31,4
|
||||
33,31,7
|
||||
34,31,30
|
||||
35,31,74
|
||||
36,31,6
|
||||
37,31,14
|
||||
38,31,17
|
||||
39,31,15
|
||||
40,31,5
|
||||
41,31,18
|
||||
42,31,11
|
||||
43,31,18
|
||||
44,31,8
|
||||
45,31,6
|
||||
46,31,13
|
||||
47,31,48
|
||||
48,31,41
|
||||
49,31,51
|
||||
50,31,18
|
||||
1,35,12
|
||||
4,35,4
|
||||
6,35,3
|
||||
7,35,17
|
||||
8,35,8
|
||||
9,35,4
|
||||
10,35,2
|
||||
12,35,9
|
||||
16,35,6
|
||||
18,35,10
|
||||
19,35,14
|
||||
22,35,2
|
||||
23,35,4
|
||||
24,35,19
|
||||
26,35,8
|
||||
33,35,18
|
||||
34,35,20
|
||||
36,35,12
|
||||
37,35,14
|
||||
38,35,2
|
||||
41,35,10
|
||||
42,35,13
|
||||
48,35,3
|
||||
49,35,19
|
||||
50,35,8
|
||||
1,34,0
|
||||
2,34,18
|
||||
3,34,7
|
||||
4,34,0
|
||||
5,34,10
|
||||
6,34,8
|
||||
7,34,2
|
||||
8,34,18
|
||||
9,34,5
|
||||
10,34,16
|
||||
11,34,9
|
||||
12,34,9
|
||||
13,34,3
|
||||
14,34,8
|
||||
15,34,7
|
||||
16,34,2
|
||||
17,34,11
|
||||
18,34,8
|
||||
19,34,15
|
||||
20,34,19
|
||||
21,34,16
|
||||
22,34,6
|
||||
23,34,18
|
||||
24,34,17
|
||||
25,34,23
|
||||
26,34,17
|
||||
27,34,9
|
||||
28,34,19
|
||||
29,34,16
|
||||
30,34,18
|
||||
31,34,20
|
||||
32,34,20
|
||||
33,34,14
|
||||
34,34,17
|
||||
35,34,9
|
||||
36,34,2
|
||||
37,34,13
|
||||
38,34,18
|
||||
39,34,4
|
||||
40,34,5
|
||||
41,34,15
|
||||
42,34,34
|
||||
43,34,7
|
||||
44,34,22
|
||||
45,34,3
|
||||
46,34,24
|
||||
47,34,3
|
||||
48,34,39
|
||||
49,34,2
|
||||
50,34,17
|
||||
1,33,2
|
||||
2,33,11
|
||||
3,33,4
|
||||
4,33,2
|
||||
5,33,17
|
||||
6,33,7
|
||||
7,33,18
|
||||
8,33,17
|
||||
9,33,9
|
||||
10,33,12
|
||||
11,33,6
|
||||
12,33,5
|
||||
13,33,3
|
||||
14,33,2
|
||||
15,33,8
|
||||
16,33,6
|
||||
17,33,27
|
||||
18,33,9
|
||||
19,33,0
|
||||
20,33,28
|
||||
21,33,16
|
||||
22,33,8
|
||||
23,33,6
|
||||
24,33,4
|
||||
25,33,4
|
||||
26,33,2
|
||||
27,33,7
|
||||
28,33,9
|
||||
29,33,6
|
||||
30,33,10
|
||||
31,33,20
|
||||
32,33,13
|
||||
33,33,9
|
||||
34,33,9
|
||||
35,33,14
|
||||
36,33,21
|
||||
37,33,10
|
||||
38,33,9
|
||||
39,33,14
|
||||
40,33,16
|
||||
41,33,13
|
||||
42,33,18
|
||||
43,33,5
|
||||
44,33,0
|
||||
45,33,7
|
||||
46,33,11
|
||||
47,33,13
|
||||
48,33,0
|
||||
49,33,15
|
||||
50,33,1
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Product_ID,Product_Name,Product_Category,Product_Cost,Product_Price
|
||||
100,Non-product,NoCat,$1,$1
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
Product_ID,Product_Name,Product_Category,Product_Cost,Product_Price
|
||||
100,Non-product,NoCat,$1,$1
|
||||
|
|
|
@ -0,0 +1,100 @@
|
|||
# 📚 Maven Toys Dataset Schema Relationship Guide
|
||||
|
||||
This document outlines the schema relationships and foreign key connections between all CSV files in this directory, suggesting how they can be joined for comprehensive data analysis.
|
||||
|
||||
## 🧩 Entity/Dimension Tables (The "Who" and "What")
|
||||
|
||||
These tables define core entities and are typically used as lookup tables.
|
||||
|
||||
1. **`stores`**: Information about the physical retail locations.
|
||||
* **Primary Key (PK):** `Store_ID`
|
||||
2. **`products`**: Master list of all items sold.
|
||||
* **Primary Key (PK):** `Product_ID`
|
||||
3. **`calendar`**: Time dimension data for the business.
|
||||
* **Primary Key (PK):** `Date` (Assuming unique dates are recorded)
|
||||
|
||||
## 📊 Fact/Snapshot Tables (The "When" and "How Much")
|
||||
|
||||
These tables record events, measurements, or snapshots in time that link the dimensions together.
|
||||
|
||||
1. **`sales`**: The core transaction log. *This is the most frequently joined table.*
|
||||
* **Foreign Keys (FKs):** `Store_ID` (references `stores`), `Product_ID` (references `products`).
|
||||
2. **`inventory`**: Snapshot of stock levels at a point in time.
|
||||
* **Composite Key/FKs:** (`Store_ID`, `Product_ID`) $\to$ Links to both `stores` and `products`.
|
||||
3. **`data_dictionary`**: Metadata describing the other fields (Not used for joins, but crucial for understanding column definitions).
|
||||
|
||||
## 🗓️ Time Dimension
|
||||
|
||||
* The **`calendar`** table provides temporal context, which can be joined with `sales` records to analyze performance around holidays or specific periods.
|
||||
|
||||
---
|
||||
|
||||
# 🔗 Relationship Map and Join Paths
|
||||
|
||||
The following sections show the explicit paths you can use for joining data in SQL or Python (Pandas/DuckDB).
|
||||
|
||||
### 1. Sales Analysis Path
|
||||
* **Goal:** Analyzing a transaction's details, location, and item description.
|
||||
* **Join Chain:** `sales` $\to$ (`stores`, `products`)
|
||||
* **Example Join:** `FROM sales s JOIN stores st ON s.Store_ID = st.Store_ID JOIN products p ON s.Product_ID = p.Product_ID;`
|
||||
|
||||
### 2. Inventory Valuation Path
|
||||
* **Goal:** Calculating the total value of current stock across all stores.
|
||||
* **Join Chain:** `inventory` $\to$ (`stores`, `products`)
|
||||
* **Example Join:** `FROM inventory i JOIN stores st ON i.Store_ID = st.Store_ID JOIN products p ON i.Product_ID = p.Product_ID;`
|
||||
|
||||
### 3. Comprehensive Performance Path (The Full Picture)
|
||||
* **Goal:** Linking sales performance to store location details and calendar dates.
|
||||
* **Join Chain:** `sales` $\to$ (`stores`, `products`, `calendar`)
|
||||
* **Notes:** Join `sales.Date` to `calendar.Date`; both tables store the date in a column named `Date`.
|
||||
|
||||
---
|
||||
|
||||
# 💡 Example Queries (Ready for Use)
|
||||
|
||||
These queries demonstrate how to combine the tables.
|
||||
|
||||
### 1. Total Revenue Over Time
|
||||
Calculate the total revenue generated by each store month-by-month, showing store performance over time.
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
strftime('%Y-%m', s.Date) AS sales_month, -- Grouping by Year and Month
|
||||
st.Store_Name,
|
||||
COUNT(DISTINCT p.Product_ID) AS distinct_products_sold,
|
||||
SUM(s.Units * p.Product_Price) AS total_monthly_revenue
|
||||
FROM sales s
|
||||
JOIN stores st ON s.Store_ID = st.Store_ID
|
||||
JOIN products p ON s.Product_ID = p.Product_ID
|
||||
GROUP BY 1, 2
|
||||
ORDER BY 1 DESC, total_monthly_revenue DESC;
|
||||
```
|
||||
|
||||
### 2. Top Performing Product/Category Analysis
|
||||
Identify the top 5 products by total units sold, listed together with the category each one belongs to.
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
p.Product_Name,
|
||||
p.Product_Category,
|
||||
SUM(s.Units) AS total_units_sold
|
||||
FROM sales s
|
||||
JOIN products p ON s.Product_ID = p.Product_ID
|
||||
GROUP BY 1, 2
|
||||
ORDER BY total_units_sold DESC
|
||||
LIMIT 5;
|
||||
```
|
||||
|
||||
### 3. Low Stock Alerts (Inventory Management)
|
||||
List all stores and products where the current stock is below a specified threshold (e.g., < 50 units).
|
||||
|
||||
```sql
|
||||
SELECT
|
||||
st.Store_Name,
|
||||
p.Product_Name,
|
||||
i.Stock_On_Hand
|
||||
FROM inventory i
|
||||
JOIN stores st ON i.Store_ID = st.Store_ID
|
||||
JOIN products p ON i.Product_ID = p.Product_ID
|
||||
WHERE i.Stock_On_Hand < 50;
|
||||
```
|
||||
|
|
@ -0,0 +1,639 @@
|
|||
Date
|
||||
1/1/2022
|
||||
1/2/2022
|
||||
1/3/2022
|
||||
1/4/2022
|
||||
1/5/2022
|
||||
1/6/2022
|
||||
1/7/2022
|
||||
1/8/2022
|
||||
1/9/2022
|
||||
1/10/2022
|
||||
1/11/2022
|
||||
1/12/2022
|
||||
1/13/2022
|
||||
1/14/2022
|
||||
1/15/2022
|
||||
1/16/2022
|
||||
1/17/2022
|
||||
1/18/2022
|
||||
1/19/2022
|
||||
1/20/2022
|
||||
1/21/2022
|
||||
1/22/2022
|
||||
1/23/2022
|
||||
1/24/2022
|
||||
1/25/2022
|
||||
1/26/2022
|
||||
1/27/2022
|
||||
1/28/2022
|
||||
1/29/2022
|
||||
1/30/2022
|
||||
1/31/2022
|
||||
2/1/2022
|
||||
2/2/2022
|
||||
2/3/2022
|
||||
2/4/2022
|
||||
2/5/2022
|
||||
2/6/2022
|
||||
2/7/2022
|
||||
2/8/2022
|
||||
2/9/2022
|
||||
2/10/2022
|
||||
2/11/2022
|
||||
2/12/2022
|
||||
2/13/2022
|
||||
2/14/2022
|
||||
2/15/2022
|
||||
2/16/2022
|
||||
2/17/2022
|
||||
2/18/2022
|
||||
2/19/2022
|
||||
2/20/2022
|
||||
2/21/2022
|
||||
2/22/2022
|
||||
2/23/2022
|
||||
2/24/2022
|
||||
2/25/2022
|
||||
2/26/2022
|
||||
2/27/2022
|
||||
2/28/2022
|
||||
3/1/2022
|
||||
3/2/2022
|
||||
3/3/2022
|
||||
3/4/2022
|
||||
3/5/2022
|
||||
3/6/2022
|
||||
3/7/2022
|
||||
3/8/2022
|
||||
3/9/2022
|
||||
3/10/2022
|
||||
3/11/2022
|
||||
3/12/2022
|
||||
3/13/2022
|
||||
3/14/2022
|
||||
3/15/2022
|
||||
3/16/2022
|
||||
3/17/2022
|
||||
3/18/2022
|
||||
3/19/2022
|
||||
3/20/2022
|
||||
3/21/2022
|
||||
3/22/2022
|
||||
3/23/2022
|
||||
3/24/2022
|
||||
3/25/2022
|
||||
3/26/2022
|
||||
3/27/2022
|
||||
3/28/2022
|
||||
3/29/2022
|
||||
3/30/2022
|
||||
3/31/2022
|
||||
4/1/2022
|
||||
4/2/2022
|
||||
4/3/2022
|
||||
4/4/2022
|
||||
4/5/2022
|
||||
4/6/2022
|
||||
4/7/2022
|
||||
4/8/2022
|
||||
4/9/2022
|
||||
4/10/2022
|
||||
4/11/2022
|
||||
4/12/2022
|
||||
4/13/2022
|
||||
4/14/2022
|
||||
4/15/2022
|
||||
4/16/2022
|
||||
4/17/2022
|
||||
4/18/2022
|
||||
4/19/2022
|
||||
4/20/2022
|
||||
4/21/2022
|
||||
4/22/2022
|
||||
4/23/2022
|
||||
4/24/2022
|
||||
4/25/2022
|
||||
4/26/2022
|
||||
4/27/2022
|
||||
4/28/2022
|
||||
4/29/2022
|
||||
4/30/2022
|
||||
5/1/2022
|
||||
5/2/2022
|
||||
5/3/2022
|
||||
5/4/2022
|
||||
5/5/2022
|
||||
5/6/2022
|
||||
5/7/2022
|
||||
5/8/2022
|
||||
5/9/2022
|
||||
5/10/2022
|
||||
5/11/2022
|
||||
5/12/2022
|
||||
5/13/2022
|
||||
5/14/2022
|
||||
5/15/2022
|
||||
5/16/2022
|
||||
5/17/2022
|
||||
5/18/2022
|
||||
5/19/2022
|
||||
5/20/2022
|
||||
5/21/2022
|
||||
5/22/2022
|
||||
5/23/2022
|
||||
5/24/2022
|
||||
5/25/2022
|
||||
5/26/2022
|
||||
5/27/2022
|
||||
5/28/2022
|
||||
5/29/2022
|
||||
5/30/2022
|
||||
5/31/2022
|
||||
6/1/2022
|
||||
6/2/2022
|
||||
6/3/2022
|
||||
6/4/2022
|
||||
6/5/2022
|
||||
6/6/2022
|
||||
6/7/2022
|
||||
6/8/2022
|
||||
6/9/2022
|
||||
6/10/2022
|
||||
6/11/2022
|
||||
6/12/2022
|
||||
6/13/2022
|
||||
6/14/2022
|
||||
6/15/2022
|
||||
6/16/2022
|
||||
6/17/2022
|
||||
6/18/2022
|
||||
6/19/2022
|
||||
6/20/2022
|
||||
6/21/2022
|
||||
6/22/2022
|
||||
6/23/2022
|
||||
6/24/2022
|
||||
6/25/2022
|
||||
6/26/2022
|
||||
6/27/2022
|
||||
6/28/2022
|
||||
6/29/2022
|
||||
6/30/2022
|
||||
7/1/2022
|
||||
7/2/2022
|
||||
7/3/2022
|
||||
7/4/2022
|
||||
7/5/2022
|
||||
7/6/2022
|
||||
7/7/2022
|
||||
7/8/2022
|
||||
7/9/2022
|
||||
7/10/2022
|
||||
7/11/2022
|
||||
7/12/2022
|
||||
7/13/2022
|
||||
7/14/2022
|
||||
7/15/2022
|
||||
7/16/2022
|
||||
7/17/2022
|
||||
7/18/2022
|
||||
7/19/2022
|
||||
7/20/2022
|
||||
7/21/2022
|
||||
7/22/2022
|
||||
7/23/2022
|
||||
7/24/2022
|
||||
7/25/2022
|
||||
7/26/2022
|
||||
7/27/2022
|
||||
7/28/2022
|
||||
7/29/2022
|
||||
7/30/2022
|
||||
7/31/2022
|
||||
8/1/2022
|
||||
8/2/2022
|
||||
8/3/2022
|
||||
8/4/2022
|
||||
8/5/2022
|
||||
8/6/2022
|
||||
8/7/2022
|
||||
8/8/2022
|
||||
8/9/2022
|
||||
8/10/2022
|
||||
8/11/2022
|
||||
8/12/2022
|
||||
8/13/2022
|
||||
8/14/2022
|
||||
8/15/2022
|
||||
8/16/2022
|
||||
8/17/2022
|
||||
8/18/2022
|
||||
8/19/2022
|
||||
8/20/2022
|
||||
8/21/2022
|
||||
8/22/2022
|
||||
8/23/2022
|
||||
8/24/2022
|
||||
8/25/2022
|
||||
8/26/2022
|
||||
8/27/2022
|
||||
8/28/2022
|
||||
8/29/2022
|
||||
8/30/2022
|
||||
8/31/2022
|
||||
9/1/2022
|
||||
9/2/2022
|
||||
9/3/2022
|
||||
9/4/2022
|
||||
9/5/2022
|
||||
9/6/2022
|
||||
9/7/2022
|
||||
9/8/2022
|
||||
9/9/2022
|
||||
9/10/2022
|
||||
9/11/2022
|
||||
9/12/2022
|
||||
9/13/2022
|
||||
9/14/2022
|
||||
9/15/2022
|
||||
9/16/2022
|
||||
9/17/2022
|
||||
9/18/2022
|
||||
9/19/2022
|
||||
9/20/2022
|
||||
9/21/2022
|
||||
9/22/2022
|
||||
9/23/2022
|
||||
9/24/2022
|
||||
9/25/2022
|
||||
9/26/2022
|
||||
9/27/2022
|
||||
9/28/2022
|
||||
9/29/2022
|
||||
9/30/2022
|
||||
10/1/2022
|
||||
10/2/2022
|
||||
10/3/2022
|
||||
10/4/2022
|
||||
10/5/2022
|
||||
10/6/2022
|
||||
10/7/2022
|
||||
10/8/2022
|
||||
10/9/2022
|
||||
10/10/2022
|
||||
10/11/2022
|
||||
10/12/2022
|
||||
10/13/2022
|
||||
10/14/2022
|
||||
10/15/2022
|
||||
10/16/2022
|
||||
10/17/2022
|
||||
10/18/2022
|
||||
10/19/2022
|
||||
10/20/2022
|
||||
10/21/2022
|
||||
10/22/2022
|
||||
10/23/2022
|
||||
10/24/2022
|
||||
10/25/2022
|
||||
10/26/2022
|
||||
10/27/2022
|
||||
10/28/2022
|
||||
10/29/2022
|
||||
10/30/2022
|
||||
10/31/2022
|
||||
11/1/2022
|
||||
11/2/2022
|
||||
11/3/2022
|
||||
11/4/2022
|
||||
11/5/2022
|
||||
11/6/2022
|
||||
11/7/2022
|
||||
11/8/2022
|
||||
11/9/2022
|
||||
11/10/2022
|
||||
11/11/2022
|
||||
11/12/2022
|
||||
11/13/2022
|
||||
11/14/2022
|
||||
11/15/2022
|
||||
11/16/2022
|
||||
11/17/2022
|
||||
11/18/2022
|
||||
11/19/2022
|
||||
11/20/2022
|
||||
11/21/2022
|
||||
11/22/2022
|
||||
11/23/2022
|
||||
11/24/2022
|
||||
11/25/2022
|
||||
11/26/2022
|
||||
11/27/2022
|
||||
11/28/2022
|
||||
11/29/2022
|
||||
11/30/2022
|
||||
12/1/2022
|
||||
12/2/2022
|
||||
12/3/2022
|
||||
12/4/2022
|
||||
12/5/2022
|
||||
12/6/2022
|
||||
12/7/2022
|
||||
12/8/2022
|
||||
12/9/2022
|
||||
12/10/2022
|
||||
12/11/2022
|
||||
12/12/2022
|
||||
12/13/2022
|
||||
12/14/2022
|
||||
12/15/2022
|
||||
12/16/2022
|
||||
12/17/2022
|
||||
12/18/2022
|
||||
12/19/2022
|
||||
12/20/2022
|
||||
12/21/2022
|
||||
12/22/2022
|
||||
12/23/2022
|
||||
12/24/2022
|
||||
12/25/2022
|
||||
12/26/2022
|
||||
12/27/2022
|
||||
12/28/2022
|
||||
12/29/2022
|
||||
12/30/2022
|
||||
12/31/2022
|
||||
1/1/2023
|
||||
1/2/2023
|
||||
1/3/2023
|
||||
1/4/2023
|
||||
1/5/2023
|
||||
1/6/2023
|
||||
1/7/2023
|
||||
1/8/2023
|
||||
1/9/2023
|
||||
1/10/2023
|
||||
1/11/2023
|
||||
1/12/2023
|
||||
1/13/2023
|
||||
1/14/2023
|
||||
1/15/2023
|
||||
1/16/2023
|
||||
1/17/2023
|
||||
1/18/2023
|
||||
1/19/2023
|
||||
1/20/2023
|
||||
1/21/2023
|
||||
1/22/2023
|
||||
1/23/2023
|
||||
1/24/2023
|
||||
1/25/2023
|
||||
1/26/2023
|
||||
1/27/2023
|
||||
1/28/2023
|
||||
1/29/2023
|
||||
1/30/2023
|
||||
1/31/2023
|
||||
2/1/2023
|
||||
2/2/2023
|
||||
2/3/2023
|
||||
2/4/2023
|
||||
2/5/2023
|
||||
2/6/2023
|
||||
2/7/2023
|
||||
2/8/2023
|
||||
2/9/2023
|
||||
2/10/2023
|
||||
2/11/2023
|
||||
2/12/2023
|
||||
2/13/2023
|
||||
2/14/2023
|
||||
2/15/2023
|
||||
2/16/2023
|
||||
2/17/2023
|
||||
2/18/2023
|
||||
2/19/2023
|
||||
2/20/2023
|
||||
2/21/2023
|
||||
2/22/2023
|
||||
2/23/2023
|
||||
2/24/2023
|
||||
2/25/2023
|
||||
2/26/2023
|
||||
2/27/2023
|
||||
2/28/2023
|
||||
3/1/2023
|
||||
3/2/2023
|
||||
3/3/2023
|
||||
3/4/2023
|
||||
3/5/2023
|
||||
3/6/2023
|
||||
3/7/2023
|
||||
3/8/2023
|
||||
3/9/2023
|
||||
3/10/2023
|
||||
3/11/2023
|
||||
3/12/2023
|
||||
3/13/2023
|
||||
3/14/2023
|
||||
3/15/2023
|
||||
3/16/2023
|
||||
3/17/2023
|
||||
3/18/2023
|
||||
3/19/2023
|
||||
3/20/2023
|
||||
3/21/2023
|
||||
3/22/2023
|
||||
3/23/2023
|
||||
3/24/2023
|
||||
3/25/2023
|
||||
3/26/2023
|
||||
3/27/2023
|
||||
3/28/2023
|
||||
3/29/2023
|
||||
3/30/2023
|
||||
3/31/2023
|
||||
4/1/2023
|
||||
4/2/2023
|
||||
4/3/2023
|
||||
4/4/2023
|
||||
4/5/2023
|
||||
4/6/2023
|
||||
4/7/2023
|
||||
4/8/2023
|
||||
4/9/2023
|
||||
4/10/2023
|
||||
4/11/2023
|
||||
4/12/2023
|
||||
4/13/2023
|
||||
4/14/2023
|
||||
4/15/2023
|
||||
4/16/2023
|
||||
4/17/2023
|
||||
4/18/2023
|
||||
4/19/2023
|
||||
4/20/2023
|
||||
4/21/2023
|
||||
4/22/2023
|
||||
4/23/2023
|
||||
4/24/2023
|
||||
4/25/2023
|
||||
4/26/2023
|
||||
4/27/2023
|
||||
4/28/2023
|
||||
4/29/2023
|
||||
4/30/2023
|
||||
5/1/2023
|
||||
5/2/2023
|
||||
5/3/2023
|
||||
5/4/2023
|
||||
5/5/2023
|
||||
5/6/2023
|
||||
5/7/2023
|
||||
5/8/2023
|
||||
5/9/2023
|
||||
5/10/2023
|
||||
5/11/2023
|
||||
5/12/2023
|
||||
5/13/2023
|
||||
5/14/2023
|
||||
5/15/2023
|
||||
5/16/2023
|
||||
5/17/2023
|
||||
5/18/2023
|
||||
5/19/2023
|
||||
5/20/2023
|
||||
5/21/2023
|
||||
5/22/2023
|
||||
5/23/2023
|
||||
5/24/2023
|
||||
5/25/2023
|
||||
5/26/2023
|
||||
5/27/2023
|
||||
5/28/2023
|
||||
5/29/2023
|
||||
5/30/2023
|
||||
5/31/2023
|
||||
6/1/2023
|
||||
6/2/2023
|
||||
6/3/2023
|
||||
6/4/2023
|
||||
6/5/2023
|
||||
6/6/2023
|
||||
6/7/2023
|
||||
6/8/2023
|
||||
6/9/2023
|
||||
6/10/2023
|
||||
6/11/2023
|
||||
6/12/2023
|
||||
6/13/2023
|
||||
6/14/2023
|
||||
6/15/2023
|
||||
6/16/2023
|
||||
6/17/2023
|
||||
6/18/2023
|
||||
6/19/2023
|
||||
6/20/2023
|
||||
6/21/2023
|
||||
6/22/2023
|
||||
6/23/2023
|
||||
6/24/2023
|
||||
6/25/2023
|
||||
6/26/2023
|
||||
6/27/2023
|
||||
6/28/2023
|
||||
6/29/2023
|
||||
6/30/2023
|
||||
7/1/2023
|
||||
7/2/2023
|
||||
7/3/2023
|
||||
7/4/2023
|
||||
7/5/2023
|
||||
7/6/2023
|
||||
7/7/2023
|
||||
7/8/2023
|
||||
7/9/2023
|
||||
7/10/2023
|
||||
7/11/2023
|
||||
7/12/2023
|
||||
7/13/2023
|
||||
7/14/2023
|
||||
7/15/2023
|
||||
7/16/2023
|
||||
7/17/2023
|
||||
7/18/2023
|
||||
7/19/2023
|
||||
7/20/2023
|
||||
7/21/2023
|
||||
7/22/2023
|
||||
7/23/2023
|
||||
7/24/2023
|
||||
7/25/2023
|
||||
7/26/2023
|
||||
7/27/2023
|
||||
7/28/2023
|
||||
7/29/2023
|
||||
7/30/2023
|
||||
7/31/2023
|
||||
8/1/2023
|
||||
8/2/2023
|
||||
8/3/2023
|
||||
8/4/2023
|
||||
8/5/2023
|
||||
8/6/2023
|
||||
8/7/2023
|
||||
8/8/2023
|
||||
8/9/2023
|
||||
8/10/2023
|
||||
8/11/2023
|
||||
8/12/2023
|
||||
8/13/2023
|
||||
8/14/2023
|
||||
8/15/2023
|
||||
8/16/2023
|
||||
8/17/2023
|
||||
8/18/2023
|
||||
8/19/2023
|
||||
8/20/2023
|
||||
8/21/2023
|
||||
8/22/2023
|
||||
8/23/2023
|
||||
8/24/2023
|
||||
8/25/2023
|
||||
8/26/2023
|
||||
8/27/2023
|
||||
8/28/2023
|
||||
8/29/2023
|
||||
8/30/2023
|
||||
8/31/2023
|
||||
9/1/2023
|
||||
9/2/2023
|
||||
9/3/2023
|
||||
9/4/2023
|
||||
9/5/2023
|
||||
9/6/2023
|
||||
9/7/2023
|
||||
9/8/2023
|
||||
9/9/2023
|
||||
9/10/2023
|
||||
9/11/2023
|
||||
9/12/2023
|
||||
9/13/2023
|
||||
9/14/2023
|
||||
9/15/2023
|
||||
9/16/2023
|
||||
9/17/2023
|
||||
9/18/2023
|
||||
9/19/2023
|
||||
9/20/2023
|
||||
9/21/2023
|
||||
9/22/2023
|
||||
9/23/2023
|
||||
9/24/2023
|
||||
9/25/2023
|
||||
9/26/2023
|
||||
9/27/2023
|
||||
9/28/2023
|
||||
9/29/2023
|
||||
9/30/2023
|
||||
|
|
|
@ -0,0 +1,20 @@
|
|||
Table,Field,Description
|
||||
Products,Product_ID,Product ID
|
||||
Products,Product_Name,Product name
|
||||
Products,Product_Category,Product Category
|
||||
Products,Product_Cost,Product cost ($USD)
|
||||
Products,Product_Price,Product retail price ($USD)
|
||||
Inventory,Store_ID,Store ID
|
||||
Inventory,Product_ID,Product ID
|
||||
Inventory,Stock_On_Hand,Stock quantity of the product in the store (inventory)
|
||||
Stores,Store_ID,Store ID
|
||||
Stores,Store_Name,Store name
|
||||
Stores,Store_City,City in Mexico where the store is located
|
||||
Stores,Store_Location,Location in the city where the store is located
|
||||
Stores,Store_Open_Date,Date when the store was opened
|
||||
Sales,Sale_ID,Sale ID
|
||||
Sales,Date,Date of the transaction
|
||||
Sales,Store_ID,Store ID
|
||||
Sales,Product_ID,Product ID
|
||||
Sales,Units,Units sold
|
||||
Calendar,Date,Calendar date
|
||||
|
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,36 @@
|
|||
Product_ID,Product_Name,Product_Category,Product_Cost,Product_Price
|
||||
1,Action Figure,Toys,$9.99 ,$15.99
|
||||
2,Animal Figures,Toys,$9.99 ,$12.99
|
||||
3,Barrel O' Slime,Art & Crafts,$1.99 ,$3.99
|
||||
4,Chutes & Ladders,Games,$9.99 ,$12.99
|
||||
5,Classic Dominoes,Games,$7.99 ,$9.99
|
||||
6,Colorbuds,Electronics,$6.99 ,$14.99
|
||||
7,Dart Gun,Sports & Outdoors,$11.99 ,$15.99
|
||||
8,Deck Of Cards,Games,$3.99 ,$6.99
|
||||
9,Dino Egg,Toys,$9.99 ,$10.99
|
||||
10,Dinosaur Figures,Toys,$10.99 ,$14.99
|
||||
11,Etch A Sketch,Art & Crafts,$10.99 ,$20.99
|
||||
12,Foam Disk Launcher,Sports & Outdoors,$8.99 ,$11.99
|
||||
13,Gamer Headphones,Electronics,$14.99 ,$20.99
|
||||
14,Glass Marbles,Games,$5.99 ,$10.99
|
||||
15,Hot Wheels 5-Pack,Toys,$3.99 ,$5.99
|
||||
16,Jenga,Games,$2.99 ,$9.99
|
||||
17,Kids Makeup Kit,Art & Crafts,$13.99 ,$19.99
|
||||
18,Lego Bricks,Toys,$34.99 ,$39.99
|
||||
19,Magic Sand,Art & Crafts,$13.99 ,$15.99
|
||||
20,Mini Basketball Hoop,Sports & Outdoors,$8.99 ,$24.99
|
||||
21,Mini Ping Pong Set,Sports & Outdoors,$6.99 ,$9.99
|
||||
22,Monopoly,Games,$13.99 ,$19.99
|
||||
23,Mr. Potatohead,Toys,$4.99 ,$9.99
|
||||
24,Nerf Gun,Sports & Outdoors,$14.99 ,$19.99
|
||||
25,PlayDoh Can,Art & Crafts,$1.99 ,$2.99
|
||||
26,PlayDoh Playset,Art & Crafts,$20.99 ,$24.99
|
||||
27,PlayDoh Toolkit,Art & Crafts,$3.99 ,$4.99
|
||||
28,Playfoam,Art & Crafts,$3.99 ,$10.99
|
||||
29,Plush Pony,Toys,$8.99 ,$19.99
|
||||
30,Rubik's Cube,Games,$17.99 ,$19.99
|
||||
31,Splash Balls,Sports & Outdoors,$7.99 ,$8.99
|
||||
32,Supersoaker Water Gun,Sports & Outdoors,$11.99 ,$14.99
|
||||
33,Teddy Bear,Toys,$10.99 ,$12.99
|
||||
34,Toy Robot,Electronics,$20.99 ,$25.99
|
||||
35,Uno Card Game,Games,$3.99 ,$7.99
|
||||
|
File diff suppressed because it is too large
Load Diff
|
|
@ -0,0 +1,51 @@
|
|||
Store_ID,Store_Name,Store_City,Store_Location,Store_Open_Date
|
||||
1,Maven Toys Guadalajara 1,Guadalajara,Residential,1992-09-18
|
||||
2,Maven Toys Monterrey 1,Monterrey,Residential,1995-04-27
|
||||
3,Maven Toys Guadalajara 2,Guadalajara,Commercial,1999-12-27
|
||||
4,Maven Toys Saltillo 1,Saltillo,Downtown,2000-01-01
|
||||
5,Maven Toys La Paz 1,La Paz,Downtown,2001-05-31
|
||||
6,Maven Toys Mexicali 1,Mexicali,Commercial,2003-12-13
|
||||
7,Maven Toys Monterrey 2,Monterrey,Downtown,2003-12-25
|
||||
8,Maven Toys Pachuca 1,Pachuca,Downtown,2004-10-14
|
||||
9,Maven Toys Ciudad de Mexico 1,Cuidad de Mexico,Downtown,2004-10-15
|
||||
10,Maven Toys Campeche 1,Campeche,Downtown,2005-01-14
|
||||
11,Maven Toys Cuernavaca 1,Cuernavaca,Downtown,2005-04-19
|
||||
12,Maven Toys Chetumal 1,Chetumal,Downtown,2006-05-05
|
||||
13,Maven Toys Mexicali 2,Mexicali,Downtown,2006-08-30
|
||||
14,Maven Toys Guanajuato 1,Guanajuato,Downtown,2007-01-31
|
||||
15,Maven Toys Tuxtla Gutierrez 1,Tuxtla Gutierrez,Downtown,2007-03-05
|
||||
16,Maven Toys San Luis Potosi 1,San Luis Potosi,Downtown,2007-05-19
|
||||
17,Maven Toys Toluca 1,Toluca,Downtown,2007-12-09
|
||||
18,Maven Toys Merida 1,Merida,Downtown,2008-08-22
|
||||
19,Maven Toys Puebla 1,Puebla,Commercial,2008-12-16
|
||||
20,Maven Toys Zacatecas 1,Zacatecas,Downtown,2009-05-29
|
||||
21,Maven Toys Santiago 1,Santiago,Downtown,2009-11-23
|
||||
22,Maven Toys Guanajuato 2,Guanajuato,Commercial,2010-03-29
|
||||
23,Maven Toys Chihuahua 1,Chihuahua,Commercial,2010-06-12
|
||||
24,Maven Toys Aguascalientes 1,Aguascalientes,Downtown,2010-07-31
|
||||
25,Maven Toys Ciudad Victoria 1,Ciudad Victoria,Downtown,2010-09-08
|
||||
26,Maven Toys Campeche 2,Campeche,Commercial,2010-09-15
|
||||
27,Maven Toys Oaxaca 1,Oaxaca,Downtown,2010-10-02
|
||||
28,Maven Toys Puebla 2,Puebla,Downtown,2011-04-01
|
||||
29,Maven Toys Xalapa 1,Xalapa,Commercial,2011-06-21
|
||||
30,Maven Toys Guadalajara 3,Guadalajara,Airport,2011-10-20
|
||||
31,Maven Toys Ciudad de Mexico 2,Cuidad de Mexico,Airport,2012-05-04
|
||||
32,Maven Toys Hermosillo 1,Hermosillo,Residential,2012-08-31
|
||||
33,Maven Toys Monterrey 3,Monterrey,Airport,2013-03-17
|
||||
34,Maven Toys Villahermosa 1,Villahermosa,Downtown,2013-06-07
|
||||
35,Maven Toys Chilpancingo 1,Chilpancingo,Downtown,2013-06-11
|
||||
36,Maven Toys Morelia 1,Morelia,Downtown,2013-07-01
|
||||
37,Maven Toys Ciudad de Mexico 3,Cuidad de Mexico,Residential,2013-11-28
|
||||
38,Maven Toys Chihuahua 2,Chihuahua,Downtown,2014-03-18
|
||||
39,Maven Toys Xalapa 2,Xalapa,Downtown,2014-04-21
|
||||
40,Maven Toys Toluca 2,Toluca,Commercial,2014-05-27
|
||||
41,Maven Toys Hermosillo 2,Hermosillo,Downtown,2014-06-01
|
||||
42,Maven Toys Hermosillo 3,Hermosillo,Commercial,2014-06-27
|
||||
43,Maven Toys Durango 1,Durango,Downtown,2014-06-30
|
||||
44,Maven Toys Puebla 3,Puebla,Residential,2014-12-27
|
||||
45,Maven Toys Ciudad de Mexico 4,Cuidad de Mexico,Commercial,2015-06-21
|
||||
46,Maven Toys Guadalajara 4,Guadalajara,Downtown,2015-10-31
|
||||
47,Maven Toys Monterrey 4,Monterrey,Commercial,2015-11-21
|
||||
48,Maven Toys Saltillo 2,Saltillo,Commercial,2016-03-23
|
||||
49,Maven Toys Culiacan 1,Culiacan,Downtown,2016-05-10
|
||||
50,Maven Toys Guanajuato 3,Guanajuato,Residential,2016-05-18
|
||||
|
Binary file not shown.
|
|
@ -0,0 +1,2 @@
|
|||
"""Alteryx workflow runner — Python-native .yxmd executor."""
|
||||
__version__ = "0.1.0"
|
||||
|
|
@ -0,0 +1,13 @@
|
|||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# Make the package directory importable so that bare top-level imports (such
# as `from cli import main` below) resolve when the package is started with
# `python -m alteryx_runner` from the project root.
_pkg_dir = Path(__file__).parent  # alteryx_runner/
if not sys.path.count(str(_pkg_dir)):
    # Prepend (rather than append) so this directory is searched first.
    sys.path[:0] = [str(_pkg_dir)]

# Deliberately placed after the sys.path adjustment above.
from cli import main

if __name__ == "__main__":
    main()
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,94 @@
|
|||
"""CLI entry point: python -m alteryx_runner run workflow.yxmd [options]"""
|
||||
from __future__ import annotations
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import click
|
||||
import polars as pl
|
||||
|
||||
|
||||
# Root command group of the CLI; subcommands register themselves on it with
# `@main.command()`. The function has no body of its own.
# NOTE: click displays the docstring below as the group's --help text, so it
# is user-facing output and should be edited with that in mind.
@click.group()
def main() -> None:
    """Alteryx workflow runner — execute .yxmd files without Alteryx."""
|
||||
|
||||
|
||||
def _parse_param_pairs(pairs: tuple[str, ...]) -> dict[str, str]:
    """Convert repeated ``KEY=VALUE`` strings into a dict, warning on bad ones."""
    parsed: dict[str, str] = {}
    for pair in pairs:
        key, sep, value = pair.partition("=")
        if not sep:
            click.echo(f"Warning: --param {pair!r} ignored (no '=' found)", err=True)
            continue
        key = key.strip()
        if not key:
            # An empty key would later be substituted for the literal "%%".
            click.echo(f"Warning: --param {pair!r} ignored (empty key)", err=True)
            continue
        parsed[key] = value.strip()
    return parsed


@main.command()
@click.argument("workflow", type=click.Path(exists=True, path_type=Path))
@click.option("--output-dir", default=None, type=click.Path(path_type=Path),
              help="Write output files to this directory.")
@click.option("--param", multiple=True, metavar="KEY=VALUE",
              help="Set workflow constant (repeatable).")
@click.option("--verbose", is_flag=True, default=False,
              help="Print Browse results and execution log.")
@click.option("--dry-run", is_flag=True, default=False,
              help="Parse and validate only; do not execute.")
@click.option("--format", "fmt",
              type=click.Choice(["json", "csv", "parquet"]), default="csv",
              help="Default output format for Browse nodes.")
def run(
    workflow: Path,
    output_dir: Path | None,
    param: tuple[str, ...],
    verbose: bool,
    dry_run: bool,
    fmt: str,
) -> None:
    """Execute WORKFLOW (.yxmd file)."""
    # Flow: parse the workflow, stop here for --dry-run, otherwise build a
    # RunContext and execute the graph. Any parse or execution failure is
    # reported on stderr and exits with status 1.
    # NOTE(review): `fmt` is accepted but never forwarded; RunContext takes no
    # format argument, so --format currently has no effect in this function.

    # Import here so CLI loads fast even if deps are missing
    from engine.parser import parse_workflow
    from engine.executor import execute
    from engine.context import RunContext

    params = _parse_param_pairs(param)

    click.echo(f"Parsing {workflow} …")
    try:
        graph = parse_workflow(str(workflow))
    except Exception as e:
        click.echo(f"Parse error: {e}", err=True)
        sys.exit(1)

    click.echo(
        f" {len(graph.nodes)} nodes, {len(graph.connections)} connections"
    )

    if dry_run:
        click.echo("Dry run complete — no execution.")
        return

    if output_dir is not None:
        # Output paths are remapped into this directory, so it has to exist
        # before any tool tries to write there.
        try:
            output_dir.mkdir(parents=True, exist_ok=True)
        except OSError as e:
            click.echo(f"Cannot create output directory {output_dir}: {e}", err=True)
            sys.exit(1)

    ctx = RunContext(
        workflow_dir=str(workflow.parent),
        verbose=verbose,
        output_dir=str(output_dir) if output_dir else None,
        params=params,
    )

    click.echo("Executing …")
    try:
        outputs = execute(graph, ctx)
    except Exception as e:
        click.echo(f"Execution error: {e}", err=True)
        if verbose:
            import traceback

            traceback.print_exc()
        sys.exit(1)

    n_frames = sum(
        1 for df in outputs.values() if isinstance(df, pl.DataFrame) and len(df) > 0
    )
    click.echo(f"Done. {n_frames} non-empty output frames produced.")
|
||||
|
||||
|
||||
@main.command("list-tools")
def list_tools() -> None:
    """List all registered tool Plugin strings."""
    # Imported lazily, like the engine imports in `run`.
    from tools import _REGISTRY

    # Registry keys are unique, so ordering by key matches sorting the items.
    for plugin in sorted(_REGISTRY):
        cls = _REGISTRY[plugin]
        click.echo(f" {plugin:<70} → {cls.__name__}")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
main()
|
||||
|
|
@ -0,0 +1,10 @@
|
|||
from .parser import parse_workflow
|
||||
from .executor import execute
|
||||
from .context import RunContext
|
||||
from .graph import WorkflowGraph, NodeDef, ConnectionDef, FieldDef
|
||||
from .type_mapper import TypeMapper
|
||||
|
||||
__all__ = [
|
||||
"parse_workflow", "execute", "RunContext",
|
||||
"WorkflowGraph", "NodeDef", "ConnectionDef", "FieldDef", "TypeMapper",
|
||||
]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,52 @@
|
|||
from __future__ import annotations
|
||||
import tempfile
|
||||
from pathlib import Path
|
||||
import duckdb
|
||||
|
||||
from .type_mapper import TypeMapper
|
||||
from expression.transpiler import ExpressionTranspiler
|
||||
|
||||
|
||||
class RunContext:
    """State shared by all tools during one workflow run.

    Bundles the workflow directory (base for relative paths), an in-memory
    DuckDB connection, a freshly created scratch directory, the type mapper,
    the expression transpiler and the user-supplied workflow constants.
    """

    def __init__(
        self,
        workflow_dir: str,
        verbose: bool = False,
        output_dir: str | None = None,
        params: dict | None = None,
    ):
        """Create the context.

        Args:
            workflow_dir: Directory against which relative paths are resolved.
            verbose: Enables progress/diagnostic printing in consumers.
            output_dir: If given, output files are redirected into it.
            params: Workflow constants, substituted as ``%KEY%`` in paths.
        """
        self.workflow_dir = Path(workflow_dir)
        self.verbose = verbose
        self.output_dir: Path | None = Path(output_dir) if output_dir else None
        self.duckdb_con = duckdb.connect(":memory:")
        self.temp_dir = Path(tempfile.mkdtemp(prefix="alteryx_runner_"))
        self.type_mapper = TypeMapper()
        self.transpiler = ExpressionTranspiler(self.duckdb_con)
        self.constants: dict = params or {}

    def resolve_path(self, path: str) -> Path:
        """Expand placeholders in *path* and anchor it at the workflow directory.

        ``%temp%``, ``%Desktop%`` and every ``%KEY%`` from the workflow
        constants are substituted; a relative result is joined onto
        ``workflow_dir``. Output-directory remapping is NOT applied here; see
        ``resolve_output_path``.
        """
        # Normalise Windows backslashes so relative segments like .. work on
        # POSIX platforms (workflow XMLs are authored on Windows).
        path = path.replace("\\", "/")
        path = path.replace("%temp%", str(self.temp_dir) + "/")
        path = path.replace("%Desktop%", str(Path.home() / "Desktop") + "/")
        # Substitute workflow constants. Values are coerced to str so that
        # programmatic callers may pass numbers without a TypeError.
        for name, value in self.constants.items():
            path = path.replace(f"%{name}%", str(value))
        resolved = Path(path)
        if not resolved.is_absolute():
            resolved = self.workflow_dir / resolved
        return resolved

    def resolve_output_path(self, path: str) -> Path:
        """Resolve *path* for writing; with ``output_dir`` set, keep only the file name."""
        resolved = self.resolve_path(path)
        if self.output_dir is not None:
            return self.output_dir / resolved.name
        return resolved

    def __del__(self):
        # __init__ may have failed before the connection was created.
        con = getattr(self, "duckdb_con", None)
        if con is None:
            return
        try:
            con.close()
        except Exception:
            # A finalizer must never raise; closing is best-effort.
            pass
|
||||
|
|
@ -0,0 +1,95 @@
|
|||
from __future__ import annotations
|
||||
from collections import defaultdict, deque
|
||||
import polars as pl
|
||||
|
||||
from .graph import WorkflowGraph, ConnectionDef
|
||||
from .context import RunContext
|
||||
from tools import get_tool_class
|
||||
|
||||
|
||||
def execute(graph: WorkflowGraph, ctx: RunContext) -> dict[tuple[int, str], pl.DataFrame]:
    """Execute a WorkflowGraph in topological (BFS) order.

    Args:
        graph: Parsed workflow (nodes keyed by ToolID plus connections).
        ctx: Run context handed to every tool instance.

    Returns:
        Mapping ``(tool_id, output_anchor) -> DataFrame`` of every output
        produced during the run.

    Raises:
        RuntimeError: If some nodes can never become ready (dependency cycle).
    """
    in_degree: dict[int, int] = dict.fromkeys(graph.nodes, 0)
    successors: dict[int, list[ConnectionDef]] = defaultdict(list)
    predecessors: dict[int, list[ConnectionDef]] = defaultdict(list)

    for c in graph.connections:
        if c.origin_id not in graph.nodes or c.dest_id not in graph.nodes:
            # A connection to an unknown ToolID would otherwise either stall
            # its destination forever or raise KeyError on graph.nodes[...].
            if ctx.verbose:
                print(
                    f"[WARN] Ignoring connection {c.origin_id} -> {c.dest_id} "
                    "(unknown ToolID)"
                )
            continue
        in_degree[c.dest_id] += 1
        successors[c.origin_id].append(c)
        predecessors[c.dest_id].append(c)

    # (tool_id, anchor) → DataFrame
    outputs: dict[tuple[int, str], pl.DataFrame] = {}

    queue: deque[int] = deque(tid for tid, deg in in_degree.items() if deg == 0)
    n_done = 0

    while queue:
        tid = queue.popleft()
        n_done += 1
        node = graph.nodes[tid]
        tool_cls = get_tool_class(node.plugin)

        if tool_cls is None:
            if ctx.verbose:
                print(f"[SKIP] ToolID={tid} plugin={node.plugin!r} (unsupported)")
            _passthrough(tid, predecessors, outputs, successors, in_degree, queue)
            continue

        tool = tool_cls(node, ctx)
        inputs = _gather_inputs(predecessors[tid], outputs)

        if ctx.verbose:
            print(f"[RUN ] ToolID={tid} plugin={node.plugin!r}")

        result = tool.execute(inputs)

        # Tolerate tools that return None instead of an empty mapping.
        for anchor, df in (result or {}).items():
            outputs[(tid, anchor)] = df

        _release_successors(tid, successors, in_degree, queue)

    if n_done < len(graph.nodes):
        stuck = sorted(tid for tid, deg in in_degree.items() if deg > 0)
        raise RuntimeError(
            f"Workflow has a dependency cycle; ToolIDs never executed: {stuck}"
        )

    return outputs


def _gather_inputs(
    incoming: list[ConnectionDef],
    outputs: dict[tuple[int, str], pl.DataFrame],
) -> dict[str, pl.DataFrame]:
    """Collect the upstream frames for one node, keyed by destination anchor."""
    # Track duplicate dest_anchors to handle multi-input tools like Union
    anchor_counts: dict[str, int] = defaultdict(int)
    for c in incoming:
        anchor_counts[c.dest_anchor] += 1

    inputs: dict[str, pl.DataFrame] = {}
    for c in incoming:
        df = outputs.get((c.origin_id, c.origin_anchor))
        if df is None:
            continue
        # If multiple connections share the same dest_anchor, use the
        # connection name (e.g., '#1', '#2') as the key
        shared = anchor_counts[c.dest_anchor] > 1
        inputs[c.name if shared and c.name else c.dest_anchor] = df
    return inputs


def _release_successors(
    tid: int,
    successors: dict[int, list[ConnectionDef]],
    in_degree: dict[int, int],
    queue: deque[int],
) -> None:
    """Mark *tid* as finished and enqueue every successor that became ready."""
    for c in successors.get(tid, []):
        in_degree[c.dest_id] -= 1
        if in_degree[c.dest_id] == 0:
            queue.append(c.dest_id)


def _passthrough(
    tid: int,
    predecessors: dict[int, list[ConnectionDef]],
    outputs: dict[tuple[int, str], pl.DataFrame],
    successors: dict[int, list[ConnectionDef]],
    in_degree: dict[int, int],
    queue: deque[int],
) -> None:
    """Propagate a single upstream output through a no-op node.

    The first incoming connection's frame (or an empty frame when there is
    none) is published on the node's ``"Output"`` anchor, then the node's
    successors are released.
    """
    preds = predecessors.get(tid, [])
    df = pl.DataFrame()
    if preds:
        first = preds[0]
        df = outputs.get((first.origin_id, first.origin_anchor), pl.DataFrame())
    outputs[(tid, "Output")] = df
    _release_successors(tid, successors, in_degree, queue)
|
||||
|
|
@ -0,0 +1,38 @@
|
|||
from __future__ import annotations
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Dict, List, Optional
|
||||
import xml.etree.ElementTree as ET
|
||||
|
||||
|
||||
@dataclass
class FieldDef:
    """One column of a tool's recorded output schema."""

    # Column name.
    name: str
    # Alteryx type string (e.g. "V_String").
    type: str
    # Declared size, when the XML carries one.
    size: Optional[int] = None
    # Value of the field's "source" attribute, if present.
    source: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class NodeDef:
    """A single tool (node) of the workflow."""

    # Numeric ToolID of the node.
    tool_id: int
    # Plugin string identifying the tool implementation ("" when absent).
    plugin: str
    # The node's <Configuration> element, or None when it has none.
    config: Optional[ET.Element]
    # Fields recorded for the node's output; a fresh list per instance.
    output_schema: List[FieldDef] = field(default_factory=list)
    # Canvas position as (x, y).
    position: tuple = (0, 0)
|
||||
|
||||
|
||||
@dataclass
class ConnectionDef:
    """A directed edge from one tool's output anchor to another's input anchor."""

    # ToolID and anchor name on the producing side.
    origin_id: int
    origin_anchor: str
    # ToolID and anchor name on the consuming side.
    dest_id: int
    dest_anchor: str
    # Optional connection name (e.g. '#1', '#2' on multi-input anchors).
    name: Optional[str] = None
    # True when the connection is flagged as wireless in the workflow.
    wireless: bool = False
|
||||
|
||||
|
||||
@dataclass
class WorkflowGraph:
    """Parsed workflow: its tools, the connections between them, and properties."""

    # Tools keyed by ToolID.
    nodes: Dict[int, NodeDef]
    # All connections between tools.
    connections: List[ConnectionDef]
    # The workflow-level <Properties> element, or None when missing.
    properties: Optional[ET.Element]
|
||||
|
|
@ -0,0 +1,82 @@
|
|||
from __future__ import annotations
|
||||
import xml.etree.ElementTree as ET
|
||||
from typing import Dict, List, Optional
|
||||
from .graph import FieldDef, NodeDef, ConnectionDef, WorkflowGraph
|
||||
|
||||
|
||||
def parse_workflow(path: str) -> WorkflowGraph:
    """Parse a .yxmd XML file into a WorkflowGraph.

    Nodes are gathered (including those nested in containers) into a dict
    keyed by ToolID; connections lacking an Origin or Destination element
    are dropped.
    """
    root = ET.parse(path).getroot()

    nodes: Dict[int, NodeDef] = {}
    _collect_nodes(root, nodes)

    endpoints = (
        (conn_el, conn_el.find("Origin"), conn_el.find("Destination"))
        for conn_el in root.findall("Connections/Connection")
    )
    connections: List[ConnectionDef] = [
        ConnectionDef(
            origin_id=int(origin_el.attrib["ToolID"]),
            origin_anchor=origin_el.attrib.get("Connection", "Output"),
            dest_id=int(dest_el.attrib["ToolID"]),
            dest_anchor=dest_el.attrib.get("Connection", "Input"),
            name=conn_el.attrib.get("name"),
            wireless=conn_el.attrib.get("Wireless", "False") == "True",
        )
        for conn_el, origin_el, dest_el in endpoints
        if origin_el is not None and dest_el is not None
    ]

    return WorkflowGraph(
        nodes=nodes,
        connections=connections,
        properties=root.find("Properties"),
    )
|
||||
|
||||
|
||||
def _collect_nodes(parent: ET.Element, nodes: Dict[int, NodeDef]) -> None:
    """Recursively collect Node elements, flattening ChildNodes containers."""
    for node_el in parent.findall("Nodes/Node"):
        _parse_node(node_el, nodes)
        # Tool containers keep their members under <ChildNodes>; everything
        # below that level is handled by the flat collector.
        container = node_el.find("ChildNodes")
        if container is not None:
            _collect_nodes_flat(container, nodes)
|
||||
|
||||
|
||||
def _collect_nodes_flat(parent: ET.Element, nodes: Dict[int, NodeDef]) -> None:
    """Register every Node directly under *parent* and, depth-first, all nested ones."""
    # Explicit stack of sibling iterators: visits nodes in the same pre-order
    # as a recursive walk (a node first, then its ChildNodes, then siblings).
    stack = [iter(parent.findall("Node"))]
    while stack:
        node_el = next(stack[-1], None)
        if node_el is None:
            stack.pop()
            continue
        _parse_node(node_el, nodes)
        nested = node_el.find("ChildNodes")
        if nested is not None:
            stack.append(iter(nested.findall("Node")))
|
||||
|
||||
|
||||
def _parse_node(node_el: ET.Element, nodes: Dict[int, NodeDef]) -> None:
    """Build a NodeDef from one <Node> element and store it under its ToolID."""
    tool_id = int(node_el.attrib["ToolID"])

    # Plugin name and canvas position both live under <GuiSettings>.
    plugin = ""
    position = (0, 0)
    gui = node_el.find("GuiSettings")
    if gui is not None:
        plugin = gui.attrib.get("Plugin", "")
        pos_el = gui.find("Position")
        if pos_el is not None:
            position = (
                int(pos_el.attrib.get("x", 0)),
                int(pos_el.attrib.get("y", 0)),
            )

    nodes[tool_id] = NodeDef(
        tool_id=tool_id,
        plugin=plugin,
        config=node_el.find("Properties/Configuration"),
        output_schema=_parse_schema(node_el),
        position=position,
    )
|
||||
|
||||
|
||||
def _parse_schema(node_el: ET.Element) -> List[FieldDef]:
    """Read the output fields recorded under the node's MetaInfo/RecordInfo.

    Fields that belong to tools nested under this node's <ChildNodes> are
    excluded, so a container does not report its members' columns as its own.
    """
    # Elements hash by identity, so a set gives cheap "is nested" lookups.
    nested = set(node_el.findall("ChildNodes//MetaInfo/RecordInfo/Field"))

    fields = []
    for f in node_el.findall(".//MetaInfo/RecordInfo/Field"):
        if f in nested:
            continue
        size_str = f.attrib.get("size")
        # Sizes may be written with a fractional part; only the integer part
        # fits FieldDef.size.
        size = int(float(size_str)) if size_str else None
        fields.append(FieldDef(
            name=f.attrib["name"],
            type=f.attrib.get("type", "V_String"),
            size=size,
            source=f.attrib.get("source"),
        ))
    return fields
|
||||
|
|
@ -0,0 +1,66 @@
|
|||
from __future__ import annotations
|
||||
from typing import Optional
|
||||
import polars as pl
|
||||
|
||||
|
||||
class TypeMapper:
    """Maps Alteryx field types to Polars dtypes and DuckDB type strings."""

    _POLARS: dict[str, pl.PolarsDataType] = {
        "Bool": pl.Boolean,
        "Byte": pl.UInt8,
        "Int16": pl.Int16,
        "Int32": pl.Int32,
        "Int64": pl.Int64,
        "Float": pl.Float32,
        "Double": pl.Float64,
        "String": pl.String,
        "V_String": pl.String,
        "WString": pl.String,
        "V_WString": pl.String,
        "Date": pl.Date,
        "Time": pl.Time,
        "DateTime": pl.Datetime,
        "SpatialObj": pl.String,
        "Blob": pl.Binary,
    }

    _DUCKDB: dict[str, str] = {
        "Bool": "BOOLEAN",
        "Byte": "UTINYINT",
        "Int16": "SMALLINT",
        "Int32": "INTEGER",
        "Int64": "BIGINT",
        "Float": "FLOAT",
        "Double": "DOUBLE",
        "String": "VARCHAR",
        "V_String": "VARCHAR",
        "WString": "VARCHAR",
        "V_WString": "VARCHAR",
        "Date": "DATE",
        "Time": "TIME",
        "DateTime": "TIMESTAMP",
        "SpatialObj": "VARCHAR",
        "Blob": "BLOB",
        "FixedDecimal": "DECIMAL",
    }

    # (precision, scale) used for FixedDecimal when no size is supplied.
    _DEFAULT_DECIMAL = (19, 2)

    @classmethod
    def _decimal_spec(cls, size: str | int | float | None) -> tuple[int, int]:
        """Split a ``"precision.scale"`` size into ``(precision, scale)``.

        Accepts the textual form as well as plain numbers (FieldDef stores the
        size as an int); a missing scale means 0 and a missing size yields the
        default spec.
        """
        if not size:
            return cls._DEFAULT_DECIMAL
        precision, _, scale = str(size).partition(".")
        return int(precision), int(scale) if scale else 0

    def map(self, alteryx_type: str, size: str | int | float | None = None) -> pl.PolarsDataType:
        """Return the Polars dtype for *alteryx_type* (unknown types map to String)."""
        if alteryx_type == "FixedDecimal":
            precision, scale = self._decimal_spec(size)
            return pl.Decimal(precision=precision, scale=scale)
        return self._POLARS.get(alteryx_type, pl.String)

    def map_duckdb(self, alteryx_type: str, size: str | int | float | None = None) -> str:
        """Return the DuckDB type string for *alteryx_type* (unknown types map to VARCHAR)."""
        if alteryx_type == "FixedDecimal":
            precision, scale = self._decimal_spec(size)
            return f"DECIMAL({precision},{scale})"
        return self._DUCKDB.get(alteryx_type, "VARCHAR")
|
||||
|
|
@ -0,0 +1,4 @@
|
|||
from .transpiler import ExpressionTranspiler, transpile, UnsupportedExpressionError
|
||||
from .functions import get_function_sql
|
||||
|
||||
__all__ = ["ExpressionTranspiler", "transpile", "UnsupportedExpressionError", "get_function_sql"]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,152 @@
|
|||
"""Mapping of Alteryx built-in functions to DuckDB SQL equivalents."""
|
||||
from __future__ import annotations
|
||||
|
||||
# Function translations: Alteryx name → DuckDB SQL template. Placeholders
# {0}, {1}, … stand for the already-rendered argument SQL. A value of None
# marks a name that has no template (handled separately or not a scalar).
FUNCTION_MAP: dict[str, str | None] = {
    # String
    "Uppercase": "UPPER({0})",
    "Lowercase": "LOWER({0})",
    "Trim": "TRIM({0})",
    "LTrim": "LTRIM({0})",
    "RTrim": "RTRIM({0})",
    "Length": "LENGTH({0})",
    "Left": "LEFT({0}, {1})",
    "Right": "RIGHT({0}, {1})",
    # Alteryx positions are 0-based, DuckDB's SUBSTR/INSTR are 1-based.
    "Substring": "SUBSTR({0}, ({1}) + 1, {2})",
    # INSTR returns 0 for "not found", which becomes Alteryx's -1.
    "FindString": "(INSTR({0}, {1}) - 1)",
    "ReplaceChar": "REPLACE({0}, {1}, {2})",
    "StringToDate": "STRPTIME({0}, {1})",
    "ToString": "PRINTF('%.' || {1} || 'f', {0})",
    "Contains": "CONTAINS({0}, {1})",
    "StartsWith": "STARTS_WITH({0}, {1})",
    "EndsWith": "ENDS_WITH({0}, {1})",
    "REGEX_Match": "REGEXP_MATCHES({0}, {1})",
    "REGEX_Replace": "REGEXP_REPLACE({0}, {1}, {2})",
    "PadLeft": "LPAD({0}, {1}, {2})",
    "PadRight": "RPAD({0}, {1}, {2})",
    # The SQL literal must contain a single backslash (regex \s+); SQL string
    # literals do not process backslash escapes themselves.
    "GetWord": "list_extract(str_split_regex({0}, '\\s+'), {1} + 1)",
    "CountWords": "array_length(str_split_regex(TRIM({0}), '\\s+'))",
    "CharFromInt": "chr({0}::INTEGER)",
    "IntFromChar": "ascii({0})",
    "ConvertFromCodePage": "{0}",
    "ReverseString": "reverse({0})",
    "DecomposeUnicodeForMatch": "strip_accents(UPPER({0}))",
    # Math
    "ABS": "ABS({0})",
    "Abs": "ABS({0})",
    "CEIL": "CEIL({0})",
    "Ceil": "CEIL({0})",
    "FLOOR": "FLOOR({0})",
    "Floor": "FLOOR({0})",
    # Alteryx Round(x, mult) rounds to the nearest multiple of mult, whereas
    # SQL ROUND's second argument is a number of decimal places.
    "ROUND": "(ROUND(({0}) / ({1})) * ({1}))",
    "Round": "(ROUND(({0}) / ({1})) * ({1}))",
    "SQRT": "SQRT({0})",
    "Sqrt": "SQRT({0})",
    "POW": "POWER({0}, {1})",
    "Pow": "POWER({0}, {1})",
    "LOG": "LN({0})",
    "Log": "LN({0})",
    "LOG10": "LOG10({0})",
    "Log10": "LOG10({0})",
    "MOD": "({0} % {1})",
    "Mod": "({0} % {1})",
    "MIN": "LEAST({0}, {1})",
    "Max": "GREATEST({0}, {1})",
    "MAX": "GREATEST({0}, {1})",
    "Min": "LEAST({0}, {1})",
    "RandInt": "FLOOR(RANDOM() * {0})::BIGINT",
    "Random": "RANDOM()",
    "PI": "PI()",
    "SIN": "SIN({0})",
    "COS": "COS({0})",
    "TAN": "TAN({0})",
    "ASIN": "ASIN({0})",
    "ACOS": "ACOS({0})",
    "ATAN": "ATAN({0})",
    "ATAN2": "ATAN2({0}, {1})",
    "EXP": "EXP({0})",
    "Sign": "SIGN({0})",
    # Null handling
    "IsNull": "({0} IS NULL)",
    "IsEmpty": "({0} IS NULL OR {0} = '')",
    "NullConvert": "NULLIF({0}, '')",
    "Null": "NULL",
    # Type conversion
    "ToNumber": "TRY_CAST({0} AS DOUBLE)",
    "ToString_num": "CAST({0} AS VARCHAR)",
    "TOBOOL": "CAST({0} AS BOOLEAN)",
    # Date/Time
    "DateTimeNow": "NOW()",
    "DateTimeToday": "CURRENT_DATE",
    "DateTimeAdd": "({0} + INTERVAL ({1}) {2})",
    "DateTimeDiff": "DATEDIFF({2}, {1}, {0})",
    "DateTimeFormat": "STRFTIME({0}, {1})",
    "ToDate": "CAST({0} AS DATE)",
    "DateTimeYear": "YEAR({0})",
    "DateTimeMonth": "MONTH({0})",
    "DateTimeDay": "DAY({0})",
    "DateTimeHour": "HOUR({0})",
    "DateTimeMinute": "MINUTE({0})",
    "DateTimeSecond": "SECOND({0})",
    "DateTimeFirstOfMonth": "DATE_TRUNC('month', {0})",
    "DateTimeLastOfMonth": "(DATE_TRUNC('month', {0}) + INTERVAL '1 month' - INTERVAL '1 day')::DATE",
    "DateTimeFirstOfYear": "DATE_TRUNC('year', {0})",
    "DateTimeQuarter": "QUARTER({0})",
    "DateTimeTrim": "DATE_TRUNC({1}, {0})",
    # Conditional
    "IIF": "(CASE WHEN {0} THEN {1} ELSE {2} END)",
    "Switch": None,  # handled separately
    # Misc
    "TOPN": None,  # not a scalar function
}


def get_function_sql(name: str, args: list[str]) -> str:
    """Render a function call to DuckDB SQL given evaluated argument SQL strings.

    Args:
        name: Alteryx function name; matched exactly first, then
            case-insensitively.
        args: SQL text of each argument, in call order.

    Returns:
        The filled-in template. Names without a template, and calls that
        supply fewer arguments than the template needs, are passed through as
        ``name(arg, ...)`` (may work in DuckDB natively).
    """
    # Switch has no template (its map entry is None), so it must be
    # dispatched before the "no template" passthrough below.
    if name.lower() == "switch":
        return _render_switch(args)

    template = FUNCTION_MAP.get(name)
    if template is None:
        canon = name.lower()
        template = next(
            (v for k, v in FUNCTION_MAP.items() if k.lower() == canon), None
        )

    passthrough = f"{name}({', '.join(args)})"
    if template is None:
        return passthrough

    try:
        # One-pass substitution: placeholder-like text inside an argument
        # (e.g. the literal '{1}') is never substituted again.
        return template.format(*args)
    except IndexError:
        # Too few arguments for this template.
        return passthrough


def _render_switch(args: list[str]) -> str:
    """Switch(val, default, v1, r1, v2, r2, ...) → CASE val WHEN v1 THEN r1 ... ELSE default END"""
    if len(args) < 2:
        return "NULL"
    val, default, *pairs = args
    # A trailing value without a result is ignored.
    cases = [f"WHEN {when} THEN {then}" for when, then in zip(pairs[0::2], pairs[1::2])]
    if not cases:
        # CASE without any WHEN is not valid SQL; only the default remains.
        return default
    return f"CASE {val} {' '.join(cases)} ELSE {default} END"
|
||||
|
||||
|
||||
def titlecase_sql(col: str) -> str:
    """Approximate Titlecase via DuckDB: capitalise first letter of each word."""
    # Split on single spaces, fix the casing of each piece, join back.
    words = f"str_split({col}, ' ')"
    capitalise = "x -> UPPER(LEFT(x,1)) || LOWER(SUBSTR(x,2))"
    return f"array_to_string(list_transform({words}, {capitalise}), ' ')"
|
||||
|
|
@ -0,0 +1,630 @@
|
|||
"""
|
||||
Alteryx expression → DuckDB SQL transpiler.
|
||||
|
||||
Handles:
|
||||
[ColumnName] → "ColumnName"
|
||||
"string" → 'string' (double → single quotes)
|
||||
IF...THEN...ENDIF → CASE WHEN...END
|
||||
IIF(c,t,f) → CASE WHEN c THEN t ELSE f END
|
||||
IsNull/IsEmpty → IS NULL checks
|
||||
NULL() → NULL
|
||||
AND/OR/NOT → AND/OR/NOT
|
||||
== / != → = / <>
|
||||
Row references → not supported in SQL mode (raises)
|
||||
All functions in expression/functions.py
|
||||
"""
|
||||
from __future__ import annotations
|
||||
import re
|
||||
from enum import Enum, auto
|
||||
from typing import Optional
|
||||
import polars as pl
|
||||
import duckdb
|
||||
|
||||
from .functions import get_function_sql, titlecase_sql
|
||||
|
||||
|
||||
class UnsupportedExpressionError(Exception):
    """Raised when an expression cannot be tokenised or parsed."""
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Tokeniser
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TT(Enum):
    """Token types produced by the expression tokeniser."""

    # --- punctuation -------------------------------------------------------
    LBRACKET = auto()   # [
    RBRACKET = auto()   # ]
    LPAREN = auto()     # (
    RPAREN = auto()     # )
    COMMA = auto()      # ,
    # --- arithmetic --------------------------------------------------------
    PLUS = auto()       # +
    MINUS = auto()      # -
    STAR = auto()       # *
    SLASH = auto()      # /
    PERCENT = auto()    # %
    CONCAT = auto()     # + (string, same as PLUS — resolved by context)
    # --- comparison --------------------------------------------------------
    EQ = auto()         # == or =
    NEQ = auto()        # != or <>
    LT = auto()         # <
    LE = auto()         # <=
    GT = auto()         # >
    GE = auto()         # >=
    # --- keywords ----------------------------------------------------------
    AND = auto()
    OR = auto()
    NOT = auto()
    IF = auto()
    THEN = auto()
    ELSEIF = auto()
    ELSE = auto()
    ENDIF = auto()
    IIF = auto()
    NULL_FUNC = auto()  # NULL()
    ISNULL = auto()
    ISEMPTY = auto()
    # --- literals and names ------------------------------------------------
    NUMBER = auto()
    STRING = auto()     # double-quoted string literal
    IDENT = auto()      # function name or keyword
    COLUMN = auto()     # [ColName] — after stripping brackets
    EOF = auto()
    # --- remaining operators -----------------------------------------------
    BANG = auto()       # ! (prefix not)
    PIPE2 = auto()      # || (string concat in SQL)
    POWER = auto()      # ^
|
||||
|
||||
|
||||
# Upper-cased identifier text → token type. Anything not listed is a plain
# IDENT; the last three entries are spelled out although they map to IDENT.
_KEYWORDS = {
    **{
        kw: TT[kw]
        for kw in ("AND", "OR", "NOT", "IF", "THEN", "ELSEIF", "ELSE", "ENDIF", "IIF")
    },
    "NULL": TT.NULL_FUNC,
    **{kw: TT[kw] for kw in ("ISNULL", "ISEMPTY")},
    # keep as IDENT, handled in primary
    **dict.fromkeys(("ISNUMBER", "TRUE", "FALSE"), TT.IDENT),
}
|
||||
|
||||
|
||||
class Token:
    """A lexical token: its type plus an optional payload (text or name)."""

    __slots__ = ("type", "value")

    def __init__(self, type_: TT, value: object = None):
        self.type, self.value = type_, value

    def __repr__(self):
        return "Token({}, {!r})".format(self.type, self.value)
|
||||
|
||||
|
||||
_TOKEN_RE = re.compile(
|
||||
r"""
|
||||
(?P<SPACE>\s+)
|
||||
| (?P<COLUMN>\[[^\]]*\])
|
||||
| (?P<NUMBER>-?\d+(?:\.\d+)?(?:[eE][+-]?\d+)?)
|
||||
| (?P<STRING>"(?:[^"\\]|\\.)*")
|
||||
| (?P<LE><=)
|
||||
| (?P<GE>>=)
|
||||
| (?P<NEQ>!=|<>)
|
||||
| (?P<EQ>==|=)
|
||||
| (?P<LT><)
|
||||
| (?P<GT>>)
|
||||
| (?P<PIPE2>\|\|)
|
||||
| (?P<CONCAT>\+)
|
||||
| (?P<MINUS>-)
|
||||
| (?P<STAR>\*)
|
||||
| (?P<SLASH>/)
|
||||
| (?P<PERCENT>%)
|
||||
| (?P<POWER>\^)
|
||||
| (?P<BANG>!)
|
||||
| (?P<LPAREN>\()
|
||||
| (?P<RPAREN>\))
|
||||
| (?P<COMMA>,)
|
||||
| (?P<IDENT>[A-Za-z_]\w*)
|
||||
""",
|
||||
re.VERBOSE,
|
||||
)
|
||||
|
||||
|
||||
# Regex group name → token type for tokens that carry no payload.
# Note that the CONCAT group ("+") is emitted as TT.PLUS.
_PLAIN_TOKENS = {
    "LE": TT.LE,
    "GE": TT.GE,
    "NEQ": TT.NEQ,
    "EQ": TT.EQ,
    "LT": TT.LT,
    "GT": TT.GT,
    "PIPE2": TT.PIPE2,
    "CONCAT": TT.PLUS,
    "MINUS": TT.MINUS,
    "STAR": TT.STAR,
    "SLASH": TT.SLASH,
    "PERCENT": TT.PERCENT,
    "POWER": TT.POWER,
    "BANG": TT.BANG,
    "LPAREN": TT.LPAREN,
    "RPAREN": TT.RPAREN,
    "COMMA": TT.COMMA,
}


def tokenise(text: str) -> list[Token]:
    """Split an expression into tokens, always ending with an EOF token.

    Raises:
        UnsupportedExpressionError: On a character no token pattern accepts.
    """
    tokens: list[Token] = []
    pos, end = 0, len(text)
    while pos < end:
        m = _TOKEN_RE.match(text, pos)
        if m is None:
            raise UnsupportedExpressionError(
                f"Unexpected character {text[pos]!r} at pos {pos} in: {text!r}"
            )
        pos = m.end()
        kind, raw = m.lastgroup, m.group()

        if kind == "SPACE":
            continue

        plain = _PLAIN_TOKENS.get(kind)
        if plain is not None:
            tokens.append(Token(plain))
            continue

        if kind == "COLUMN":
            token = Token(TT.COLUMN, raw[1:-1])  # strip [ ]
        elif kind == "NUMBER":
            token = Token(TT.NUMBER, raw)
        elif kind == "STRING":
            # Drop the delimiters and escape the body for a single-quoted
            # SQL literal (the surrounding quotes are added by the parser).
            body = raw[1:-1]
            body = body.replace("\\'", "'").replace("'", "''").replace('\\"', '"')
            token = Token(TT.STRING, body)
        elif kind == "IDENT":
            # Keywords are recognised case-insensitively; the original
            # spelling is kept as the token value.
            token = Token(_KEYWORDS.get(raw.upper(), TT.IDENT), raw)
        else:
            raise UnsupportedExpressionError(f"Unhandled token kind {kind}")
        tokens.append(token)

    tokens.append(Token(TT.EOF))
    return tokens
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Parser / code generator (recursive descent → DuckDB SQL string)
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class _Parser:
|
||||
def __init__(self, tokens: list[Token]):
|
||||
self._tokens = tokens
|
||||
self._pos = 0
|
||||
|
||||
@property
|
||||
def _cur(self) -> Token:
|
||||
return self._tokens[self._pos]
|
||||
|
||||
def _peek(self, offset: int = 1) -> Token:
|
||||
idx = self._pos + offset
|
||||
if idx >= len(self._tokens):
|
||||
return Token(TT.EOF)
|
||||
return self._tokens[idx]
|
||||
|
||||
def _advance(self) -> Token:
|
||||
tok = self._tokens[self._pos]
|
||||
self._pos += 1
|
||||
return tok
|
||||
|
||||
def _expect(self, tt: TT) -> Token:
|
||||
tok = self._advance()
|
||||
if tok.type != tt:
|
||||
raise UnsupportedExpressionError(
|
||||
f"Expected {tt}, got {tok.type} ({tok.value!r})"
|
||||
)
|
||||
return tok
|
||||
|
||||
# ------------------------------------------------------------------ #
|
||||
|
||||
def parse(self) -> str:
|
||||
sql = self._parse_expr()
|
||||
if self._cur.type != TT.EOF:
|
||||
raise UnsupportedExpressionError(
|
||||
f"Unexpected token at end: {self._cur}"
|
||||
)
|
||||
return sql
|
||||
|
||||
def _parse_expr(self) -> str:
|
||||
return self._parse_or()
|
||||
|
||||
def _parse_or(self) -> str:
|
||||
left = self._parse_and()
|
||||
while self._cur.type == TT.OR:
|
||||
self._advance()
|
||||
right = self._parse_and()
|
||||
left = f"({left} OR {right})"
|
||||
return left
|
||||
|
||||
def _parse_and(self) -> str:
|
||||
left = self._parse_not()
|
||||
while self._cur.type == TT.AND:
|
||||
self._advance()
|
||||
right = self._parse_not()
|
||||
left = f"({left} AND {right})"
|
||||
return left
|
||||
|
||||
def _parse_not(self) -> str:
|
||||
if self._cur.type in (TT.NOT, TT.BANG):
|
||||
self._advance()
|
||||
operand = self._parse_not()
|
||||
return f"(NOT {operand})"
|
||||
return self._parse_comparison()
|
||||
|
||||
def _parse_comparison(self) -> str:
|
||||
left = self._parse_additive()
|
||||
cmp_map = {
|
||||
TT.EQ: "=",
|
||||
TT.NEQ: "<>",
|
||||
TT.LT: "<",
|
||||
TT.LE: "<=",
|
||||
TT.GT: ">",
|
||||
TT.GE: ">=",
|
||||
}
|
||||
if self._cur.type in cmp_map:
|
||||
op = cmp_map[self._advance().type]
|
||||
right = self._parse_additive()
|
||||
return f"({left} {op} {right})"
|
||||
return left
|
||||
|
||||
def _parse_additive(self) -> str:
|
||||
left = self._parse_multiplicative()
|
||||
while self._cur.type in (TT.PLUS, TT.MINUS, TT.PIPE2):
|
||||
op = self._advance()
|
||||
right = self._parse_multiplicative()
|
||||
if op.type == TT.PIPE2:
|
||||
left = f"({left} || {right})"
|
||||
elif op.type == TT.MINUS:
|
||||
left = f"({left} - {right})"
|
||||
else:
|
||||
left = f"({left} + {right})"
|
||||
return left
|
||||
|
||||
def _parse_multiplicative(self) -> str:
|
||||
left = self._parse_unary()
|
||||
while self._cur.type in (TT.STAR, TT.SLASH, TT.PERCENT, TT.POWER):
|
||||
op = self._advance()
|
||||
right = self._parse_unary()
|
||||
if op.type == TT.POWER:
|
||||
left = f"POWER({left}, {right})"
|
||||
elif op.type == TT.PERCENT:
|
||||
left = f"({left} % {right})"
|
||||
elif op.type == TT.SLASH:
|
||||
left = f"({left} / {right})"
|
||||
else:
|
||||
left = f"({left} * {right})"
|
||||
return left
|
||||
|
||||
def _parse_unary(self) -> str:
|
||||
if self._cur.type == TT.MINUS:
|
||||
self._advance()
|
||||
return f"(-{self._parse_primary()})"
|
||||
if self._cur.type == TT.PLUS:
|
||||
self._advance()
|
||||
return self._parse_primary()
|
||||
return self._parse_primary()
|
||||
|
||||
def _parse_primary(self) -> str:  # noqa: C901 (complexity ok for parser)
    """Parse one primary expression and return its SQL text.

    Handles parenthesised sub-expressions, column references (including
    ``[Row-N:Field]`` / ``[Row+N:Field]`` offsets), number and string
    literals, ``IF`` blocks, ``NULL``, the IsNull / IsEmpty / IsNumber
    predicates, specially rendered functions (Titlecase, DateTimeAdd,
    DateTimeDiff, IIF), generic function calls and bare identifiers.

    Raises:
        UnsupportedExpressionError: if the current token cannot start a
            primary expression, or a DateTimeAdd/DateTimeDiff unit is not
            a plain word.
    """
    tok = self._cur

    # Parenthesised sub-expression
    if tok.type == TT.LPAREN:
        self._advance()
        inner = self._parse_expr()
        self._expect(TT.RPAREN)
        return f"({inner})"

    # Column reference
    if tok.type == TT.COLUMN:
        self._advance()
        col = tok.value
        # Row reference [Row-N:Field] or [Row+N:Field]
        row_m = re.match(r"^Row([+-]\d+):(.+)$", col, re.IGNORECASE)
        if row_m:
            offset = int(row_m.group(1))
            field = self._quote_ident(row_m.group(2))
            func = "LAG" if offset < 0 else "LEAD"
            return f"{func}({field}, {abs(offset)}) OVER ()"
        return self._quote_ident(col)

    # Numeric literal
    if tok.type == TT.NUMBER:
        self._advance()
        return tok.value

    # String literal (already converted to single-quoted)
    if tok.type == TT.STRING:
        self._advance()
        return f"'{tok.value}'"

    # IF … THEN … [ELSEIF … THEN …]* [ELSE …] ENDIF
    if tok.type == TT.IF:
        return self._parse_if()

    # NULL() or bare NULL keyword
    if tok.type == TT.NULL_FUNC:
        self._advance()
        self._skip_empty_parens()
        return "NULL"

    # IsNull([F]) — keyword form
    if tok.type == TT.ISNULL:
        self._advance()
        return self._render_isnull()

    # IsEmpty([F]) — keyword form
    if tok.type == TT.ISEMPTY:
        self._advance()
        return self._render_isempty()

    # IIF keyword token
    if tok.type == TT.IIF:
        self._advance()
        return self._render_iif()

    # Function call or bare identifier
    if tok.type == TT.IDENT:
        name = tok.value
        upper = name.upper()
        self._advance()

        # Bare boolean/null literals
        if upper == "TRUE":
            return "TRUE"
        if upper == "FALSE":
            return "FALSE"
        if upper == "NULL":
            self._skip_empty_parens()
            return "NULL"

        # IsNull / IsEmpty / IIF arriving as plain identifiers (case variations)
        if upper == "ISNULL":
            return self._render_isnull()
        if upper == "ISEMPTY":
            return self._render_isempty()
        if upper == "IIF":
            return self._render_iif()
        if upper == "ISNUMBER":
            (inner,) = self._parse_fixed_args(1)
            return f"(TRY_CAST({inner} AS DOUBLE) IS NOT NULL)"

        # Titlecase — special SQL rendering
        if upper == "TITLECASE":
            (inner,) = self._parse_fixed_args(1)
            return titlecase_sql(inner)

        # DateTimeAdd / DateTimeDiff need the unit string unquoted
        if upper == "DATETIMEADD":
            d_arg, n_arg, unit_arg = self._parse_fixed_args(3)
            unit = self._interval_unit(unit_arg)
            return f"({d_arg} + INTERVAL ({n_arg}) {unit})"
        if upper == "DATETIMEDIFF":
            d1, d2, unit_arg = self._parse_fixed_args(3)
            unit = self._interval_unit(unit_arg)
            # Arguments are swapped: the second date is passed first.
            return f"DATEDIFF('{unit}', {d2}, {d1})"

        if self._cur.type == TT.LPAREN:
            # Generic function call with any number of arguments
            self._advance()
            args: list[str] = []
            if self._cur.type != TT.RPAREN:
                args.append(self._parse_expr())
                while self._cur.type == TT.COMMA:
                    self._advance()
                    args.append(self._parse_expr())
            self._expect(TT.RPAREN)
            return get_function_sql(name, args)

        # Bare identifier (e.g. a column name without brackets — unusual)
        return self._quote_ident(name)

    raise UnsupportedExpressionError(f"Unexpected token: {tok}")

@staticmethod
def _quote_ident(name: str) -> str:
    """Return *name* as a double-quoted SQL identifier, doubling embedded quotes."""
    return '"' + name.replace('"', '""') + '"'

@staticmethod
def _interval_unit(unit_arg: str) -> str:
    """Turn a rendered unit literal such as ``'days'`` into ``DAY``.

    Raises UnsupportedExpressionError when the result is not purely
    alphabetic, so arbitrary expression text is never spliced into SQL.
    """
    unit = unit_arg.strip("'").rstrip("s").upper()
    if not unit.isalpha():
        raise UnsupportedExpressionError(
            f"Unsupported date/time unit: {unit_arg}"
        )
    return unit

def _skip_empty_parens(self) -> None:
    """Consume an optional ``()`` pair at the current position."""
    if self._cur.type == TT.LPAREN:
        self._advance()
        self._expect(TT.RPAREN)

def _parse_fixed_args(self, count: int) -> list[str]:
    """Parse ``( expr, …, expr )`` holding exactly *count* arguments."""
    self._expect(TT.LPAREN)
    args = [self._parse_expr()]
    for _ in range(count - 1):
        self._expect(TT.COMMA)
        args.append(self._parse_expr())
    self._expect(TT.RPAREN)
    return args

def _render_isnull(self) -> str:
    """Parse the ``(expr)`` of IsNull and render ``(expr IS NULL)``."""
    (inner,) = self._parse_fixed_args(1)
    return f"({inner} IS NULL)"

def _render_isempty(self) -> str:
    """Parse the ``(expr)`` of IsEmpty and render a NULL-or-empty-string test."""
    (inner,) = self._parse_fixed_args(1)
    return f"({inner} IS NULL OR {inner} = '')"

def _render_iif(self) -> str:
    """Parse ``(cond, a, b)`` of IIF and render a two-branch ``CASE``."""
    cond, true_val, false_val = self._parse_fixed_args(3)
    return f"(CASE WHEN {cond} THEN {true_val} ELSE {false_val} END)"
|
||||
|
||||
def _parse_if(self) -> str:
    """Parse ``IF … THEN … [ELSEIF … THEN …]* [ELSE …] ENDIF`` into SQL ``CASE``.

    Every IF/ELSEIF pair becomes one ``WHEN … THEN …`` clause; the optional
    ELSE becomes ``ELSE …``. The pieces are joined with single spaces.
    """
    self._expect(TT.IF)
    pieces = ["CASE"]

    # The leading IF and every ELSEIF share the same "cond THEN value" shape.
    while True:
        condition = self._parse_expr()
        self._expect(TT.THEN)
        outcome = self._parse_expr()
        pieces.append(f"WHEN {condition} THEN {outcome}")
        if self._cur.type != TT.ELSEIF:
            break
        self._advance()

    if self._cur.type == TT.ELSE:
        self._advance()
        fallback = self._parse_expr()
        pieces.append(f"ELSE {fallback}")

    self._expect(TT.ENDIF)
    pieces.append("END")
    return " ".join(pieces)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Public API
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
def transpile(expression: str) -> str:
    """Convert an Alteryx expression string to a DuckDB SQL fragment.

    Surrounding whitespace is ignored; an empty expression yields ``NULL``.
    """
    source = expression.strip()
    if source:
        return _Parser(tokenise(source)).parse()
    return "NULL"
|
||||
|
||||
|
||||
def _coerce_numeric_strings(df: pl.DataFrame) -> pl.DataFrame:
    """Cast string columns that contain only numeric data to Int64 or Float64.

    A string column is converted when every non-null value survives a
    non-strict cast; Int64 is tried before Float64. Columns that are not
    strings, or that hold only nulls, are left alone. Intended to mirror
    Alteryx's implicit string-to-number coercion.
    """
    conversions: list[pl.Expr] = []
    for name in df.columns:
        column = df[name]
        if column.dtype != pl.String:
            continue
        present = column.drop_nulls()
        if len(present) == 0:
            continue
        # First target whose cast leaves no nulls wins (integer before float).
        for target in (pl.Int64, pl.Float64):
            if present.cast(target, strict=False).null_count() == 0:
                conversions.append(pl.col(name).cast(target, strict=False))
                break
    if not conversions:
        return df
    return df.with_columns(conversions)
|
||||
|
||||
|
||||
class ExpressionTranspiler:
    """Stateful transpiler bound to a DuckDB connection for evaluation.

    Each evaluation registers the input frame under a fresh name, runs one
    ``SELECT`` over it and drops the registration again.
    """

    def __init__(self, con: duckdb.DuckDBPyConnection):
        self._con = con
        # Incremented per registration so every frame gets a distinct name.
        self._view_counter = 0

    def _register(self, df: pl.DataFrame) -> str:
        """Register *df* (converted to Arrow) on the connection; return its name."""
        name = f"_expr_df_{self._view_counter}"
        self._view_counter += 1
        self._con.register(name, df.to_arrow())
        return name

    def _drop(self, view: str) -> None:
        """Drop the registered view *view* if it still exists."""
        self._con.execute(f'DROP VIEW IF EXISTS "{view}"')

    def _select(self, sql_expr: str, alias: str, view: str) -> pl.Series:
        """Run ``SELECT (sql_expr) AS alias FROM view`` and return that column."""
        # Double embedded quotes so any field name is a valid SQL identifier.
        quoted_alias = '"' + alias.replace('"', '""') + '"'
        frame = self._con.execute(
            f'SELECT ({sql_expr}) AS {quoted_alias} FROM "{view}"'
        ).pl()
        return frame[alias]

    def _evaluate(self, df: pl.DataFrame, sql_expr: str, alias: str) -> pl.Series:
        """Evaluate *sql_expr* over *df*, retrying once on a binder error.

        On ``duckdb.BinderException`` the query is re-run against a copy of
        *df* whose numeric-looking string columns were coerced to numbers.
        Whichever view is current is dropped on the way out.
        """
        view = self._register(df)
        try:
            try:
                return self._select(sql_expr, alias, view)
            except duckdb.BinderException:
                # Type mismatch: retry after coercing numeric-looking string columns
                self._drop(view)
                view = self._register(_coerce_numeric_strings(df))
                return self._select(sql_expr, alias, view)
        finally:
            self._drop(view)

    def eval_mask(self, df: pl.DataFrame, expression: str) -> pl.Series:
        """Evaluate a boolean Alteryx expression against df, return bool Series."""
        return self._evaluate(df, transpile(expression), "_mask")

    def eval_series(
        self,
        df: pl.DataFrame,
        expression: str,
        field: str,
        dtype: pl.PolarsDataType,
    ) -> pl.Series:
        """Evaluate a scalar Alteryx expression against df, return a Series.

        The result is cast to *dtype* when possible; if the cast fails the
        series is returned with the dtype DuckDB produced.
        """
        series = self._evaluate(df, transpile(expression), field)
        try:
            return series.cast(dtype)
        except Exception:
            # Deliberate best effort: keep the uncast result rather than fail.
            return series

    def eval_scalar(self, expression: str) -> object:
        """Evaluate an expression that requires no input columns."""
        sql_expr = transpile(expression)
        result = self._con.execute(f"SELECT ({sql_expr})").fetchone()
        return result[0] if result else None
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,25 @@
|
|||
"""Shared fixtures for tests."""
|
||||
from __future__ import annotations
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
import duckdb
|
||||
|
||||
# Ensure the alteryx_runner package root is on sys.path
|
||||
PKG = Path(__file__).parent.parent # alteryx_runner/
|
||||
if str(PKG) not in sys.path:
|
||||
sys.path.insert(0, str(PKG))
|
||||
|
||||
from engine.context import RunContext
|
||||
|
||||
|
||||
@pytest.fixture
def ctx(tmp_path):
    """Provide a non-verbose RunContext rooted at the per-test temp directory."""
    workflow_root = str(tmp_path)
    return RunContext(workflow_dir=workflow_root, verbose=False)
|
||||
|
||||
|
||||
@pytest.fixture
def duckdb_con():
    """Yield an in-memory DuckDB connection and close it at teardown."""
    connection = duckdb.connect(":memory:")
    try:
        yield connection
    finally:
        connection.close()
|
||||
|
|
@ -0,0 +1,89 @@
|
|||
"""Tests for the XML parser."""
|
||||
from __future__ import annotations
|
||||
import sys
|
||||
import textwrap
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
PKG = Path(__file__).parent.parent # alteryx_runner/
|
||||
if str(PKG) not in sys.path:
|
||||
sys.path.insert(0, str(PKG))
|
||||
|
||||
from engine.parser import parse_workflow
|
||||
|
||||
|
||||
def _write_yxmd(tmp_path: Path, body: str) -> Path:
|
||||
content = f'<AlteryxDocument yxmdVer="2022.1">{body}<Properties/></AlteryxDocument>'
|
||||
p = tmp_path / "test.yxmd"
|
||||
p.write_text(content)
|
||||
return p
|
||||
|
||||
|
||||
class TestParser:
    """parse_workflow: nodes, connections and node positions read from .yxmd XML."""

    def test_simple_nodes(self, tmp_path):
        """Two nodes joined by one connection are parsed with the right ids."""
        body = textwrap.dedent("""\
            <Nodes>
              <Node ToolID="1">
                <GuiSettings Plugin="AlteryxBasePluginsGui.TextInput.TextInput">
                  <Position x="0" y="0"/>
                </GuiSettings>
                <Properties><Configuration/></Properties>
              </Node>
              <Node ToolID="2">
                <GuiSettings Plugin="AlteryxBasePluginsGui.Filter.Filter">
                  <Position x="100" y="0"/>
                </GuiSettings>
                <Properties><Configuration><Expression>True</Expression></Configuration></Properties>
              </Node>
            </Nodes>
            <Connections>
              <Connection>
                <Origin ToolID="1" Connection="Output"/>
                <Destination ToolID="2" Connection="Input"/>
              </Connection>
            </Connections>
        """)
        workflow_file = _write_yxmd(tmp_path, body)
        graph = parse_workflow(str(workflow_file))
        for tool_id in (1, 2):
            assert tool_id in graph.nodes
        assert len(graph.connections) == 1
        link = graph.connections[0]
        assert link.origin_id == 1
        assert link.dest_id == 2

    def test_wireless_connection(self, tmp_path):
        """A connection marked Wireless="True" is flagged as wireless."""
        body = textwrap.dedent("""\
            <Nodes>
              <Node ToolID="10">
                <GuiSettings Plugin="AlteryxBasePluginsGui.TextInput.TextInput">
                  <Position x="0" y="0"/>
                </GuiSettings>
                <Properties><Configuration/></Properties>
              </Node>
            </Nodes>
            <Connections>
              <Connection Wireless="True">
                <Origin ToolID="10" Connection="Output"/>
                <Destination ToolID="20" Connection="Input"/>
              </Connection>
            </Connections>
        """)
        workflow_file = _write_yxmd(tmp_path, body)
        graph = parse_workflow(str(workflow_file))
        first_link = graph.connections[0]
        assert first_link.wireless is True

    def test_node_position(self, tmp_path):
        """The Position element's x/y end up as the node's position tuple."""
        body = textwrap.dedent("""\
            <Nodes>
              <Node ToolID="5">
                <GuiSettings Plugin="AlteryxBasePluginsGui.Sort.Sort">
                  <Position x="42" y="99"/>
                </GuiSettings>
                <Properties><Configuration/></Properties>
              </Node>
            </Nodes>
            <Connections/>
        """)
        workflow_file = _write_yxmd(tmp_path, body)
        graph = parse_workflow(str(workflow_file))
        sort_node = graph.nodes[5]
        assert sort_node.position == (42, 99)
|
||||
|
|
@ -0,0 +1,266 @@
|
|||
"""Integration-level tool tests using in-memory DataFrames (no .yxmd required)."""
|
||||
from __future__ import annotations
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import xml.etree.ElementTree as ET
|
||||
import pytest
|
||||
import polars as pl
|
||||
|
||||
PKG = Path(__file__).parent.parent # alteryx_runner/
|
||||
if str(PKG) not in sys.path:
|
||||
sys.path.insert(0, str(PKG))
|
||||
|
||||
from engine.graph import NodeDef
|
||||
from engine.context import RunContext
|
||||
|
||||
|
||||
def make_node(tool_id: int, plugin: str, config_xml: str) -> NodeDef:
    """Build a NodeDef whose config is the element parsed from *config_xml*."""
    return NodeDef(
        tool_id=tool_id,
        plugin=plugin,
        config=ET.fromstring(config_xml),
    )
|
||||
|
||||
|
||||
@pytest.fixture
def ctx(tmp_path):
    """Provide a non-verbose RunContext rooted at the per-test temp directory."""
    workflow_root = str(tmp_path)
    return RunContext(workflow_dir=workflow_root, verbose=False)
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# TextInput
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestTextInput:
    """TextInput: inline rows become a string-typed frame."""

    def test_basic(self, ctx):
        """Two fields, two rows; an empty cell comes back as null."""
        from tools.inout.text_input import TextInputTool

        config_xml = """<Configuration>
            <Fields><Field name="A"/><Field name="B"/></Fields>
            <Data>
                <r><c>1</c><c>hello</c></r>
                <r><c>2</c><c></c></r>
            </Data>
        </Configuration>"""
        tool = TextInputTool(make_node(1, "", config_xml), ctx)
        frame = tool.execute({})["Output"]
        assert frame.shape == (2, 2)
        assert frame["A"].to_list() == ["1", "2"]
        assert frame["B"][1] is None  # empty → NULL
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Filter
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestFilter:
    """Filter: rows are split between the ``True`` and ``False`` outputs."""

    def _df(self) -> pl.DataFrame:
        """Four rows with ids 1-4 and alternating North/South regions."""
        return pl.DataFrame(
            {
                "ID": [1, 2, 3, 4],
                "Region": ["North", "South", "North", "South"],
            }
        )

    def test_simple_gt(self, ctx):
        """Simple mode with ``[ID] > 2`` sends two rows to each output."""
        from tools.preparation.filter_tool import FilterTool

        config_xml = """<Configuration>
            <Mode>Simple</Mode>
            <Simple>
                <Operator>></Operator>
                <Field>ID</Field>
                <Operands><Operand>2</Operand><DateType>fixed</DateType></Operands>
            </Simple>
            <Expression>[ID] > 2</Expression>
        </Configuration>"""
        tool = FilterTool(make_node(1, "", config_xml), ctx)
        outputs = tool.execute({"Input": self._df()})
        assert len(outputs["True"]) == 2
        assert len(outputs["False"]) == 2

    def test_custom_expr(self, ctx):
        """Custom mode keeps only the rows whose Region equals "South"."""
        from tools.preparation.filter_tool import FilterTool

        config_xml = """<Configuration>
            <Mode>Custom</Mode>
            <Expression>[Region] == "South"</Expression>
        </Configuration>"""
        tool = FilterTool(make_node(1, "", config_xml), ctx)
        matched = tool.execute({"Input": self._df()})["True"]
        assert len(matched) == 2
        assert all(region == "South" for region in matched["Region"].to_list())
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Select
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSelect:
    """Select: renaming, dropping and the ``*Unknown`` catch-all."""

    def test_drop_and_rename(self, ctx):
        """A is renamed to Alpha, B is dropped, C passes via ``*Unknown``."""
        from tools.preparation.select_tool import SelectTool

        source = pl.DataFrame({"A": [1, 2], "B": [3, 4], "C": [5, 6]})
        config_xml = """<Configuration OrderChanged="False">
            <SelectFields>
                <SelectField field="A" selected="True" rename="Alpha"/>
                <SelectField field="B" selected="False"/>
                <SelectField field="*Unknown" selected="True"/>
            </SelectFields>
        </Configuration>"""
        tool = SelectTool(make_node(1, "", config_xml), ctx)
        columns = tool.execute({"Input": source})["Output"].columns
        assert "Alpha" in columns
        assert "B" not in columns
        assert "C" in columns  # *Unknown passes through
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sort
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSort:
    """Sort: single-field ascending and descending order."""

    def test_ascending(self, ctx):
        """Names come back in alphabetical order."""
        from tools.preparation.sort_tool import SortTool

        source = pl.DataFrame(
            {"Name": ["Charlie", "Alice", "Bob"], "Score": [3, 1, 2]}
        )
        config_xml = """<Configuration>
            <SortInfo locale="0">
                <Field field="Name" order="Ascending"/>
            </SortInfo>
        </Configuration>"""
        tool = SortTool(make_node(1, "", config_xml), ctx)
        ordered = tool.execute({"Input": source})["Output"]
        assert ordered["Name"].to_list() == ["Alice", "Bob", "Charlie"]

    def test_descending(self, ctx):
        """Scores come back from highest to lowest."""
        from tools.preparation.sort_tool import SortTool

        source = pl.DataFrame({"Score": [3, 1, 2]})
        config_xml = """<Configuration>
            <SortInfo locale="0">
                <Field field="Score" order="Descending"/>
            </SortInfo>
        </Configuration>"""
        tool = SortTool(make_node(1, "", config_xml), ctx)
        ordered = tool.execute({"Input": source})["Output"]
        assert ordered["Score"].to_list() == [3, 2, 1]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Unique
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestUnique:
    """Unique: rows are split between ``Unique`` and ``Duplicate`` outputs."""

    def test_unique_and_duplicate(self, ctx):
        """Five names with two repeats give 3 unique and 2 duplicate rows."""
        from tools.preparation.unique_tool import UniqueTool

        source = pl.DataFrame({"Name": ["Alice", "Bob", "Alice", "Charlie", "Bob"]})
        config_xml = """<Configuration>
            <UniqueFields><Field name="Name"/></UniqueFields>
        </Configuration>"""
        tool = UniqueTool(make_node(1, "", config_xml), ctx)
        outputs = tool.execute({"Input": source})
        assert len(outputs["Unique"]) == 3
        assert len(outputs["Duplicate"]) == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Sample
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSample:
    """Sample: First-N and Last-N modes."""

    def _df(self) -> pl.DataFrame:
        """Ten rows holding the integers 0 through 9 in column N."""
        return pl.DataFrame({"N": list(range(10))})

    def test_first(self, ctx):
        """Mode First with N=3 keeps rows 0, 1, 2."""
        from tools.preparation.sample_tool import SampleTool

        config_xml = "<Configuration><Mode>First</Mode><N>3</N><GroupFields/></Configuration>"
        tool = SampleTool(make_node(1, "", config_xml), ctx)
        sampled = tool.execute({"Input": self._df()})["Output"]
        assert len(sampled) == 3
        assert sampled["N"].to_list() == [0, 1, 2]

    def test_last(self, ctx):
        """Mode Last with N=2 keeps rows 8 and 9."""
        from tools.preparation.sample_tool import SampleTool

        config_xml = "<Configuration><Mode>Last</Mode><N>2</N><GroupFields/></Configuration>"
        tool = SampleTool(make_node(1, "", config_xml), ctx)
        sampled = tool.execute({"Input": self._df()})["Output"]
        assert sampled["N"].to_list() == [8, 9]
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Union
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestUnion:
    """Union: stacking two inputs by column name or by position."""

    def test_by_name(self, ctx):
        """Auto mode stacks frames whose columns differ only in order."""
        from tools.join.union_tool import UnionTool

        first = pl.DataFrame({"A": [1], "B": [2]})
        second = pl.DataFrame({"B": [4], "A": [3]})
        config_xml = "<Configuration><Mode>Auto</Mode></Configuration>"
        tool = UnionTool(make_node(1, "", config_xml), ctx)
        stacked = tool.execute({"Input1": first, "Input2": second})["Output"]
        assert len(stacked) == 2

    def test_by_position(self, ctx):
        """ByPosition mode keeps the first input's column names."""
        from tools.join.union_tool import UnionTool

        first = pl.DataFrame({"X": [1], "Y": [2]})
        second = pl.DataFrame({"P": [3], "Q": [4]})
        config_xml = "<Configuration><Mode>ByPosition</Mode></Configuration>"
        tool = UnionTool(make_node(1, "", config_xml), ctx)
        stacked = tool.execute({"Input1": first, "Input2": second})["Output"]
        assert stacked.columns == ["X", "Y"]
        assert len(stacked) == 2
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Summarize
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestSummarize:
    """Summarize: group-by with a Sum aggregate."""

    def test_group_sum(self, ctx):
        """Sales are summed per Region into a column renamed Total."""
        from tools.transform.summarize_tool import SummarizeTool

        source = pl.DataFrame(
            {
                "Region": ["N", "S", "N", "S"],
                "Sales": [100, 200, 150, 250],
            }
        )
        config_xml = """<Configuration>
            <SummarizeFields>
                <SummarizeField field="Region" action="GroupBy" rename="Region"/>
                <SummarizeField field="Sales" action="Sum" rename="Total"/>
            </SummarizeFields>
        </Configuration>"""
        tool = SummarizeTool(make_node(1, "", config_xml), ctx)
        summary = tool.execute({"Input": source})["Output"]
        assert set(summary.columns) == {"Region", "Total"}
        per_region = dict(
            zip(summary["Region"].to_list(), summary["Total"].to_list())
        )
        assert per_region["N"] == 250
        assert per_region["S"] == 450
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# Transpose
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestTranspose:
    """Transpose: unpivoting data fields into Name/Value rows."""

    def test_unpivot(self, ctx):
        """Two rows with two data fields yield four Name/Value rows."""
        from tools.transform.transpose_tool import TransposeTool

        source = pl.DataFrame({"ID": [1, 2], "Visits": [5, 10], "Spend": [100, 200]})
        config_xml = """<Configuration>
            <KeyFields><Field name="ID"/></KeyFields>
            <DataFields>
                <Field name="Visits"/>
                <Field name="Spend"/>
            </DataFields>
        </Configuration>"""
        tool = TransposeTool(make_node(1, "", config_xml), ctx)
        unpivoted = tool.execute({"Input": source})["Output"]
        for expected_column in ("Name", "Value"):
            assert expected_column in unpivoted.columns
        assert len(unpivoted) == 4  # 2 rows × 2 data cols
|
||||
|
||||
|
||||
# ---------------------------------------------------------------------------
|
||||
# RecordID
|
||||
# ---------------------------------------------------------------------------
|
||||
|
||||
class TestRecordID:
    """RecordID: a sequential id column is added to the frame."""

    def test_starts_at_one(self, ctx):
        """Ids run 1..3 and the new column is placed first."""
        from tools.preparation.record_id import RecordIDTool

        source = pl.DataFrame({"Name": ["A", "B", "C"]})
        config_xml = """<Configuration>
            <Field>ID</Field>
            <StartValue>1</StartValue>
            <FieldType>Int32</FieldType>
        </Configuration>"""
        tool = RecordIDTool(make_node(1, "", config_xml), ctx)
        numbered = tool.execute({"Input": source})["Output"]
        assert numbered["ID"].to_list() == [1, 2, 3]
        assert numbered.columns[0] == "ID"
|
||||
|
|
@ -0,0 +1,155 @@
|
|||
"""Unit tests for the Alteryx → DuckDB expression transpiler."""
|
||||
from __future__ import annotations
|
||||
import sys
|
||||
from pathlib import Path
|
||||
import pytest
|
||||
|
||||
PKG = Path(__file__).parent.parent # alteryx_runner/
|
||||
if str(PKG) not in sys.path:
|
||||
sys.path.insert(0, str(PKG))
|
||||
|
||||
from expression.transpiler import transpile, UnsupportedExpressionError
|
||||
|
||||
|
||||
def t(expr: str) -> str:
    """Shorthand used by the tests below: transpile *expr* and return the SQL."""
    sql = transpile(expr)
    return sql
|
||||
|
||||
|
||||
class TestLiterals:
    """Literal values: numbers, strings, NULL and booleans."""

    def test_number(self):
        sql = transpile("42")
        assert sql == "42"

    def test_float(self):
        sql = transpile("3.14")
        assert sql == "3.14"

    def test_string(self):
        # Double-quoted source string becomes a single-quoted SQL string.
        sql = transpile('"hello"')
        assert sql == "'hello'"

    def test_null(self):
        sql = transpile("NULL()")
        assert sql == "NULL"

    def test_true(self):
        sql = transpile("True")
        assert sql == "TRUE"

    def test_false(self):
        sql = transpile("False")
        assert sql == "FALSE"
|
||||
|
||||
|
||||
class TestColumnRef:
    """Bracketed column references become double-quoted identifiers."""

    def test_simple(self):
        sql = transpile("[CustomerID]")
        assert sql == '"CustomerID"'

    def test_spaces(self):
        # Spaces inside the brackets are kept in the identifier.
        sql = transpile("[First Name]")
        assert sql == '"First Name"'
|
||||
|
||||
|
||||
class TestOperators:
    """Comparison and boolean operators."""

    def test_eq(self):
        sql = transpile("[A] == [B]")
        assert sql == '("A" = "B")'

    def test_neq(self):
        sql = transpile("[A] != [B]")
        assert sql == '("A" <> "B")'

    def test_gt(self):
        sql = transpile("[Score] > 50")
        assert sql == '("Score" > 50)'

    def test_and(self):
        assert "AND" in transpile('[A] > 0 AND [B] < 10')

    def test_or(self):
        assert "OR" in transpile('[A] > 0 OR [B] < 0')

    def test_not(self):
        assert "NOT" in transpile('NOT [IsActive]')

    def test_bang(self):
        # "!" is expected to produce the same NOT keyword as the word form.
        assert "NOT" in transpile('![IsActive]')
|
||||
|
||||
|
||||
class TestIfThenEndif:
    """IF / ELSEIF / ELSE / ENDIF blocks become SQL CASE expressions."""

    def test_simple(self):
        rendered = transpile('IF [Score] > 50 THEN "Pass" ELSE "Fail" ENDIF')
        for fragment in ("CASE WHEN", "'Pass'", "'Fail'"):
            assert fragment in rendered

    def test_elseif(self):
        rendered = transpile(
            'IF [Score] > 90 THEN "A" ELSEIF [Score] > 70 THEN "B" ELSE "C" ENDIF'
        )
        # One WHEN for the IF and one for the ELSEIF.
        assert rendered.count("WHEN") == 2

    def test_no_else(self):
        rendered = transpile('IF [Active] == "Y" THEN "Yes" ENDIF')
        assert "CASE WHEN" in rendered
|
||||
|
||||
|
||||
class TestIIF:
    """IIF(cond, a, b) becomes a SQL CASE expression."""

    def test_iif(self):
        rendered = transpile('IIF([Score] > 50, "Pass", "Fail")')
        assert "CASE WHEN" in rendered
|
||||
|
||||
|
||||
class TestIsNull:
    """IsNull(...) and its negation."""

    def test_isnull_keyword(self):
        rendered = transpile('IsNull([Field])')
        assert "IS NULL" in rendered

    def test_not_isnull(self):
        rendered = transpile('!IsNull([Field])')
        assert "NOT" in rendered
        assert "IS NULL" in rendered
|
||||
|
||||
|
||||
class TestFunctions:
    """Function calls are mapped to their SQL counterparts."""

    def test_uppercase(self):
        rendered = transpile('Uppercase([Name])')
        assert rendered == 'UPPER("Name")'

    def test_length(self):
        rendered = transpile('Length([Name])')
        assert "LENGTH" in rendered

    def test_left(self):
        rendered = transpile('Left([Name], 3)')
        assert "LEFT" in rendered

    def test_round(self):
        rendered = transpile('Round([Score], 2)')
        assert "ROUND" in rendered

    def test_abs(self):
        rendered = transpile('ABS([Val])')
        assert "ABS" in rendered

    def test_trim(self):
        rendered = transpile('Trim([Name])')
        assert "TRIM" in rendered

    def test_nested(self):
        # Both the outer and the inner call must survive translation.
        rendered = transpile('Uppercase(Trim([Name]))')
        for sql_name in ("UPPER", "TRIM"):
            assert sql_name in rendered
|
||||
|
||||
|
||||
class TestArithmetic:
    """Arithmetic operators are carried through to the SQL text."""

    def test_add(self):
        rendered = transpile('[A] + [B]')
        assert "+" in rendered

    def test_multiply(self):
        rendered = transpile('[A] * [B]')
        assert "*" in rendered

    def test_divide(self):
        rendered = transpile('[A] / [B]')
        assert "/" in rendered

    def test_complex(self):
        # Arithmetic nested inside a function-call argument.
        rendered = transpile('ROUND([Spend] / [Visits], 1)')
        assert "ROUND" in rendered
|
||||
|
||||
|
||||
class TestDateFunctions:
    """Zero-argument date/time functions."""

    def test_datetimenow(self):
        rendered = transpile('DateTimeNow()')
        assert "NOW()" in rendered

    def test_datetimetoday(self):
        rendered = transpile('DateTimeToday()')
        assert "CURRENT_DATE" in rendered
|
||||
|
|
@ -0,0 +1,87 @@
|
|||
"""Tool registry: Plugin string → tool class."""
|
||||
from __future__ import annotations
|
||||
from typing import Optional, Type
|
||||
from tools.base import BaseTool
|
||||
|
||||
from tools.inout import InputDataTool, OutputDataTool, TextInputTool, BrowseTool
|
||||
from tools.preparation import (
|
||||
FilterTool, FormulaTool, SelectTool, SortTool, SampleTool,
|
||||
UniqueTool, GenerateRowsTool, MultiRowFormulaTool,
|
||||
MultiFieldFormulaTool, RecordIDTool, AutoFieldTool,
|
||||
)
|
||||
from tools.join import JoinTool, JoinMultipleTool, UnionTool, AppendFieldsTool, FindReplaceTool
|
||||
from tools.parse import DateTimeTool, RegExTool, TextToColumnsTool
|
||||
from tools.transform import SummarizeTool, CrossTabTool, TransposeTool
|
||||
|
||||
|
||||
class _PassthroughTool(BaseTool):
    """No-op tool: forwards its first input unchanged as ``Output``.

    With no inputs it returns an empty polars DataFrame instead.
    """

    def execute(self, inputs):
        if inputs:
            return {"Output": next(iter(inputs.values()))}
        # Local import: polars is only needed for the empty-input fallback
        # and is not imported at module level in this file.
        import polars as pl

        return {"Output": pl.DataFrame()}
|
||||
|
||||
|
||||
class _NullTool(BaseTool):
    """Tool that ignores its inputs and produces no outputs."""

    def execute(self, inputs):
        no_outputs = {}
        return no_outputs
|
||||
|
||||
|
||||
# Maps a node's Plugin string to the tool class that executes it.
# Looked up by get_tool_class() and extended at runtime by register_tool().
_REGISTRY: dict[str, Type[BaseTool]] = {
    # In/Out
    "AlteryxBasePluginsGui.DbFileInput.DbFileInput": InputDataTool,
    "AlteryxBasePluginsGui.DbFileOutput.DbFileOutput": OutputDataTool,
    "AlteryxBasePluginsGui.TextInput.TextInput": TextInputTool,
    "AlteryxBasePluginsGui.BrowseV2.BrowseV2": BrowseTool,
    # Preparation
    "AlteryxBasePluginsGui.Filter.Filter": FilterTool,
    "AlteryxBasePluginsGui.Formula.Formula": FormulaTool,
    "AlteryxBasePluginsGui.AlteryxSelect.AlteryxSelect": SelectTool,
    "AlteryxBasePluginsGui.Sort.Sort": SortTool,
    "AlteryxBasePluginsGui.Sample.Sample": SampleTool,
    "AlteryxBasePluginsGui.Unique.Unique": UniqueTool,
    "AlteryxBasePluginsGui.GenerateRows.GenerateRows": GenerateRowsTool,
    "AlteryxBasePluginsGui.MultiRowFormula.MultiRowFormula": MultiRowFormulaTool,
    "AlteryxBasePluginsGui.MultiFieldFormula.MultiFieldFormula": MultiFieldFormulaTool,
    "AlteryxBasePluginsGui.RecordID.RecordID": RecordIDTool,
    "AlteryxBasePluginsGui.AutoField.AutoField": AutoFieldTool,
    # Join
    "AlteryxBasePluginsGui.Join.Join": JoinTool,
    "AlteryxBasePluginsGui.JoinMultiple.JoinMultiple": JoinMultipleTool,
    "AlteryxBasePluginsGui.Union.Union": UnionTool,
    "AlteryxBasePluginsGui.AppendFields.AppendFields": AppendFieldsTool,
    "AlteryxBasePluginsGui.FindReplace.FindReplace": FindReplaceTool,
    # Parse
    "AlteryxBasePluginsGui.DateTime.DateTime": DateTimeTool,
    "AlteryxBasePluginsGui.RegEx.RegEx": RegExTool,
    "AlteryxBasePluginsGui.TextToColumns.TextToColumns": TextToColumnsTool,
    # Transform
    # NOTE(review): Summarize is the only key under the "Spatial" namespace —
    # presumably intentional; confirm against a real workflow file.
    "AlteryxSpatialPluginsGui.Summarize.Summarize": SummarizeTool,
    "AlteryxBasePluginsGui.CrossTab.CrossTab": CrossTabTool,
    "AlteryxBasePluginsGui.Transpose.Transpose": TransposeTool,
    # Documentation / no-op
    "AlteryxGuiToolkit.ToolContainer.ToolContainer": _PassthroughTool,
    "AlteryxGuiToolkit.TextBox.TextBox": _NullTool,
    "AlteryxGuiToolkit.Comment.Comment": _NullTool,
    "AlteryxBasePluginsGui.AlteryxAnnotation.AlteryxAnnotation": _NullTool,
}
|
||||
|
||||
|
||||
def get_tool_class(plugin: str) -> Optional[Type[BaseTool]]:
    """Return the tool class for a given Plugin string, or None if unsupported.

    An exact registry hit wins. Otherwise the first registry entry (in
    insertion order) is returned whose final dotted segment equals the
    plugin's final segment, or whose full key occurs inside *plugin*.

    The segment comparison is an exact match, not a suffix test, so that
    e.g. a plugin ending in ``MultiRowFormula`` can never be resolved to
    the ``Formula`` tool.
    """
    if not plugin:
        return None
    exact = _REGISTRY.get(plugin)
    if exact is not None:
        return exact
    # Partial match fallback — useful for minor version differences in plugin names
    plugin_tail = plugin.rsplit(".", 1)[-1]
    for key, candidate in _REGISTRY.items():
        if plugin_tail == key.rsplit(".", 1)[-1] or key in plugin:
            return candidate
    return None
|
||||
|
||||
|
||||
def register_tool(plugin: str, cls: Type[BaseTool]) -> None:
    """Associate a Plugin string with a custom tool class in the registry.

    Any entry already stored under ``plugin`` is replaced.
    """
    _REGISTRY.update({plugin: cls})
|
||||
|
||||
|
||||
__all__ = ["get_tool_class", "register_tool", "BaseTool"]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,30 @@
|
|||
from __future__ import annotations
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Dict, Optional
|
||||
import polars as pl
|
||||
import xml.etree.ElementTree as ET
|
||||
from engine.graph import NodeDef
|
||||
from engine.context import RunContext
|
||||
|
||||
|
||||
class BaseTool(ABC):
    """Abstract base class for tools.

    Stores the node definition, the run context and the node's configuration
    element, and offers small helpers for reading values out of that
    configuration.
    """

    def __init__(self, node: NodeDef, ctx: RunContext):
        self.node = node
        self.ctx = ctx
        # Configuration XML element taken from the node; may be None.
        self.config: Optional[ET.Element] = node.config

    @abstractmethod
    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Execute the tool and return named output DataFrames."""

    def _cfg(self, xpath: str, default: Optional[str] = None) -> Optional[str]:
        """Return the text of the first element matching ``xpath``.

        ``default`` is returned when there is no configuration or no matching
        element. A matching element without text yields ``None``.
        """
        if self.config is None:
            return default
        match = self.config.find(xpath)
        if match is None:
            return default
        return match.text

    def _cfg_attr(self, xpath: str, attr: str, default: Optional[str] = None) -> Optional[str]:
        """Return attribute ``attr`` of the first element matching ``xpath``.

        ``default`` is returned when the configuration, the element or the
        attribute is missing.
        """
        if self.config is None:
            return default
        match = self.config.find(xpath)
        if match is None:
            return default
        return match.attrib.get(attr, default)

    def _cfg_text(self, xpath: str, default: str = "") -> str:
        """Like ``_cfg`` but never returns ``None``; falls back to ``default``."""
        text = self._cfg(xpath, default)
        if text is None:
            return default
        return text
|
||||
|
|
@ -0,0 +1,6 @@
|
|||
from .input_data import InputDataTool
|
||||
from .output_data import OutputDataTool
|
||||
from .text_input import TextInputTool
|
||||
from .browse import BrowseTool
|
||||
|
||||
__all__ = ["InputDataTool", "OutputDataTool", "TextInputTool", "BrowseTool"]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
|
|
@ -0,0 +1,14 @@
|
|||
from __future__ import annotations
|
||||
from typing import Dict
|
||||
import polars as pl
|
||||
from tools.base import BaseTool
|
||||
|
||||
|
||||
class BrowseTool(BaseTool):
    """Pass-through tool that prints a preview of its input in verbose mode."""

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Return the "Input" frame unchanged under the "Output" key.

        When the run context is verbose, the tool id, row count, column names
        and the first 20 rows are printed first. A missing "Input" is treated
        as an empty DataFrame.
        """
        frame = inputs.get("Input", pl.DataFrame())
        result = {"Output": frame}

        if not self.ctx.verbose:
            return result

        print(f"\n[Browse ToolID={self.node.tool_id}]")
        print(f"  rows={len(frame)}  cols={frame.columns}")
        print(frame.head(20))
        return result
|
||||
|
|
@ -0,0 +1,99 @@
|
|||
from __future__ import annotations
|
||||
from typing import Dict, Optional
|
||||
import xml.etree.ElementTree as ET
|
||||
import polars as pl
|
||||
from tools.base import BaseTool
|
||||
|
||||
|
||||
class InputDataTool(BaseTool):
    """Load the file referenced by the tool's ``File`` element into a DataFrame."""

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Read the configured file and return it under the "Output" key.

        An empty DataFrame is returned when there is no configuration or the
        ``File`` element is missing or has no text. The ``FileFormat`` and
        ``RecordLimit`` attributes of ``File`` select the reader and cap the
        number of rows; ``FormatSpecificOptions`` carries reader options.

        Raises:
            ValueError: if ``FileFormat``, ``RecordLimit`` or ``ImportLine``
                is not an integer.
            NotImplementedError: for YXDB input when ``yxdb`` is not installed.
        """
        if self.config is None:
            return {"Output": pl.DataFrame()}

        file_el = self.config.find("File")
        if file_el is None or not file_el.text:
            return {"Output": pl.DataFrame()}

        raw_path = file_el.text.strip()
        fmt = int(file_el.attrib.get("FileFormat", "0"))
        record_limit_str = file_el.attrib.get("RecordLimit", "").strip()
        limit = int(record_limit_str) if record_limit_str else None

        opts = self.config.find("FormatSpecificOptions")
        if opts is None:
            # Stand-in element so the readers can call findtext() unconditionally.
            opts = ET.Element("x")

        path_str, sheet = self._parse_path(raw_path)
        resolved = self.ctx.resolve_path(path_str)

        df = self._read(str(resolved), fmt, sheet, opts)

        # Trim whitespace from string columns (matches Alteryx behavior).
        # All string columns are rewritten in one pass instead of rebuilding
        # the frame once per column.
        string_cols = [
            name for name, dtype in zip(df.columns, df.dtypes) if dtype == pl.String
        ]
        if string_cols:
            df = df.with_columns(pl.col(string_cols).str.strip_chars())

        if limit:
            df = df.head(limit)

        return {"Output": df}

    def _parse_path(self, raw: str) -> tuple[str, Optional[str]]:
        """Split ``path|||sheet`` into its parts.

        The sheet part has surrounding whitespace and backticks removed and
        trailing ``$`` characters stripped. Without ``|||`` the sheet is None.
        """
        if "|||" in raw:
            path, sheet = raw.split("|||", 1)
            return path.strip(), sheet.strip().strip("`").rstrip("$")
        return raw.strip(), None

    @staticmethod
    def _delimiter(opts: ET.Element) -> str:
        """Return the configured field delimiter, defaulting to a comma.

        Both the ``Delimeter`` and ``Delimiter`` spellings are accepted.
        """
        raw = opts.findtext("Delimeter") or opts.findtext("Delimiter") or ","
        # A tab may be written as the two-character escape "\t" in the
        # workflow XML; polars only accepts a single-byte separator, so
        # translate it to a real tab character.
        return "\t" if raw == "\\t" else raw

    def _read(
        self,
        path: str,
        fmt: int,
        sheet: Optional[str],
        opts: ET.Element,
    ) -> pl.DataFrame:
        """Read ``path`` with the reader selected by the ``fmt`` code.

        Unknown format codes fall back to a default CSV read.
        """
        if fmt in (0, 6):  # CSV / delimited
            header_text = opts.findtext("HeaderRow") or "True"
            has_header = header_text.strip().lower() in ("true", "1", "yes")
            import_line = int(opts.findtext("ImportLine") or "1")
            # ImportLine is 1-based; convert to a number of rows to skip.
            skip = max(0, import_line - 1)
            return pl.read_csv(
                path,
                separator=self._delimiter(opts),
                has_header=has_header,
                skip_rows=skip,
                infer_schema_length=10000,
                ignore_errors=True,
            )

        if fmt == 25:  # Excel
            # FirstRowData=True means the first row is data, i.e. no header.
            read_header = (opts.findtext("FirstRowData") or "False").lower() != "true"
            import_line = int(opts.findtext("ImportLine") or "1")
            skip = max(0, import_line - 1)
            return pl.read_excel(
                path,
                sheet_name=sheet or 0,
                read_options={"has_header": read_header, "skip_rows": skip},
            )

        if fmt == 2:  # Parquet
            return pl.read_parquet(path)

        if fmt == 19:  # YXDB
            # Only the import is guarded, so a failure while reading the file
            # is not misreported as a missing package.
            try:
                import yxdb
            except ImportError as exc:
                raise NotImplementedError(
                    "YXDB format requires the 'yxdb' package: pip install yxdb"
                ) from exc
            # NOTE(review): confirm that the installed yxdb package exposes
            # open_file() and yields row records when iterated.
            reader = yxdb.open_file(path)
            rows = list(reader)
            if rows:
                return pl.DataFrame(rows)
            return pl.DataFrame()

        if fmt == 56:  # JSON
            return pl.read_json(path)

        # Fallback: try CSV
        return pl.read_csv(path, infer_schema_length=10000, ignore_errors=True)
|
||||
|
|
@ -0,0 +1,88 @@
|
|||
from __future__ import annotations
|
||||
from pathlib import Path
|
||||
from typing import Dict
|
||||
import xml.etree.ElementTree as ET
|
||||
import polars as pl
|
||||
from tools.base import BaseTool
|
||||
|
||||
|
||||
class OutputDataTool(BaseTool):
    """Write the "Input" DataFrame to the file named in the ``File`` element."""

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Write the input frame to disk; always returns an empty mapping.

        Nothing is written when there is no configuration, the input is
        empty, or the ``File`` element is missing or has no text. Otherwise
        one of three modes applies:

        * multi-file: ``MultiFile`` is "True" and ``MultiFileField`` names an
          existing column, so one file is written per distinct value of it;
        * chunked: ``MaxRecords`` is set, so the frame is written in slices
          of that many rows, the first to the base path and later ones to
          ``<stem>_<n>``;
        * single file otherwise.

        Raises:
            ValueError: if ``FileFormat`` or ``MaxRecords`` is not an integer.
        """
        df = inputs.get("Input", pl.DataFrame())
        if self.config is None or df.is_empty():
            return {}

        file_el = self.config.find("File")
        if file_el is None or not file_el.text:
            return {}

        raw_path = file_el.text.strip()
        fmt = int(file_el.attrib.get("FileFormat", "0"))
        max_records_str = (file_el.attrib.get("MaxRecords") or "").strip()
        max_records = int(max_records_str) if max_records_str else None

        opts = self.config.find("FormatSpecificOptions")
        if opts is None:
            # Stand-in element so _write can call findtext() unconditionally.
            opts = ET.Element("x")

        multi_el = self.config.find("MultiFile")
        multi_file = (multi_el.attrib.get("value", "False") if multi_el is not None else "False") == "True"
        multi_field = (self.config.findtext("MultiFileField") or "").strip()
        multi_type = (self.config.findtext("MultiFileType") or "Suffix").strip()
        keep_field = (self.config.findtext("KeepField") or "True").strip().lower() == "true"

        out_path = self.ctx.resolve_output_path(raw_path)
        out_path.parent.mkdir(parents=True, exist_ok=True)

        if multi_file and multi_field and multi_field in df.columns:
            # partition_by splits the frame in one pass and keeps rows whose
            # key is null together as their own group. Filtering with
            # ``col == value`` never matches null, which dropped those rows
            # and wrote an empty file for them.
            for part in df.partition_by(multi_field, maintain_order=True):
                val = part[multi_field][0]
                if not keep_field:
                    part = part.drop(multi_field)
                part_path = self._multi_path(out_path, str(val), multi_type)
                self._write(part, part_path, fmt, opts)
        elif max_records:
            for chunk_num, start in enumerate(range(0, len(df), max_records)):
                chunk = df.slice(start, max_records)
                chunk_path = out_path if chunk_num == 0 else out_path.with_stem(
                    f"{out_path.stem}_{chunk_num}"
                )
                self._write(chunk, chunk_path, fmt, opts)
        else:
            self._write(df, out_path, fmt, opts)

        if self.ctx.verbose:
            print(f"[Output] Wrote {len(df)} rows → {out_path}")

        return {}

    def _multi_path(self, base: Path, value: str, mode: str) -> Path:
        """Derive a per-group file path from ``base`` and a group value.

        Characters other than alphanumerics, "-" and "_" are replaced by "_".
        The sanitized value is appended to the stem when ``mode`` is
        "Suffix" and prepended for any other mode.
        """
        safe = "".join(c if c.isalnum() or c in "-_" else "_" for c in value)
        if mode == "Suffix":
            return base.with_stem(f"{base.stem}_{safe}")
        return base.with_stem(f"{safe}_{base.stem}")

    @staticmethod
    def _delimiter(opts: ET.Element) -> str:
        """Return the configured field delimiter, defaulting to a comma.

        Both the ``Delimeter`` and ``Delimiter`` spellings are accepted.
        """
        raw = opts.findtext("Delimeter") or opts.findtext("Delimiter") or ","
        # A tab may be written as the two-character escape "\t" in the
        # workflow XML; polars only accepts a single-character separator, so
        # translate it to a real tab character.
        return "\t" if raw == "\\t" else raw

    def _write(self, df: pl.DataFrame, path: Path, fmt: int, opts: ET.Element) -> None:
        """Write ``df`` to ``path`` using the writer selected by ``fmt``.

        Unknown format codes are written as default CSV.
        """
        if fmt in (0, 6):  # CSV
            # Accept both HeaderRow and Headers element names
            header_val = (
                opts.findtext("HeaderRow")
                or opts.findtext("Headers")
                or "True"
            )
            header = header_val.lower() != "false"
            df.write_csv(str(path), separator=self._delimiter(opts), include_header=header)
        elif fmt == 25:  # Excel
            df.write_excel(str(path))
        elif fmt == 2:  # Parquet
            df.write_parquet(str(path))
        elif fmt == 19:  # YXDB — fall back to Parquet
            fallback = path.with_suffix(".parquet")
            df.write_parquet(str(fallback))
            if self.ctx.verbose:
                print(f"[Output] YXDB write not supported; wrote Parquet to {fallback}")
        else:
            df.write_csv(str(path))
|
||||
|
|
@ -0,0 +1,35 @@
|
|||
from __future__ import annotations
|
||||
from typing import Dict, Optional
|
||||
import polars as pl
|
||||
from tools.base import BaseTool
|
||||
|
||||
|
||||
class TextInputTool(BaseTool):
    """Build a DataFrame from rows embedded in the tool's configuration XML."""

    def execute(self, inputs: Dict[str, pl.DataFrame]) -> Dict[str, pl.DataFrame]:
        """Return the embedded table under the "Output" key.

        Column names come from the ``name`` attribute of ``Fields/Field``;
        each ``Data/r`` element is a row whose ``c`` children are matched to
        the columns by position. Missing or empty cells become null and every
        column is typed as String. With no configuration or no fields an
        empty DataFrame is returned.
        """
        if self.config is None:
            return {"Output": pl.DataFrame()}

        names = [field.attrib["name"] for field in self.config.findall("Fields/Field")]
        if not names:
            return {"Output": pl.DataFrame()}

        string_schema = {name: pl.String for name in names}

        records: list[dict] = []
        for row_el in self.config.findall("Data/r"):
            cell_els = row_el.findall("c")
            record: dict[str, Optional[str]] = {}
            for idx, name in enumerate(names):
                value = cell_els[idx].text if idx < len(cell_els) else None
                # Empty text in XML → NULL
                record[name] = value or None
            records.append(record)

        if records:
            return {"Output": pl.DataFrame(records, schema=string_schema)}

        # No data rows: keep the declared columns with zero rows.
        return {"Output": pl.DataFrame(schema=string_schema)}
|
||||
|
|
@ -0,0 +1,7 @@
|
|||
from .join_tool import JoinTool
|
||||
from .join_multiple import JoinMultipleTool
|
||||
from .union_tool import UnionTool
|
||||
from .append_fields import AppendFieldsTool
|
||||
from .find_replace import FindReplaceTool
|
||||
|
||||
__all__ = ["JoinTool", "JoinMultipleTool", "UnionTool", "AppendFieldsTool", "FindReplaceTool"]
|
||||
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Some files were not shown because too many files have changed in this diff Show More
Loading…
Reference in New Issue