This repository has been archived on 2026-06-13. You can view files and clone it, but cannot push or open issues/pull-requests.
Polaryx/scratchpad.ipynb

351 lines
11 KiB
Plaintext

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Setup"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [],
"source": [
"# Open Alteryx XML into a string \n",
"\n",
"import polars as pl \n",
"import xml.etree.ElementTree as ET\n",
"\n",
"# Relative path of the Alteryx XML file to open\n",
"xml_file_path = \"./SimpleWorkflow/SimpleWorkflow.yxmd\"\n",
"\n",
"# Parse the file; both the tree and its root element are kept as notebook-level names\n",
"tree = ET.parse(xml_file_path)\n",
"root = tree.getroot()\n",
"\n",
"# Serialise the root element back into one unicode string\n",
"xml_string = ET.tostring(root, encoding=\"unicode\")\n"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [],
"source": [
"# Parse out nodes (tool data) into a dict\n",
"\n",
"def extract_tool_id_and_contents(xml_string):\n",
"    \"\"\"Collect every ``Node`` element of an XML document as (ToolID, XML text) pairs.\n",
"\n",
"    Args:\n",
"        xml_string (str): The XML document to parse.\n",
"\n",
"    Returns:\n",
"        list: One ``(tool_id, content)`` tuple per ``Node`` element, in the order yielded by\n",
"            ``Element.iter``. ``tool_id`` is the element's ``ToolID`` attribute (``None`` when\n",
"            the attribute is absent) and ``content`` is the element serialised as a unicode string.\n",
"    \"\"\"\n",
"    document = ET.fromstring(xml_string)\n",
"    return [\n",
"        (element.attrib.get('ToolID'), ET.tostring(element, encoding='unicode'))\n",
"        for element in document.iter('Node')\n",
"    ]\n",
"\n",
"\n",
"results = extract_tool_id_and_contents(xml_string)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Tool Functions"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [],
"source": [
"def selectTool(df: pl.DataFrame, col_specs: dict):\n",
"    \"\"\"\n",
"    Rename and retype columns of a Polars DataFrame according to ``col_specs``.\n",
"\n",
"    Args:\n",
"        df (pl.DataFrame): The input Polars DataFrame.\n",
"        col_specs (dict): Maps each original column name to a ``(new_name, dtype)`` tuple.\n",
"            A ``dtype`` of ``None`` leaves the column's type unchanged; a ``new_name`` of\n",
"            ``None`` (or one equal to the original name) leaves the column's name unchanged.\n",
"\n",
"    Returns:\n",
"        pl.DataFrame: A DataFrame with the requested casts and renames applied.\n",
"    \"\"\"\n",
"    casts = []\n",
"    renames = {}\n",
"    for old_name, (new_name, dt) in col_specs.items():\n",
"        if dt is not None:\n",
"            # Cast while the column still carries its original name; casting after the\n",
"            # rename would look up a column that no longer exists.\n",
"            casts.append(pl.col(old_name).cast(dt))\n",
"        if new_name is not None and new_name != old_name:\n",
"            renames[old_name] = new_name\n",
"\n",
"    if casts:\n",
"        # `with_columns` is the current Polars method; the older `with_column` is not\n",
"        # available in recent releases.\n",
"        df = df.with_columns(casts)\n",
"    if renames:\n",
"        # One rename call for the whole mapping instead of one call per column.\n",
"        df = df.rename(renames)\n",
"    return df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Parsing Tool Config data"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {},
"outputs": [],
"source": [
"def TextInputToDf(xml_string):\n",
"    \"\"\"\n",
"    Build a Polars DataFrame from the XML of a Text Input tool.\n",
"\n",
"    Args:\n",
"        xml_string (str): XML of one tool node. Column names are read from the ``name``\n",
"            attribute of its ``Fields/Field`` elements and rows from the ``c`` cells of\n",
"            its ``Data/r`` elements.\n",
"\n",
"    Returns:\n",
"        pl.DataFrame: One column per field and one row per ``r`` element.\n",
"\n",
"    Raises:\n",
"        ValueError: If a cell's text cannot be parsed as an integer.\n",
"        TypeError: If a cell has no text at all.\n",
"    \"\"\"\n",
"    # Parse the XML that was passed in, not a fixed entry of the global `results`,\n",
"    # so that every Text Input tool yields its own data.\n",
"    root = ET.fromstring(xml_string)\n",
"    # Extract the field names\n",
"    fields = [field.attrib['name'] for field in root.findall(\".//Fields/Field\")]\n",
"    # Extract the data rows; every cell is parsed as an integer\n",
"    data_rows = [[int(c.text) for c in row.findall(\"c\")] for row in root.findall(\".//Data/r\")]\n",
"    # Each inner list is one row, so state the orientation explicitly instead of\n",
"    # letting Polars infer it (which emits a DataOrientationWarning).\n",
"    df = pl.DataFrame(data_rows, schema=fields, orient=\"row\")\n",
"    return df"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"1 TextInput\n",
"shape: (3, 2)\n",
"┌──────────┬──────────┐\n",
"│ Column 3 ┆ Column 4 │\n",
"│ --- ┆ --- │\n",
"│ i64 ┆ i64 │\n",
"╞══════════╪══════════╡\n",
"│ 5 ┆ 8 │\n",
"│ 6 ┆ 9 │\n",
"│ 7 ┆ 10 │\n",
"└──────────┴──────────┘\n",
"2 AlteryxSelect\n",
"3 BrowseV2\n",
"4 TextInput\n",
"shape: (3, 2)\n",
"┌──────────┬──────────┐\n",
"│ Column 3 ┆ Column 4 │\n",
"│ --- ┆ --- │\n",
"│ i64 ┆ i64 │\n",
"╞══════════╪══════════╡\n",
"│ 5 ┆ 8 │\n",
"│ 6 ┆ 9 │\n",
"│ 7 ┆ 10 │\n",
"└──────────┴──────────┘\n",
"5 AlteryxSelect\n",
"6 Formula\n",
"7 Join\n",
"10 BrowseV2\n",
"11 Formula\n",
"12 Sample\n",
"13 BrowseV2\n",
"14 DbFileOutput\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"C:\\Users\\casey.morter\\AppData\\Local\\Temp\\ipykernel_4012\\3571569777.py:9: DataOrientationWarning: Row orientation inferred during DataFrame construction. Explicitly specify the orientation by passing `orient=\"row\"` to silence this warning.\n",
" df = pl.DataFrame(data_rows, fields)\n"
]
}
],
"source": [
"# Walk every (ToolID, XML) pair, report its tool type and render TextInput tools as DataFrames\n",
"for tool in results:\n",
"    ToolID, ToolXML = tool[0], tool[1]\n",
"\n",
"    # The third dot-separated part of the GuiSettings Plugin attribute is used as the tool type\n",
"    gui_settings = ET.fromstring(ToolXML).find(\".//GuiSettings\")\n",
"    plugin_parts = gui_settings.attrib['Plugin'].split(\".\")\n",
"    ToolType = plugin_parts[2]\n",
"\n",
"    print(ToolID, ToolType)\n",
"\n",
"    if ToolType != 'TextInput':\n",
"        continue\n",
"    print(TextInputToDf(ToolXML))"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"# Check out a tool\n",
"# 0 = TextInput\n",
"# 4 = select with rename\n",
"tool_xml = results[4][1]"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'c:\\\\Users\\\\casey.morter\\\\OneDrive - JLL\\\\Documents\\\\01 Workspace\\\\01 Python\\\\Polaryx'"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%pwd"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"ename": "KeyError",
"evalue": "'type'",
"output_type": "error",
"traceback": [
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[1;32mIn[31], line 16\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m field \u001b[38;5;129;01min\u001b[39;00m root\u001b[38;5;241m.\u001b[39mfindall(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.//SelectFields/SelectField\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m 15\u001b[0m field_name \u001b[38;5;241m=\u001b[39m field\u001b[38;5;241m.\u001b[39mattrib[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfield\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m---> 16\u001b[0m field_type \u001b[38;5;241m=\u001b[39m \u001b[43mfield\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrib\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtype\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 19\u001b[0m field_rename \u001b[38;5;241m=\u001b[39m field\u001b[38;5;241m.\u001b[39mattrib[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrename\u001b[39m\u001b[38;5;124m'\u001b[39m]\n",
"\u001b[1;31mKeyError\u001b[0m: 'type'"
]
}
],
"source": [
"# Example of the spec layout: original column name -> (new_name, dtype, drop)\n",
"col_specs = {\n",
"    \"A\": (\"x\", int, False),\n",
"    \"B\": (\"y\", str, False),\n",
"    \"D\": (None, None, True)  # drop this column\n",
"}\n",
"\n",
"# Parse the XML of the tool under inspection\n",
"root = ET.fromstring(tool_xml)\n",
"\n",
"# One (rename, type, drop) entry per SelectField, keyed by the original field name\n",
"dict_SelectTool = {}\n",
"\n",
"for field in root.findall(\".//SelectFields/SelectField\"):\n",
"    field_name = field.attrib['field']\n",
"\n",
"    # Optional attributes: `.get` yields None when the attribute is absent, without a\n",
"    # bare `except` that would also hide unrelated errors.\n",
"    field_type = field.get('type')\n",
"    field_rename = field.get('rename')\n",
"\n",
"    # NOTE(review): assumes deselected fields carry selected=\"False\" -- confirm against the workflow XML\n",
"    field_drop = field.get('selected') == 'False'\n",
"\n",
"    # Same slot order as the col_specs example above: (new_name, dtype, drop).\n",
"    # NOTE(review): field_type is the raw type string from the XML; it presumably has to be\n",
"    # mapped to a Polars dtype before it can be used in a cast -- confirm.\n",
"    dict_SelectTool[field_name] = (field_rename, field_type, field_drop)\n",
"\n",
"dict_SelectTool"
]
},
{
"cell_type": "code",
"execution_count": 108,
"metadata": {},
"outputs": [],
"source": [
"gui_settings_text = root.find(\".//GuiSettings\")"
]
},
{
"cell_type": "code",
"execution_count": 114,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"'AlteryxBasePluginsGui.TextInput.TextInput'"
]
},
"execution_count": 114,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"gui_settings_text.attrib['Plugin']"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import polars as pl\n",
"\n",
"def reshape_polars_df(df: pl.DataFrame, col_specs: dict):\n",
"    \"\"\"\n",
"    Drop, retype and rename columns of a Polars DataFrame according to ``col_specs``.\n",
"\n",
"    Args:\n",
"        df (pl.DataFrame): The input Polars DataFrame.\n",
"        col_specs (dict): Maps each original column name to a ``(new_name, dtype, drop)``\n",
"            tuple. When ``drop`` is truthy the column is removed and the other two values\n",
"            are ignored. Otherwise a ``dtype`` of ``None`` leaves the type unchanged and a\n",
"            ``new_name`` of ``None`` (or one equal to the original name) leaves the name\n",
"            unchanged.\n",
"\n",
"    Returns:\n",
"        pl.DataFrame: The reshaped DataFrame.\n",
"    \"\"\"\n",
"    drops = []\n",
"    casts = []\n",
"    renames = {}\n",
"    for old_name, (new_name, dt, drop) in col_specs.items():\n",
"        if drop:\n",
"            drops.append(old_name)\n",
"            continue\n",
"        if dt is not None:\n",
"            # Cast while the column still carries its original name; casting after the\n",
"            # rename would look up a column that no longer exists.\n",
"            casts.append(pl.col(old_name).cast(dt))\n",
"        if new_name is not None and new_name != old_name:\n",
"            renames[old_name] = new_name\n",
"\n",
"    if drops:\n",
"        df = df.drop(drops)\n",
"    if casts:\n",
"        # `with_columns` is the current Polars method; the older `with_column` is not\n",
"        # available in recent releases.\n",
"        df = df.with_columns(casts)\n",
"    if renames:\n",
"        # One rename call for the whole mapping instead of one call per column.\n",
"        df = df.rename(renames)\n",
"    return df\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "polaryx",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}