commit 91686ed5f772001b91dff7b638e7551aa5ae7933 Author: Casey Date: Sat Aug 10 09:24:22 2024 +1000 adding old files diff --git a/SimpleWorkflow/SimpleWorkflow.yxmd b/SimpleWorkflow/SimpleWorkflow.yxmd new file mode 100644 index 0000000..a9670e9 --- /dev/null +++ b/SimpleWorkflow/SimpleWorkflow.yxmd @@ -0,0 +1,377 @@ + + + + + + + + + + + + + + + + + 1 + 4 + + + 2 + 5 + + + 3 + 6 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + C:\Users\casey.morter\AppData\Local\Temp\Engine_1076_e92ddd5f524045a79cff90fef659b528_\Engine_33528_b9fe906502c14eb3ad38d38f04c81ab2_.yxdb + + + + + + + + + + + + + + + + + + + + + + + 5 + 8 + + + 6 + 9 + + + 7 + 10 + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + C:\Users\casey.morter\AppData\Local\Temp\Engine_1076_e92ddd5f524045a79cff90fef659b528_\Engine_33528_96b3fed5c185423dafa9c8e54e621b39_.yxdb + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + First + 1 + + + + + First 1 + + + + + + + + + + + + C:\Users\casey.morter\AppData\Local\Temp\Engine_1076_e92ddd5f524045a79cff90fef659b528_\Engine_33528_7a751c4879e24850ab691e9dd40c3227_.yxdb + + + + + + + + + + + + + + + + + + + + + + + + .\SimpleWorkflowOut.csv + + + CRLF + , + False + True + 28591 + True + + + + + + SimpleWorkflowOut.csv + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + Horizontal + + + SimpleWorkflow + + + + + + + + + + + + + + + + + + + + + + + + + \ No newline at end of file diff --git a/SimpleWorkflow/SimpleWorkflowOut.csv b/SimpleWorkflow/SimpleWorkflowOut.csv new file mode 100644 index 0000000..65b150c --- /dev/null +++ b/SimpleWorkflow/SimpleWorkflowOut.csv @@ -0,0 +1,4 @@ +Col_3_renamed,Right_Column 3,Right_Column 4 +5,5,15 +6,6,16 +7,7,17 diff --git a/scratchpad.ipynb b/scratchpad.ipynb new file mode 100644 
# Setup: open the Alteryx workflow XML and keep it as a string.
import xml.etree.ElementTree as ET

import polars as pl

xml_file_path = "./SimpleWorkflow/SimpleWorkflow.yxmd"
tree = ET.parse(xml_file_path)
root = tree.getroot()

xml_string = ET.tostring(root, encoding='unicode')


def extract_tool_id_and_contents(xml_string):
    """Collect every ``Node`` element of a workflow document.

    Args:
        xml_string: XML text of the workflow.

    Returns:
        A list of ``(tool_id, content)`` tuples in document order.
        ``tool_id`` is the node's ``ToolID`` attribute (``None`` when the
        attribute is absent) and ``content`` is the node serialised back to
        a unicode XML string.
    """
    document = ET.fromstring(xml_string)
    return [
        (node.attrib.get('ToolID'), ET.tostring(node, encoding='unicode'))
        for node in document.iter('Node')
    ]


results = extract_tool_id_and_contents(xml_string)


def selectTool(df: pl.DataFrame, col_specs: dict):
    """Rename and retype columns of a Polars DataFrame.

    Args:
        df: The input Polars DataFrame.
        col_specs: Mapping of original column name to a
            ``(new_name, dtype)`` tuple. ``dtype`` may be ``None`` to keep
            the current type; ``new_name`` may be ``None`` to keep the
            current name.

    Returns:
        The DataFrame with the requested columns retyped and renamed.
    """
    for old_name, (new_name, dt) in col_specs.items():
        # Cast first, while the column still carries its original name.
        # Casting after the rename would reference a column that no longer
        # exists.
        if dt is not None:
            df = df.with_columns(pl.col(old_name).cast(dt))
        if new_name is not None and new_name != old_name:
            df = df.rename({old_name: new_name})
    return df


def TextInputToDf(xml_string):
    """Build a Polars DataFrame from the XML of a Text Input tool.

    Args:
        xml_string: XML text of one tool node. Column names are read from
            the ``name`` attribute of each ``Fields/Field`` element and the
            rows from the ``c`` children of each ``Data/r`` element.

    Returns:
        A ``pl.DataFrame`` with one column per field and one row per ``r``
        element.

    Raises:
        ValueError: If a cell's text cannot be converted with ``int``.
        TypeError: If a cell has no text at all (``int(None)``).
    """
    # Parse the tool that was actually passed in. The earlier version always
    # parsed results[3][1], so every call returned the same frame.
    tool_root = ET.fromstring(xml_string)
    fields = [field.attrib['name'] for field in tool_root.findall(".//Fields/Field")]
    # Every cell is converted with int(); non-integer data is not handled.
    data_rows = [
        [int(cell.text) for cell in row.findall("c")]
        for row in tool_root.findall(".//Data/r")
    ]
    # data_rows is a list of rows, so state the orientation explicitly
    # instead of relying on inference (which emits DataOrientationWarning).
    return pl.DataFrame(data_rows, fields, orient="row")
# Walk every (ToolID, XML) pair and report which kind of tool it is.
for ToolID, ToolXML in results:
    gui_settings = ET.fromstring(ToolXML).find(".//GuiSettings")
    # The tool type is the third dot-separated part of the Plugin attribute.
    plugin_parts = gui_settings.attrib['Plugin'].split(".")
    ToolType = plugin_parts[2]

    print(ToolID, ToolType)

    # Only Text Input tools are turned into a DataFrame at this stage.
    if ToolType != 'TextInput':
        continue
    print(TextInputToDf(ToolXML))

# Pick one tool's XML to inspect
# 0 = TextInput
# 4 = select with rename
tool_xml = results[4][1]
# Example of the col_specs layout:
# original column name -> (new name, dtype, drop flag).
col_specs = {
    "A": ("x", int, False),
    "B": ("y", str, False),
    "D": (None, None, True)  # drop this column
}

# Parse the XML of the tool selected in tool_xml
root = ET.fromstring(tool_xml)

dict_SelectTool = {}

for field in root.findall(".//SelectFields/SelectField"):
    field_name = field.attrib['field']

    # 'type' and 'rename' are optional attributes; .get() returns None when
    # they are absent, without a blanket except that would hide other errors.
    field_type = field.attrib.get('type')  # read but not used yet
    field_rename = field.attrib.get('rename')

    # NOTE(review): the tuple stores the rename in the third slot, whereas
    # col_specs above uses (new name, dtype, drop flag). Confirm the intended
    # layout before feeding this dict to the reshape function.
    dict_SelectTool[field_name] = (None, None, field_rename)

# TODO: decide how to treat the '*Unknown' entry and the 'selected' / 'size'
# attributes; so far they were only sketched in commented-out experiments.

dict_SelectTool

gui_settings_text = root.find(".//GuiSettings")
import polars as pl


def reshape_polars_df(df: pl.DataFrame, col_specs: dict):
    """Drop, retype and rename columns of a Polars DataFrame.

    Args:
        df: The input Polars DataFrame.
        col_specs: Mapping of original column name to a
            ``(new_name, dtype, drop)`` tuple. When ``drop`` is truthy the
            column is removed and the other two entries are ignored.
            Otherwise the column is cast to ``dtype`` (unless ``None``) and
            renamed to ``new_name`` (unless ``None`` or unchanged).

    Returns:
        The reshaped DataFrame.
    """
    for old_name, (new_name, dt, drop) in col_specs.items():
        if drop:
            df = df.drop(old_name)
            continue
        # Cast before renaming: after the rename the column can no longer be
        # addressed by old_name.
        if dt is not None:
            df = df.with_columns(pl.col(old_name).cast(dt))
        # A retype-only spec may leave new_name as None; keep the name then.
        if new_name is not None and new_name != old_name:
            df = df.rename({old_name: new_name})
    return df