Compare commits
2 Commits
91686ed5f7
...
2374fe0d15
| Author | SHA1 | Date |
|---|---|---|
|
|
2374fe0d15 | |
|
|
b7696e80ab |
Binary file not shown.
|
|
@ -0,0 +1,91 @@
|
||||||
|
anyio==4.4.0
|
||||||
|
argon2-cffi==23.1.0
|
||||||
|
argon2-cffi-bindings==21.2.0
|
||||||
|
arrow==1.3.0
|
||||||
|
asttokens==2.4.1
|
||||||
|
async-lru==2.0.4
|
||||||
|
attrs==24.2.0
|
||||||
|
babel==2.16.0
|
||||||
|
beautifulsoup4==4.12.3
|
||||||
|
bleach==6.1.0
|
||||||
|
certifi==2024.7.4
|
||||||
|
cffi==1.17.0
|
||||||
|
charset-normalizer==3.3.2
|
||||||
|
comm==0.2.2
|
||||||
|
debugpy==1.8.5
|
||||||
|
decorator==5.1.1
|
||||||
|
defusedxml==0.7.1
|
||||||
|
executing==2.0.1
|
||||||
|
fastjsonschema==2.20.0
|
||||||
|
fqdn==1.5.1
|
||||||
|
h11==0.14.0
|
||||||
|
httpcore==1.0.5
|
||||||
|
httpx==0.27.0
|
||||||
|
idna==3.7
|
||||||
|
ipykernel==6.29.5
|
||||||
|
ipython==8.26.0
|
||||||
|
isoduration==20.11.0
|
||||||
|
jedi==0.19.1
|
||||||
|
Jinja2==3.1.4
|
||||||
|
json5==0.9.25
|
||||||
|
jsonpointer==3.0.0
|
||||||
|
jsonschema==4.23.0
|
||||||
|
jsonschema-specifications==2023.12.1
|
||||||
|
jupyter-events==0.10.0
|
||||||
|
jupyter-lsp==2.2.5
|
||||||
|
jupyter_client==8.6.2
|
||||||
|
jupyter_core==5.7.2
|
||||||
|
jupyter_server==2.14.2
|
||||||
|
jupyter_server_terminals==0.5.3
|
||||||
|
jupyterlab==4.2.4
|
||||||
|
jupyterlab_pygments==0.3.0
|
||||||
|
jupyterlab_server==2.27.3
|
||||||
|
MarkupSafe==2.1.5
|
||||||
|
matplotlib-inline==0.1.7
|
||||||
|
mistune==3.0.2
|
||||||
|
nbclient==0.10.0
|
||||||
|
nbconvert==7.16.4
|
||||||
|
nbformat==5.10.4
|
||||||
|
nest-asyncio==1.6.0
|
||||||
|
notebook_shim==0.2.4
|
||||||
|
overrides==7.7.0
|
||||||
|
packaging==24.1
|
||||||
|
pandocfilters==1.5.1
|
||||||
|
parso==0.8.4
|
||||||
|
pexpect==4.9.0
|
||||||
|
platformdirs==4.2.2
|
||||||
|
polars==1.4.1
|
||||||
|
prometheus_client==0.20.0
|
||||||
|
prompt_toolkit==3.0.47
|
||||||
|
psutil==6.0.0
|
||||||
|
ptyprocess==0.7.0
|
||||||
|
pure_eval==0.2.3
|
||||||
|
pycparser==2.22
|
||||||
|
Pygments==2.18.0
|
||||||
|
python-dateutil==2.9.0.post0
|
||||||
|
python-json-logger==2.0.7
|
||||||
|
PyYAML==6.0.2
|
||||||
|
pyzmq==26.1.0
|
||||||
|
referencing==0.35.1
|
||||||
|
requests==2.32.3
|
||||||
|
rfc3339-validator==0.1.4
|
||||||
|
rfc3986-validator==0.1.1
|
||||||
|
rpds-py==0.20.0
|
||||||
|
Send2Trash==1.8.3
|
||||||
|
setuptools==72.1.0
|
||||||
|
six==1.16.0
|
||||||
|
sniffio==1.3.1
|
||||||
|
soupsieve==2.5
|
||||||
|
stack-data==0.6.3
|
||||||
|
terminado==0.18.1
|
||||||
|
tinycss2==1.3.0
|
||||||
|
tornado==6.4.1
|
||||||
|
traitlets==5.14.3
|
||||||
|
types-python-dateutil==2.9.0.20240316
|
||||||
|
uri-template==1.3.0
|
||||||
|
urllib3==2.2.2
|
||||||
|
wcwidth==0.2.13
|
||||||
|
webcolors==24.6.0
|
||||||
|
webencodings==0.5.1
|
||||||
|
websocket-client==1.8.0
|
||||||
|
wheel==0.43.0
|
||||||
404
scratchpad.ipynb
404
scratchpad.ipynb
|
|
@ -9,12 +9,11 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 23,
|
"execution_count": 293,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Open Alteryx XML into a string \n",
|
"# Open Alteryx XML into a string \n",
|
||||||
"\n",
|
|
||||||
"import polars as pl \n",
|
"import polars as pl \n",
|
||||||
"import xml.etree.ElementTree as ET\n",
|
"import xml.etree.ElementTree as ET\n",
|
||||||
"\n",
|
"\n",
|
||||||
|
|
@ -27,12 +26,11 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 24,
|
"execution_count": 294,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"# Parse out nodes (tool data) into a dict\n",
|
"# Parse out nodes (tool data) into a dict\n",
|
||||||
"\n",
|
|
||||||
"def extract_tool_id_and_contents(xml_string):\n",
|
"def extract_tool_id_and_contents(xml_string):\n",
|
||||||
" root = ET.fromstring(xml_string)\n",
|
" root = ET.fromstring(xml_string)\n",
|
||||||
" results = []\n",
|
" results = []\n",
|
||||||
|
|
@ -55,27 +53,41 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 25,
|
"execution_count": 254,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def selectTool(df: pl.DataFrame, col_specs: dict):\n",
|
"def tool_select(col_spec: dict):\n",
|
||||||
" \"\"\"\n",
|
" \"\"\" Generates select tool code\"\"\"\n",
|
||||||
" Reshape a Polars DataFrame by renaming and retyping columns according to the provided dictionary.\n",
|
" dynamic_code = \"df_output = df.with_columns(\\n\"\n",
|
||||||
|
" dynamic_code_suffix = ''\n",
|
||||||
|
" for old_name, (new_name, type, selected) in col_spec.items():\n",
|
||||||
"\n",
|
"\n",
|
||||||
" Args:\n",
|
" if old_name == '*Unknown':\n",
|
||||||
" df (pl.DataFrame): The input Polars DataFrame.\n",
|
" break \n",
|
||||||
" col_specs (dict): A dictionary where keys are column names in the original DataFrame,\n",
|
" \n",
|
||||||
" and values are tuples containing the new column name and data type.\n",
|
" if new_name:\n",
|
||||||
|
" alias = f\".alias('{new_name}')\"\n",
|
||||||
|
" dynamic_code_suffix += f\"df_output = df_output.drop(f'{old_name}')\\n\"\n",
|
||||||
|
" else:\n",
|
||||||
|
" alias = ''\n",
|
||||||
"\n",
|
"\n",
|
||||||
" Returns:\n",
|
" if type is not None:\n",
|
||||||
" pl.DataFrame: The reshaped Polars DataFrame with renamed and retyped columns.\n",
|
" if 'Int' in type:\n",
|
||||||
" \"\"\"\n",
|
" cast = f\".cast(pl.{pl.Int64})\"\n",
|
||||||
" for old_name, (new_name, dt) in col_specs.items():\n",
|
" elif 'String' in type:\n",
|
||||||
" df = df.rename({old_name: new_name})\n",
|
" cast = f\".cast(pl.{pl.String})\"\n",
|
||||||
" if dt is not None:\n",
|
" else:\n",
|
||||||
" df = df.with_column(pl.col(old_name).cast(dt))\n",
|
" cast = ''\n",
|
||||||
" return df"
|
"\n",
|
||||||
|
" if selected != 'False':\n",
|
||||||
|
" dynamic_code += f\"df.select(pl.col(f'{old_name}'){cast}{alias}),\\n\"\n",
|
||||||
|
" else:\n",
|
||||||
|
" dynamic_code_suffix += f\"df_output = df_output.drop(f'{old_name}')\\n\"\n",
|
||||||
|
"\n",
|
||||||
|
" dynamic_code += \")\\n\" + dynamic_code_suffix\n",
|
||||||
|
" \n",
|
||||||
|
" return dynamic_code"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|
@ -87,26 +99,139 @@
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 26,
|
"execution_count": 295,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": [
|
||||||
"def TextInputToDf(xml_string):\n",
|
"def input_textInput(xml_string):\n",
|
||||||
" # Get XML for a Text input tool\n",
|
" # Get XML for a Text input tool\n",
|
||||||
" root = ET.fromstring(results[3][1])\n",
|
" root = ET.fromstring(xml_string)\n",
|
||||||
" # Extract the field names\n",
|
" # Extract the field names\n",
|
||||||
" fields = [field.attrib['name'] for field in root.findall(\".//Fields/Field\")]\n",
|
" fields = [field.attrib['name'] for field in root.findall(\".//Fields/Field\")]\n",
|
||||||
" # Extract the data rows\n",
|
" # Extract the data rows\n",
|
||||||
" data_rows = [[int(c.text) for c in row.findall(\"c\")] for row in root.findall(\".//Data/r\")]\n",
|
" data_rows = [[int(c.text) for c in row.findall(\"c\")] for row in root.findall(\".//Data/r\")]\n",
|
||||||
" # Create the polars dataframe\n",
|
" # Create the polars dataframe\n",
|
||||||
" df = pl.DataFrame(data_rows, fields)\n",
|
" df = pl.DataFrame(data_rows, fields, orient=\"row\")\n",
|
||||||
" # Display the dataframe\n",
|
" # Display the dataframe\n",
|
||||||
" return df"
|
" return df"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 22,
|
"execution_count": 296,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [],
|
||||||
|
"source": [
|
||||||
|
"def getConf_Select(xml_string):\n",
|
||||||
|
" root = ET.fromstring(xml_string)\n",
|
||||||
|
" dict_SelectTool = {}\n",
|
||||||
|
"\n",
|
||||||
|
" for field in root.findall(\".//SelectFields/SelectField\"):\n",
|
||||||
|
" field_name = field.attrib['field']\n",
|
||||||
|
" field_selected = field.attrib['selected']\n",
|
||||||
|
"\n",
|
||||||
|
" try:\n",
|
||||||
|
" field_type = field.attrib['type']\n",
|
||||||
|
" except:\n",
|
||||||
|
" field_type = None\n",
|
||||||
|
" try:\n",
|
||||||
|
" field_rename = field.attrib['rename']\n",
|
||||||
|
" except:\n",
|
||||||
|
" field_rename = None\n",
|
||||||
|
"\n",
|
||||||
|
" dict_SelectTool[field_name] = (field_rename, field_type, field_selected)\n",
|
||||||
|
"\n",
|
||||||
|
" return dict_SelectTool"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 313,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"<Node ToolID=\"7\">\n",
|
||||||
|
" <GuiSettings Plugin=\"AlteryxBasePluginsGui.Join.Join\">\n",
|
||||||
|
" <Position x=\"318\" y=\"258\" />\n",
|
||||||
|
" </GuiSettings>\n",
|
||||||
|
" <Properties>\n",
|
||||||
|
" <Configuration joinByRecordPos=\"False\">\n",
|
||||||
|
" <JoinInfo connection=\"Left\">\n",
|
||||||
|
" <Field field=\"Col_3_renamed\" />\n",
|
||||||
|
" </JoinInfo>\n",
|
||||||
|
" <JoinInfo connection=\"Right\">\n",
|
||||||
|
" <Field field=\"Column 3\" />\n",
|
||||||
|
" </JoinInfo>\n",
|
||||||
|
" <SelectConfiguration>\n",
|
||||||
|
" <Configuration outputConnection=\"Join\">\n",
|
||||||
|
" <OrderChanged value=\"False\" />\n",
|
||||||
|
" <CommaDecimal value=\"False\" />\n",
|
||||||
|
" <SelectFields>\n",
|
||||||
|
" <SelectField field=\"Right_Column 3\" selected=\"True\" rename=\"Right_Column 3\" input=\"Right_\" />\n",
|
||||||
|
" <SelectField field=\"Right_Column 4\" selected=\"True\" rename=\"Right_Column 4\" input=\"Right_\" />\n",
|
||||||
|
" <SelectField field=\"*Unknown\" selected=\"True\" />\n",
|
||||||
|
" </SelectFields>\n",
|
||||||
|
" </Configuration>\n",
|
||||||
|
" </SelectConfiguration>\n",
|
||||||
|
" </Configuration>\n",
|
||||||
|
" <Annotation DisplayMode=\"0\">\n",
|
||||||
|
" <Name />\n",
|
||||||
|
" <DefaultAnnotationText />\n",
|
||||||
|
" <Left value=\"False\" />\n",
|
||||||
|
" </Annotation>\n",
|
||||||
|
" </Properties>\n",
|
||||||
|
" <EngineSettings EngineDll=\"AlteryxBasePluginsEngine.dll\" EngineDllEntryPoint=\"AlteryxJoin\" />\n",
|
||||||
|
" </Node>\n",
|
||||||
|
" \n",
|
||||||
|
"joinByRecordPos: False\n",
|
||||||
|
"{'joinParams': ('Col_3_renamed', 'Column 3')}\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"### WIP\n",
|
||||||
|
"\n",
|
||||||
|
"def getConf_Join(xml_string):\n",
|
||||||
|
" print(xml_string)\n",
|
||||||
|
" root = ET.fromstring(xml_string)\n",
|
||||||
|
" dict_JoinTool = {}\n",
|
||||||
|
"\n",
|
||||||
|
" print(\"joinByRecordPos:\", root.find(\".//Properties/Configuration\").attrib['joinByRecordPos'])\n",
|
||||||
|
"\n",
|
||||||
|
" # Join parameters\n",
|
||||||
|
" for joinField in root.findall(\".//Configuration/JoinInfo\"):\n",
|
||||||
|
" if joinField.attrib['connection'] == \"Left\":\n",
|
||||||
|
" left_on = joinField.find('Field').attrib['field']\n",
|
||||||
|
" elif joinField.attrib['connection'] == \"Right\":\n",
|
||||||
|
" right_on = joinField.find('Field').attrib['field']\n",
|
||||||
|
"\n",
|
||||||
|
" if left_on == right_on:\n",
|
||||||
|
" dict_JoinTool['joinParams'] = ('on', left_on)\n",
|
||||||
|
" else:\n",
|
||||||
|
" dict_JoinTool['joinParams'] = (left_on, right_on)\n",
|
||||||
|
"\n",
|
||||||
|
" # Select parameters\n",
|
||||||
|
" for joinField in root.findall(\".//Configuration/SelectConfiguration\"):\n",
|
||||||
|
"\n",
|
||||||
|
" return dict_JoinTool\n",
|
||||||
|
"\n",
|
||||||
|
"xml_join_tool = results[6][1]\n",
|
||||||
|
"print(getConf_Join(xml_join_tool))\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "markdown",
|
||||||
|
"metadata": {},
|
||||||
|
"source": [
|
||||||
|
"# Working with the XML file"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 40,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [
|
"outputs": [
|
||||||
{
|
{
|
||||||
|
|
@ -151,7 +276,7 @@
|
||||||
"name": "stderr",
|
"name": "stderr",
|
||||||
"output_type": "stream",
|
"output_type": "stream",
|
||||||
"text": [
|
"text": [
|
||||||
"C:\\Users\\casey.morter\\AppData\\Local\\Temp\\ipykernel_4012\\3571569777.py:9: DataOrientationWarning: Row orientation inferred during DataFrame construction. Explicitly specify the orientation by passing `orient=\"row\"` to silence this warning.\n",
|
"/tmp/ipykernel_579015/219306832.py:9: DataOrientationWarning: Row orientation inferred during DataFrame construction. Explicitly specify the orientation by passing `orient=\"row\"` to silence this warning.\n",
|
||||||
" df = pl.DataFrame(data_rows, fields)\n"
|
" df = pl.DataFrame(data_rows, fields)\n"
|
||||||
]
|
]
|
||||||
}
|
}
|
||||||
|
|
@ -167,163 +292,100 @@
|
||||||
" print(ToolID, ToolType)\n",
|
" print(ToolID, ToolType)\n",
|
||||||
"\n",
|
"\n",
|
||||||
" if ToolType == 'TextInput':\n",
|
" if ToolType == 'TextInput':\n",
|
||||||
" print(TextInputToDf(ToolXML))"
|
" print(input_textInput(ToolXML))"
|
||||||
]
|
]
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": 29,
|
"execution_count": 292,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"\n",
|
||||||
|
"############### Input dataframe (TextInput):\n",
|
||||||
|
"\n",
|
||||||
|
" shape: (3, 2)\n",
|
||||||
|
"┌──────────┬──────────┐\n",
|
||||||
|
"│ Column 3 ┆ Column 4 │\n",
|
||||||
|
"│ --- ┆ --- │\n",
|
||||||
|
"│ i64 ┆ i64 │\n",
|
||||||
|
"╞══════════╪══════════╡\n",
|
||||||
|
"│ 5 ┆ 8 │\n",
|
||||||
|
"│ 6 ┆ 9 │\n",
|
||||||
|
"│ 7 ┆ 10 │\n",
|
||||||
|
"└──────────┴──────────┘\n",
|
||||||
|
"\n",
|
||||||
|
"############### Generated code from Select tool: \n",
|
||||||
|
"\n",
|
||||||
|
" df_output = df.with_columns(\n",
|
||||||
|
"df.select(pl.col(f'Column 3').alias('Col_3_renamed')),\n",
|
||||||
|
")\n",
|
||||||
|
"df_output = df_output.drop(f'Column 3')\n",
|
||||||
|
"df_output = df_output.drop(f'Column 4')\n",
|
||||||
|
"\n",
|
||||||
|
"shape: (3, 1)\n",
|
||||||
|
"┌───────────────┐\n",
|
||||||
|
"│ Col_3_renamed │\n",
|
||||||
|
"│ --- │\n",
|
||||||
|
"│ i64 │\n",
|
||||||
|
"╞═══════════════╡\n",
|
||||||
|
"│ 5 │\n",
|
||||||
|
"│ 6 │\n",
|
||||||
|
"│ 7 │\n",
|
||||||
|
"└───────────────┘\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"name": "stderr",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"/tmp/ipykernel_579015/3571569777.py:9: DataOrientationWarning: Row orientation inferred during DataFrame construction. Explicitly specify the orientation by passing `orient=\"row\"` to silence this warning.\n",
|
||||||
|
" df = pl.DataFrame(data_rows, fields)\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"# Tool 1: TextInput\n",
|
||||||
|
"xml_tool1 = results[3][1]\n",
|
||||||
|
"df_in = TextInputToDf(ToolXML)\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"\\n############### Input dataframe (TextInput):\\n\\n\", df_in)\n",
|
||||||
|
"\n",
|
||||||
|
"# Tool 2: Select\n",
|
||||||
|
"col_spec = getConf_Select(results[4][1])\n",
|
||||||
|
"\n",
|
||||||
|
"# print(\"\\n############### Select tool spec taken from XML:\\n\\n\", col_spec)\n",
|
||||||
|
"\n",
|
||||||
|
"\n",
|
||||||
|
"# Generate code from \n",
|
||||||
|
"code = tool_select(col_spec)\n",
|
||||||
|
"\n",
|
||||||
|
"print(\"\\n############### Generated code from Select tool: \\n\\n\", code)\n",
|
||||||
|
"\n",
|
||||||
|
"df = df_in\n",
|
||||||
|
"\n",
|
||||||
|
"exec(code)\n",
|
||||||
|
"\n",
|
||||||
|
"print(df_output)\n",
|
||||||
|
"\n"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 174,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": []
|
||||||
"# Check out a tool\n",
|
|
||||||
"# 0 = TextInput\n",
|
|
||||||
"# 4 = select with rename\n",
|
|
||||||
"tool_xml = results[4][1]"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 27,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"'c:\\\\Users\\\\casey.morter\\\\OneDrive - JLL\\\\Documents\\\\01 Workspace\\\\01 Python\\\\Polaryx'"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 27,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"%pwd"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 31,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"ename": "KeyError",
|
|
||||||
"evalue": "'type'",
|
|
||||||
"output_type": "error",
|
|
||||||
"traceback": [
|
|
||||||
"\u001b[1;31m---------------------------------------------------------------------------\u001b[0m",
|
|
||||||
"\u001b[1;31mKeyError\u001b[0m Traceback (most recent call last)",
|
|
||||||
"Cell \u001b[1;32mIn[31], line 16\u001b[0m\n\u001b[0;32m 14\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m field \u001b[38;5;129;01min\u001b[39;00m root\u001b[38;5;241m.\u001b[39mfindall(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m.//SelectFields/SelectField\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m 15\u001b[0m field_name \u001b[38;5;241m=\u001b[39m field\u001b[38;5;241m.\u001b[39mattrib[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mfield\u001b[39m\u001b[38;5;124m'\u001b[39m]\n\u001b[1;32m---> 16\u001b[0m field_type \u001b[38;5;241m=\u001b[39m \u001b[43mfield\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mattrib\u001b[49m\u001b[43m[\u001b[49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[38;5;124;43mtype\u001b[39;49m\u001b[38;5;124;43m'\u001b[39;49m\u001b[43m]\u001b[49m\n\u001b[0;32m 18\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m 19\u001b[0m field_rename \u001b[38;5;241m=\u001b[39m field\u001b[38;5;241m.\u001b[39mattrib[\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mrename\u001b[39m\u001b[38;5;124m'\u001b[39m]\n",
|
|
||||||
"\u001b[1;31mKeyError\u001b[0m: 'type'"
|
|
||||||
]
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"col_specs = {\n",
|
|
||||||
" \"A\": (\"x\", int, False),\n",
|
|
||||||
" \"B\": (\"y\", str, False),\n",
|
|
||||||
" \"D\": (None, None, True) # drop this column\n",
|
|
||||||
"}\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"# Parse the XML\n",
|
|
||||||
"root = ET.fromstring(tool_xml)\n",
|
|
||||||
"\n",
|
|
||||||
"dict_SelectTool = {}\n",
|
|
||||||
"\n",
|
|
||||||
"for field in root.findall(\".//SelectFields/SelectField\"):\n",
|
|
||||||
" field_name = field.attrib['field']\n",
|
|
||||||
"\n",
|
|
||||||
" try:\n",
|
|
||||||
" field_type = field.attrib['type']\n",
|
|
||||||
" except:\n",
|
|
||||||
" field_type = None\n",
|
|
||||||
" \n",
|
|
||||||
" try:\n",
|
|
||||||
" field_rename = field.attrib['rename']\n",
|
|
||||||
" except:\n",
|
|
||||||
" field_rename = None\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
" dict_SelectTool[field_name] = (None, None, field_rename)\n",
|
|
||||||
" \n",
|
|
||||||
"\n",
|
|
||||||
" # dict_SelectTool['field']\n",
|
|
||||||
"\n",
|
|
||||||
" # if field.attrib['field'] != '*Unknown':\n",
|
|
||||||
" # field.attrib['selected'], \"Type: \", field.attrib['size']\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
"\n",
|
|
||||||
" \n",
|
|
||||||
" # print(field.attrib['field'], \"| Selected:\", field.attrib['selected'], \"Type: \", field.attrib['size'])\n",
|
|
||||||
"\n",
|
|
||||||
"dict_SelectTool"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 108,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [],
|
|
||||||
"source": [
|
|
||||||
"gui_settings_text = root.find(\".//GuiSettings\")"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"cell_type": "code",
|
|
||||||
"execution_count": 114,
|
|
||||||
"metadata": {},
|
|
||||||
"outputs": [
|
|
||||||
{
|
|
||||||
"data": {
|
|
||||||
"text/plain": [
|
|
||||||
"'AlteryxBasePluginsGui.TextInput.TextInput'"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"execution_count": 114,
|
|
||||||
"metadata": {},
|
|
||||||
"output_type": "execute_result"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"source": [
|
|
||||||
"gui_settings_text.attrib['Plugin']"
|
|
||||||
]
|
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"cell_type": "code",
|
"cell_type": "code",
|
||||||
"execution_count": null,
|
"execution_count": null,
|
||||||
"metadata": {},
|
"metadata": {},
|
||||||
"outputs": [],
|
"outputs": [],
|
||||||
"source": [
|
"source": []
|
||||||
"import polars as pl\n",
|
|
||||||
"\n",
|
|
||||||
"def reshape_polars_df(df: pl.DataFrame, col_specs: dict):\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" Reshape a Polars DataFrame by renaming and retyping columns according to the provided dictionary.\n",
|
|
||||||
"\n",
|
|
||||||
" Args:\n",
|
|
||||||
" df (pl.DataFrame): The input Polars DataFrame.\n",
|
|
||||||
" col_specs (dict): A dictionary where keys are column names in the original DataFrame,\n",
|
|
||||||
" and values are tuples containing the new column name, data type, and a boolean indicating whether\n",
|
|
||||||
" the column should be dropped or not.\n",
|
|
||||||
"\n",
|
|
||||||
" Returns:\n",
|
|
||||||
" pl.DataFrame: The reshaped Polars DataFrame with renamed and retyped columns.\n",
|
|
||||||
" \"\"\"\n",
|
|
||||||
" for old_name, (new_name, dt, drop) in col_specs.items():\n",
|
|
||||||
" if drop:\n",
|
|
||||||
" df = df.drop(old_name)\n",
|
|
||||||
" else:\n",
|
|
||||||
" df = df.rename({old_name: new_name})\n",
|
|
||||||
" if dt is not None:\n",
|
|
||||||
" df = df.with_column(pl.col(old_name).cast(dt))\n",
|
|
||||||
" return df\n"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
],
|
],
|
||||||
"metadata": {
|
"metadata": {
|
||||||
|
|
|
||||||
Reference in New Issue