L2 Hub
Pontus
agents
Data Analysis

L2 Construct: data_analysis

data ai agent

Description

Data Analysis Agent is an AI-powered tool for performing data analysis tasks on tabular data. It takes a task description and a set of input tables, and returns the processed result table along with the steps (iterations) taken to achieve the result.

Input:

  • task: A string describing the data analysis task to be performed.
  • tables: A map of table names to Table structures, representing the input data.

Output:

  • result_table: The final processed Table structure after performing the data analysis task.
  • iterations: An array of steps taken to achieve the result.

The agent uses to break down complex data analysis tasks into a series of simpler operations, using tools like Join, Filter, GroupBy, and Finish to manipulate the data.

L2 Data

  1. Provider: pontus
  2. Module: agents
  3. Action: data_analysis

Example Step

{
  "name": "insert-your-step-name",
  "type": "l2",
  "l2_data": {
    "provider": "pontus",
    "module": "agents",
    "action": "data_analysis"
  }
}
 

Input

Example

{
  "task": "Join sales and product tables, then calculate total sales by department",
  "tables": [
    {
      "name": "sales",
      "description": "All sales for the store",
      "table": {
        "headers": ["date", "product_id", "sale_amount"],
        "rows": [["2023-05-01", "P001", 100.5], ["2023-05-02", "P002", 200.75]]
      }
    },
    {
      "name": "products",
      "description": "Available products for purchase in the store by department",
      "table": {
        "headers": ["product_id", "product_name", "department"],
        "rows": [
          ["P001", "Widget A", "Electronics"],
          ["P002", "Gadget B", "Electronics"]
        ]
      }
    },
    {
      "name": "customers",
      "description": "Customer information for the store",
      "table": {
        "headers": ["customer_id", "customer_name", "age"],
        "rows": [
          ["C001", "John Doe", 30],
          ["C002", "Jane Smith", 25],
          ["C003", "Bob Johnson", 40]
        ]
      }
    }
  ]
}
 

Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://pontus.bpa/pkgs/l2/pontus/agents/data-analysis-agent-input",
  "$ref": "#/$defs/DataAnalysisAgentInput",
  "$defs": {
    "DataAnalysisAgentInput": {
      "properties": {
        "task": {
          "type": "string"
        },
        "tables": {
          "items": {
            "$ref": "#/$defs/TableDesc"
          },
          "type": "array"
        }
      },
      "additionalProperties": false,
      "type": "object",
      "required": ["task", "tables"]
    },
    "Table": {
      "properties": {
        "headers": {
          "items": {
            "type": "string"
          },
          "type": "array"
        },
        "rows": {
          "items": {
            "items": true,
            "type": "array"
          },
          "type": "array"
        }
      },
      "additionalProperties": false,
      "type": "object",
      "required": ["headers", "rows"]
    },
    "TableDesc": {
      "properties": {
        "name": {
          "type": "string"
        },
        "description": {
          "type": "string"
        },
        "table": {
          "$ref": "#/$defs/Table"
        }
      },
      "additionalProperties": false,
      "type": "object",
      "required": ["name", "description", "table"]
    }
  }
}
 

Output

Example

{
  "success": true,
  "iterations": [
    {
      "user_input": "We have the following tables: 'sales' (columns: date, product_id, sale_amount, customer_id), 'products' (columns: product_id, product_name, department), 'customers' (columns: customer_id, customer_name, age). Join the sales data with product and customer information, then group by department and calculate the total sales and average customer age for each department.",
      "observation": "We need to perform multiple operations: joining tables, filtering, and aggregating data.",
      "thought": "Let's start by joining the sales table with the products table to get product information for each sale.",
      "action": {
        "tool": "Join",
        "input": "{\"left_dataframe_name\": \"sales\", \"right_dataframe_name\": \"products\", \"left_on\": \"product_id\", \"right_on\": \"product_id\", \"how\": \"inner\"}"
      }
    },
    {
      "user_input": "Action1[{\"columns\":[\"date\",\"product_id\",\"sale_amount\",\"customer_id\",\"product_name\",\"department\"],\"types\":[\"string\",\"string\",\"float64\",\"string\",\"string\",\"string\"],\"records\":[[\"2023-05-01\",\"P001\",100.50,\"C001\",\"Widget A\",\"Electronics\"]...]",
      "observation": "We have successfully joined sales and products data. Now we need to add customer information.",
      "thought": "Let's join the result with the customers table to get customer information for each sale.",
      "action": {
        "tool": "Join",
        "input": "{\"left_dataframe_name\": \"Action1\", \"right_dataframe_name\": \"customers\", \"left_on\": \"customer_id\", \"right_on\": \"customer_id\", \"how\": \"inner\"}"
      }
    },
    {
      "user_input": "Action2[{\"columns\":[\"date\",\"product_id\",\"sale_amount\",\"customer_id\",\"product_name\",\"department\",\"customer_name\",\"age\"],\"types\":[\"string\",\"string\",\"float64\",\"string\",\"string\",\"string\",\"string\",\"int\"],\"records\":[[\"2023-05-01\",\"P001\",100.50,\"C001\",\"Widget A\",\"Electronics\",\"John Doe\",30]...]",
      "observation": "We now have a complete dataset with sales, product, and customer information.",
      "thought": "We can now group the data by department and calculate the total sales and average customer age.",
      "action": {
        "tool": "GroupBy",
        "input": "{\"dataframe_name\": \"Action2\", \"group_by_columns\": [\"department\"], \"aggregation_columns\": [\"sale_amount\", \"age\"], \"aggregation_functions\": [\"sum\", \"mean\"]}"
      }
    },
    {
      "user_input": "Action3[{\"columns\":[\"department\",\"sale_amount_sum\",\"age_mean\"],\"types\":[\"string\",\"float64\",\"float64\"],\"records\":[[\"Electronics\",5000.75,35.5],[\"Clothing\",3500.25,28.7]...]",
      "observation": "We have successfully grouped and aggregated the data as required.",
      "thought": "The task is complete. We should finish the ETL process and return the final dataframe.",
      "action": {
        "tool": "Finish",
        "input": "{\"final_dataframe_name\": \"Action3\"}"
      }
    }
  ],
  "result_table": {
    "headers": ["department", "total_sales"],
    "rows": [["Electronics", 301.25]]
  }
}
 

Schema

{
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "$id": "https://pontus.bpa/pkgs/l2/pontus/agents/data-analysis-agent-output",
  "$ref": "#/$defs/DataAnalysisAgentOutput",
  "$defs": {
    "Action": {
      "properties": {
        "tool": {
          "type": "string"
        },
        "input": {
          "type": "string"
        }
      },
      "additionalProperties": false,
      "type": "object",
      "required": ["tool", "input"]
    },
    "DataAnalysisAgentOutput": {
      "properties": {
        "success": {
          "type": "boolean"
        },
        "iterations": {
          "items": {
            "$ref": "#/$defs/ReactIteration"
          },
          "type": "array"
        },
        "result_table": {
          "$ref": "#/$defs/Table"
        }
      },
      "additionalProperties": false,
      "type": "object",
      "required": ["success", "iterations", "result_table"]
    },
    "ReactIteration": {
      "properties": {
        "user_input": {
          "type": "string"
        },
        "observation": {
          "type": "string"
        },
        "thought": {
          "type": "string"
        },
        "action": {
          "$ref": "#/$defs/Action"
        }
      },
      "additionalProperties": false,
      "type": "object",
      "required": ["user_input", "observation", "thought", "action"]
    },
    "Table": {
      "properties": {
        "headers": {
          "items": {
            "type": "string"
          },
          "type": "array"
        },
        "rows": {
          "items": {
            "items": true,
            "type": "array"
          },
          "type": "array"
        }
      },
      "additionalProperties": false,
      "type": "object",
      "required": ["headers", "rows"]
    }
  }
}