L2 Construct: data_analysis
data ai agentDescription
Data Analysis Agent is an AI-powered tool for performing data analysis tasks on tabular data. It takes a task description and a set of input tables, and returns the processed result table along with the steps (iterations) taken to achieve the result.
Input:
- task: A string describing the data analysis task to be performed.
- tables: A map of table names to Table structures, representing the input data.
Output:
- result_table: The final processed Table structure after performing the data analysis task.
- iterations: An array of steps taken to achieve the result.
The agent uses to break down complex data analysis tasks into a series of simpler operations, using tools like Join, Filter, GroupBy, and Finish to manipulate the data.
L2 Data
- Provider: pontus
- Module: agents
- Action: data_analysis
Example Step
{
"name": "insert-your-step-name",
"type": "l2",
"l2_data": {
"provider": "pontus",
"module": "agents",
"action": "data_analysis",
"metadata": {}
}
}
Input
Example
{
"task": "Join sales and product tables, then calculate total sales by department",
"tables": [
{
"name": "sales",
"description": "All sales for the store",
"headers": ["date", "product_id", "sale_amount"],
"rows": [["2023-05-01", "P001", 100.5], ["2023-05-02", "P002", 200.75]]
},
{
"name": "products",
"description": "Available products for purchase in the store by department",
"headers": ["product_id", "product_name", "department"],
"rows": [
["P001", "Widget A", "Electronics"],
["P002", "Gadget B", "Electronics"]
]
},
{
"name": "customers",
"description": "Customer information for the store",
"headers": ["customer_id", "customer_name", "age"],
"rows": [
["C001", "John Doe", 30],
["C002", "Jane Smith", 25],
["C003", "Bob Johnson", 40]
]
}
],
"display_options": {
"max_columns_to_show": 0
}
}
Schema
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://pontus.bpa/pkgs/l2/pontus/agents/data-analysis-agent-input",
"$ref": "#/$defs/DataAnalysisAgentInput",
"$defs": {
"DataAnalysisAgentInput": {
"properties": {
"task": {
"type": "string"
},
"tables": {
"items": {
"$ref": "#/$defs/Table"
},
"type": "array"
},
"display_options": {
"$ref": "#/$defs/DisplayOptions"
}
},
"additionalProperties": false,
"type": "object",
"required": ["task", "tables"]
},
"DisplayOptions": {
"properties": {
"max_columns_to_show": {
"type": "integer"
}
},
"additionalProperties": false,
"type": "object",
"required": ["max_columns_to_show"]
},
"Table": {
"properties": {
"name": {
"type": "string"
},
"description": {
"type": "string"
},
"headers": {
"items": {
"type": "string"
},
"type": "array"
},
"rows": {
"items": {
"items": true,
"type": "array"
},
"type": "array"
}
},
"additionalProperties": false,
"type": "object",
"required": ["headers", "rows"]
}
}
}
Output
Example
{
"success": true,
"iterations": [
{
"user_input": "We have the following tables: 'sales' (columns: date, product_id, sale_amount, customer_id), 'products' (columns: product_id, product_name, department), 'customers' (columns: customer_id, customer_name, age). Join the sales data with product and customer information, then group by department and calculate the total sales and average customer age for each department.",
"observation": "We need to perform multiple operations: joining tables, filtering, and aggregating data.",
"thought": "Let's start by joining the sales table with the products table to get product information for each sale.",
"action": {
"tool": "Join",
"input": "{\"left_dataframe_name\": \"sales\", \"right_dataframe_name\": \"products\", \"left_on\": \"product_id\", \"right_on\": \"product_id\", \"how\": \"inner\"}"
}
},
{
"user_input": "Action1[{\"columns\":[\"date\",\"product_id\",\"sale_amount\",\"customer_id\",\"product_name\",\"department\"],\"types\":[\"string\",\"string\",\"float64\",\"string\",\"string\",\"string\"],\"records\":[[\"2023-05-01\",\"P001\",100.50,\"C001\",\"Widget A\",\"Electronics\"]...]",
"observation": "We have successfully joined sales and products data. Now we need to add customer information.",
"thought": "Let's join the result with the customers table to get customer information for each sale.",
"action": {
"tool": "Join",
"input": "{\"left_dataframe_name\": \"Action1\", \"right_dataframe_name\": \"customers\", \"left_on\": \"customer_id\", \"right_on\": \"customer_id\", \"how\": \"inner\"}"
}
},
{
"user_input": "Action2[{\"columns\":[\"date\",\"product_id\",\"sale_amount\",\"customer_id\",\"product_name\",\"department\",\"customer_name\",\"age\"],\"types\":[\"string\",\"string\",\"float64\",\"string\",\"string\",\"string\",\"string\",\"int\"],\"records\":[[\"2023-05-01\",\"P001\",100.50,\"C001\",\"Widget A\",\"Electronics\",\"John Doe\",30]...]",
"observation": "We now have a complete dataset with sales, product, and customer information.",
"thought": "We can now group the data by department and calculate the total sales and average customer age.",
"action": {
"tool": "GroupBy",
"input": "{\"dataframe_name\": \"Action2\", \"group_by_columns\": [\"department\"], \"aggregation_columns\": [\"sale_amount\", \"age\"], \"aggregation_functions\": [\"sum\", \"mean\"]}"
}
},
{
"user_input": "Action3[{\"columns\":[\"department\",\"sale_amount_sum\",\"age_mean\"],\"types\":[\"string\",\"float64\",\"float64\"],\"records\":[[\"Electronics\",5000.75,35.5],[\"Clothing\",3500.25,28.7]...]",
"observation": "We have successfully grouped and aggregated the data as required.",
"thought": "The task is complete. We should finish the ETL process and return the final dataframe.",
"action": {
"tool": "Finish",
"input": "{\"final_dataframe_name\": \"Action3\"}"
}
}
],
"result_table": {
"headers": ["department", "total_sales"],
"rows": [["Electronics", 301.25]]
}
}
Schema
{
"$schema": "https://json-schema.org/draft/2020-12/schema",
"$id": "https://pontus.bpa/pkgs/l2/pontus/agents/data-analysis-agent-output",
"$ref": "#/$defs/DataAnalysisAgentOutput",
"$defs": {
"Action": {
"properties": {
"tool": {
"type": "string"
},
"input": {
"type": "string"
}
},
"additionalProperties": false,
"type": "object",
"required": ["tool", "input"]
},
"DataAnalysisAgentOutput": {
"properties": {
"success": {
"type": "boolean"
},
"iterations": {
"items": {
"$ref": "#/$defs/ReactIteration"
},
"type": "array"
},
"result_table": {
"$ref": "#/$defs/Table"
}
},
"additionalProperties": false,
"type": "object",
"required": ["success", "iterations", "result_table"]
},
"ReactIteration": {
"properties": {
"user_input": {
"type": "string"
},
"observation": {
"type": "string"
},
"thought": {
"type": "string"
},
"action": {
"$ref": "#/$defs/Action"
}
},
"additionalProperties": false,
"type": "object",
"required": ["user_input", "observation", "thought", "action"]
},
"Table": {
"properties": {
"name": {
"type": "string"
},
"description": {
"type": "string"
},
"headers": {
"items": {
"type": "string"
},
"type": "array"
},
"rows": {
"items": {
"items": true,
"type": "array"
},
"type": "array"
}
},
"additionalProperties": false,
"type": "object",
"required": ["headers", "rows"]
}
}
}