-
Notifications
You must be signed in to change notification settings - Fork 804
Expand file tree
/
Copy pathgenerate_scenarios.py
More file actions
227 lines (190 loc) · 8.46 KB
/
generate_scenarios.py
File metadata and controls
227 lines (190 loc) · 8.46 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
"""Scenario generation for MCP tools."""
import json
import time
from typing import Any, Dict, List, Optional
import openai
from art.mcp.types import GeneratedScenarioCollection, MCPResource, MCPTool
from art.utils.logging import _C, dim, err, info, ok, step
def preview_scenarios(scenarios: List[Dict[str, Any]], n: int = 5) -> None:
    """Print a one-line preview for each of the first ``n`` scenarios.

    Args:
        scenarios: Scenario dictionaries. Each must contain a "task" string;
            "difficulty" is optional and is shown as "N/A" when absent.
        n: Maximum number of scenarios to preview (default: 5). Values larger
            than ``len(scenarios)`` are clamped; zero or negative values
            print nothing.
    """
    max_task_chars = 120
    # Clamp to [0, len(scenarios)] so a negative n cannot turn into a
    # "drop the last items" slice below.
    count = max(0, min(n, len(scenarios)))
    for position, scenario in enumerate(scenarios[:count], start=1):
        task = scenario["task"]
        task_preview = task[:max_task_chars].strip()
        # Mark tasks that were cut off with a trailing ellipsis character.
        ellipsis = "…" if len(task) > max_task_chars else ""
        difficulty = scenario.get("difficulty", "N/A")
        dim(
            f" {position}. {task_preview}{ellipsis} "
            f"{_C.GRAY}(difficulty {difficulty}/5){_C.RESET}"
        )
def _tools_to_dicts(tools: Any) -> List[Dict[str, Any]]:
    """Normalize each tool (MCPTool, dict, or attribute-bearing object) to a dict."""
    tools_info: List[Dict[str, Any]] = []
    for tool in tools or []:
        if isinstance(tool, MCPTool):
            tools_info.append(tool.to_dict())
        elif isinstance(tool, dict):
            tools_info.append(
                {
                    "name": tool.get("name", ""),
                    "description": tool.get("description", ""),
                    "parameters": tool.get("parameters", {}),
                }
            )
        else:
            # Duck-typed object: read the same three fields as attributes.
            tools_info.append(
                {
                    "name": getattr(tool, "name", ""),
                    "description": getattr(tool, "description", ""),
                    "parameters": getattr(tool, "parameters", {}),
                }
            )
    return tools_info


def _resources_to_dicts(resources: Any) -> List[Any]:
    """Convert MCPResource items via ``to_dict()``; pass other items through unchanged."""
    return [
        resource.to_dict() if isinstance(resource, MCPResource) else resource
        for resource in resources or []
    ]


def _build_scenario_prompt(
    tools_info: List[Dict[str, Any]],
    resources_info: List[Any],
    num_scenarios: int,
    custom_instructions: Optional[str],
) -> str:
    """Render the generation prompt from the tool/resource descriptions."""
    tools_description = json.dumps(tools_info, indent=2)
    resources_description = (
        json.dumps(resources_info, indent=2)
        if resources_info
        else "No resources available"
    )
    prompt = f"""You are an expert at creating realistic scenarios for testing AI agents that interact with MCP (Model Context Protocol) servers.
Given the following available tools and resources from an MCP server, generate {num_scenarios} diverse, realistic scenarios that a user might want to accomplish using these tools.
AVAILABLE TOOLS:
{tools_description}
AVAILABLE RESOURCES:
{resources_description}
Requirements for scenarios:
1. Each scenario should be a task that can be accomplished using the available tools
2. Scenarios should vary in complexity - some simple (1-2 tool calls), some complex (multiple tool calls)
3. Scenarios should cover different use cases and tool combinations (though the task should not specify which tools to use)
4. Each scenario should be realistic - something a real user might actually want to do
5. Assign a difficulty rating from 1 (easy, single tool call) to 5 (hard, complex multi-step analysis)
6. The task should always include generating a summary of the work done and a thorough analysis and report of the results
You must respond with a JSON object containing a "scenarios" array of exactly {num_scenarios} objects. Each object must have:
- "task": string describing the scenario
- "difficulty": integer from 1-5 representing complexity
"""
    if custom_instructions:
        prompt += f"\n\nPay close attention to the following instructions when generating scenarios:\n\n{custom_instructions}"
    return prompt


def _build_response_schema(num_scenarios: int) -> Dict[str, Any]:
    """Build the JSON schema requiring exactly ``num_scenarios`` task/difficulty objects."""
    return {
        "type": "object",
        "properties": {
            "scenarios": {
                "type": "array",
                "items": {
                    "type": "object",
                    "properties": {
                        "task": {"type": "string"},
                        "difficulty": {"type": "integer", "minimum": 1, "maximum": 5},
                    },
                    "required": ["task", "difficulty"],
                    "additionalProperties": False,
                },
                "minItems": num_scenarios,
                "maxItems": num_scenarios,
            }
        },
        "required": ["scenarios"],
        "additionalProperties": False,
    }


def _extract_scenarios(result: Any) -> List[Any]:
    """Pull the scenario list out of the parsed model response.

    Accepts ``{"scenarios": [...]}``, a bare list, or a dict whose first value
    is the list. Raises ValueError for any other shape.
    """
    if isinstance(result, dict):
        if "scenarios" in result:
            scenarios = result["scenarios"]
        else:
            # Fall back to the first value; None when the object is empty.
            scenarios = next(iter(result.values()), None)
    else:
        scenarios = result
    if not isinstance(scenarios, list):
        err("Model response does not contain a list of scenarios.")
        raise ValueError("Model response does not contain a list of scenarios")
    return scenarios


async def generate_scenarios(
    tools: List[MCPTool] | List[Dict[str, Any]],
    resources: Optional[List[MCPResource] | List[Dict[str, Any]]] = None,
    num_scenarios: int = 24,
    show_preview: bool = True,
    custom_instructions: Optional[str] = None,
    generator_model: str = "openai/gpt-4.1-mini",
    generator_api_key: Optional[str] = None,
    generator_base_url: str = "https://openrouter.ai/api/v1",
) -> GeneratedScenarioCollection:
    """
    Generate scenarios for MCP tools.

    Args:
        tools: List of Tool objects or list of tool dictionaries
        resources: Optional list of Resource objects or list of resource dictionaries
        num_scenarios: Number of scenarios to generate (default: 24)
        show_preview: Whether to show a preview of generated scenarios (default: True)
        custom_instructions: Optional custom instructions for scenario generation
        generator_model: Model to use for generation (default: "openai/gpt-4.1-mini")
        generator_api_key: API key for the generator model. If None, will use OPENROUTER_API_KEY env var
        generator_base_url: Base URL for the API (default: OpenRouter)

    Returns:
        GeneratedScenarioCollection containing the generated scenarios

    Raises:
        ValueError: If no API key is available, if neither tools nor resources
            are provided, if the model returns no content or no scenario list,
            or if fewer than ``num_scenarios`` scenarios come back.
        json.JSONDecodeError: If the model response is not valid JSON.
    """
    import asyncio
    import os

    t0 = time.perf_counter()

    # Handle API key
    if generator_api_key is None:
        generator_api_key = os.getenv("OPENROUTER_API_KEY")
    if not generator_api_key:
        raise ValueError(
            "generator_api_key is required or OPENROUTER_API_KEY env var must be set"
        )

    # Validate that we have at least tools or resources
    if not tools and not resources:
        raise ValueError("At least one tool or resource must be provided")

    ok(f"Using model: {generator_model}")

    # Convert per item so mixed lists of objects and dicts are handled.
    tools_info = _tools_to_dicts(tools)
    resources_info = _resources_to_dicts(resources)
    info(f"Available: {len(tools_info)} tool(s), {len(resources_info)} resource(s).")

    step("Preparing prompt & JSON schema …")
    prompt = _build_scenario_prompt(
        tools_info, resources_info, num_scenarios, custom_instructions
    )
    response_schema = _build_response_schema(num_scenarios)

    step(f"Calling model: {_C.BOLD}{generator_model}{_C.RESET} …")
    client_openai = openai.OpenAI(
        api_key=generator_api_key,
        base_url=generator_base_url,
    )
    t1 = time.perf_counter()
    # The client call is synchronous; run it in a worker thread so this
    # coroutine does not block the event loop while waiting on the network.
    response = await asyncio.to_thread(
        client_openai.chat.completions.create,
        model=generator_model,
        messages=[{"role": "user", "content": prompt}],
        max_completion_tokens=8000,
        response_format={
            "type": "json_schema",
            "json_schema": {"name": "scenario_list", "schema": response_schema},
        },
    )
    dt = time.perf_counter() - t1
    ok(f"Model responded in {dt:.2f}s.")

    if not response.choices:
        err("Model response contains no choices.")
        raise ValueError("Model response contains no choices")
    content = response.choices[0].message.content
    if content is None:
        err("Model response content is None.")
        raise ValueError("Model response content is None")
    info(f"Raw content length: {len(content)} chars.")

    # Parse JSON
    try:
        result = json.loads(content)
    except json.JSONDecodeError as e:
        err("Failed to parse JSON from model response.")
        dim(f" Exception: {e}")
        dim(" First 500 chars of response content:")
        dim(content[:500] if content else "No content")
        raise

    # Extract scenarios
    scenarios = _extract_scenarios(result)

    # Validate count: too few is an error, too many is truncated.
    if len(scenarios) < num_scenarios:
        err(f"Expected {num_scenarios} scenarios, got {len(scenarios)}.")
        raise ValueError(f"Expected {num_scenarios} scenarios, got {len(scenarios)}")
    elif len(scenarios) > num_scenarios:
        ok(
            f"Expected {num_scenarios} scenarios, got {len(scenarios)}. Truncating to {num_scenarios}."
        )
        scenarios = scenarios[:num_scenarios]
    ok(f"Parsed {len(scenarios)} scenario(s) successfully.")

    # Convert to ScenarioCollection
    scenario_collection = GeneratedScenarioCollection.from_dicts(scenarios)

    # Show difficulty distribution and preview using the collection methods
    scenario_collection.print_difficulty_distribution()
    if show_preview:
        scenario_collection.preview(n=min(5, num_scenarios))

    total_time = time.perf_counter() - t0
    ok(f"Generated {len(scenario_collection)} scenarios in {total_time:.2f}s total.")
    return scenario_collection