Skip to content

Commit d1e4a10

Browse files
authored
Merge pull request #213 from mkimhi/agentbench-lite-suite
Add lite presets for minimal local runs
2 parents 578c5e9 + a3cc91a commit d1e4a10

File tree

5 files changed

+187
-0
lines changed

5 files changed

+187
-0
lines changed

.github/workflows/lite-configs.yml

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
name: validate-lite-configs
2+
3+
on:
4+
pull_request:
5+
push:
6+
branches: [ main ]
7+
8+
jobs:
9+
validate:
10+
runs-on: ubuntu-latest
11+
steps:
12+
- uses: actions/checkout@v4
13+
14+
- name: Set up Python
15+
uses: actions/setup-python@v5
16+
with:
17+
python-version: "3.9"
18+
19+
- name: Install minimal deps
20+
run: |
21+
python -m pip install --upgrade pip
22+
python -m pip install pyyaml
23+
24+
- name: Validate lite configs
25+
run: |
26+
python scripts/validate_lite_configs.py

README.md

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,9 @@ and [Program Entrance Guide](docs/Entrance_en.md).
151151

152152
Clone this repo and install the dependencies.
153153

154+
> **Python version note:** AgentBench pins older scientific Python deps (e.g. `numpy~=1.23.x`).
155+
> Using the recommended **Python 3.9** (via conda) is the most reliable way to install dependencies.
156+
154157
```bash
155158
cd AgentBench
156159
conda create -n agent-bench python=3.9
@@ -200,6 +203,14 @@ python -m src.start_task -a
200203
This will launch five task_workers each for `dbbench-std` and `os-std` tasks and automatically connect them
201204
to the controller on port 5000. **After executing this command, please allow approximately 1 minute for the task setup to complete.** If the terminal shows ".... 200 OK", you can open another terminal and follow step 4.
202205

206+
#### Lite preset (laptops / limited RAM)
207+
208+
If you want to start with minimal concurrency (1 worker per task), use the lite preset:
209+
210+
```bash
211+
python -m src.start_task -a --config configs/start_task_lite.yaml
212+
```
213+
203214
### Step 4. Start the assigner
204215

205216
This step is to actually start the tasks.
@@ -210,6 +221,12 @@ If everything is correctly configured so far, you can now initiate the task test
210221
python -m src.assigner
211222
```
212223

224+
If you started the task server with the lite preset, you can also run the lite evaluation preset:
225+
226+
```bash
227+
python -m src.assigner --config configs/assignments/lite.yaml
228+
```
229+
213230
## Next Steps
214231

215232
If you wish to launch more tasks or use other models, you can refer to the content

configs/assignments/lite.yaml

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
# Lite preset: evaluate only low-resource tasks with minimal concurrency.
2+
#
3+
# Usage:
4+
# python -m src.assigner --config configs/assignments/lite.yaml
5+
6+
import: definition.yaml
7+
8+
concurrency:
9+
task:
10+
dbbench-std: 1
11+
os-std: 1
12+
agent:
13+
gpt-3.5-turbo-0613: 1
14+
15+
assignments:
16+
- agent:
17+
- gpt-3.5-turbo-0613
18+
task:
19+
- dbbench-std
20+
- os-std
21+
22+
output: "outputs/{TIMESTAMP}"

configs/start_task_lite.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
# Lite preset: start only low-resource tasks.
2+
# Intended for laptops / limited RAM.
3+
#
4+
# Usage:
5+
# python -m src.start_task -a --config configs/start_task_lite.yaml
6+
#
7+
# You can still override tasks at runtime:
8+
# python -m src.start_task -a --config configs/start_task_lite.yaml -s dbbench-std 2 os-std 2
9+
10+
definition:
11+
import: tasks/task_assembly.yaml
12+
13+
start:
14+
dbbench-std: 1
15+
os-std: 1

scripts/validate_lite_configs.py

Lines changed: 107 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,107 @@
1+
#!/usr/bin/env python3
2+
"""Validate AgentBench lite preset configs.
3+
4+
Goal: a fast smoke check that doesn't require building Docker images or running
5+
any tasks. It only validates that:
6+
- lite config files exist and are valid YAML
7+
- tasks started by configs/start_task_lite.yaml match, by the prefix before the first "-", a file imported in configs/tasks/task_assembly.yaml
8+
- lite assignment references tasks that exist in the task assembly
9+
10+
Run:
11+
python scripts/validate_lite_configs.py
12+
"""
13+
14+
from __future__ import annotations
15+
16+
import pathlib
17+
import sys
18+
from typing import Any, Dict, Set
19+
20+
import yaml
21+
22+
ROOT = pathlib.Path(__file__).resolve().parents[1]
23+
24+
25+
def load_yaml(path: pathlib.Path) -> Dict[str, Any]:
    """Load a YAML file and return its top-level mapping.

    Args:
        path: Location of the YAML file; it is opened as UTF-8 text.

    Returns:
        The parsed top-level mapping, or an empty dict when the document is
        empty (i.e. ``yaml.safe_load`` returned ``None``).

    Raises:
        RuntimeError: If the file cannot be opened or parsed, or if the
            top-level YAML node is not a mapping.
    """
    try:
        with path.open("r", encoding="utf-8") as f:
            data = yaml.safe_load(f)
    except Exception as e:
        # Any I/O, decoding or YAML error is reported uniformly as a
        # RuntimeError; chain the cause so the original traceback is kept.
        raise RuntimeError(f"Failed to parse YAML: {path}: {e}") from e
    if data is None:
        return {}
    if not isinstance(data, dict):
        raise RuntimeError(f"Expected mapping at top-level in {path}, got {type(data)}")
    return data
36+
37+
38+
def _task_base(name: Any) -> str:
    """Return the part of a task name before its first "-" ("os-std" -> "os")."""
    return str(name).split("-")[0]


def _unknown_tasks(names: Any, known: Set[str]) -> list:
    """Return, in iteration order, the entries of *names* whose base is not in *known*."""
    return [name for name in names if _task_base(name) not in known]


def main() -> int:
    """Validate the lite preset configs against the task assembly.

    The check is a heuristic: a task name is considered known when the part
    before its first "-" equals the file stem of an entry in the ``import``
    field of configs/tasks/task_assembly.yaml. The imported files themselves
    are not opened.

    Returns:
        0 when every check passes.

    Raises:
        RuntimeError: If a required file is missing, a config has an
            unexpected shape, or a referenced task is unknown.
    """
    start_lite = ROOT / "configs" / "start_task_lite.yaml"
    assign_lite = ROOT / "configs" / "assignments" / "lite.yaml"
    task_assembly = ROOT / "configs" / "tasks" / "task_assembly.yaml"

    for p in (start_lite, assign_lite, task_assembly):
        if not p.exists():
            raise RuntimeError(f"Missing required file: {p}")

    start_cfg = load_yaml(start_lite)
    assign_cfg = load_yaml(assign_lite)
    assembly_cfg = load_yaml(task_assembly)

    # Collect task names from imported task configs (based on file names).
    imports = assembly_cfg.get("import", [])
    if isinstance(imports, str):
        # configs/assignments/lite.yaml writes `import:` as a single string,
        # so tolerate that form here as well instead of rejecting it.
        # NOTE(review): confirm the project's config loader accepts a scalar
        # `import` for task_assembly.yaml too.
        imports = [imports]
    if not isinstance(imports, list) or not all(isinstance(x, str) for x in imports):
        raise RuntimeError("configs/tasks/task_assembly.yaml must have a list field: import")

    # rel like "webshop.yaml" -> file stem "webshop".
    task_names: Set[str] = {pathlib.Path(rel).stem for rel in imports}

    # start_task_lite.yaml: ensure start keys look like known tasks.
    start = start_cfg.get("start", {})
    if not isinstance(start, dict) or not start:
        raise RuntimeError("configs/start_task_lite.yaml must have non-empty mapping field: start")

    # key=str keeps the sort well-defined even if YAML produced non-string keys.
    unknown_in_start = sorted(_unknown_tasks(start, task_names), key=str)
    if unknown_in_start:
        raise RuntimeError(
            "start_task_lite.yaml references tasks not present in task_assembly imports: "
            + ", ".join(map(str, unknown_in_start))
        )

    # assignments/lite.yaml: ensure tasks exist.
    assignments = assign_cfg.get("assignments")
    if not isinstance(assignments, list) or not assignments:
        raise RuntimeError("configs/assignments/lite.yaml must have a non-empty list field: assignments")

    unknown_in_assign = []
    for a in assignments:
        if not isinstance(a, dict):
            raise RuntimeError("Each assignment must be a mapping")
        tasks = a.get("task", [])
        if isinstance(tasks, str):
            tasks = [tasks]
        if not isinstance(tasks, list):
            raise RuntimeError("assignment.task must be a string or list")
        unknown_in_assign.extend(_unknown_tasks(tasks, task_names))

    if unknown_in_assign:
        raise RuntimeError(
            "assignments/lite.yaml references tasks not present in task_assembly imports: "
            + ", ".join(map(str, unknown_in_assign))
        )

    print("OK: lite configs look valid")
    return 0
100+
101+
102+
if __name__ == "__main__":
    # Script entry point: run the validation and turn any failure into a
    # one-line message on stderr plus a non-zero exit status.
    try:
        exit_code = main()
    except Exception as exc:
        print(f"ERROR: {exc}", file=sys.stderr)
        exit_code = 1
    raise SystemExit(exit_code)

0 commit comments

Comments
 (0)