Skip to content

Commit 009351f

Browse files
committed
Merge branch 'main' into stable
2 parents de66c4a + 76a7876 commit 009351f

308 files changed

Lines changed: 117468 additions & 102456 deletions

File tree

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

.github/scripts/package-lock.json

Lines changed: 11 additions & 11 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

.github/workflows/ci-coverage.yml

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
name: Coverage
2+
3+
permissions:
4+
contents: read
5+
6+
on:
7+
push:
8+
branches: [main]
9+
paths:
10+
- 'packages/superdoc/src/**'
11+
- 'packages/superdoc/vite.config.js'
12+
workflow_dispatch:
13+
14+
concurrency:
15+
group: coverage-${{ github.sha }}
16+
cancel-in-progress: true
17+
18+
jobs:
19+
coverage:
20+
runs-on: ubuntu-latest
21+
steps:
22+
- uses: actions/checkout@v6
23+
24+
- uses: pnpm/action-setup@v4
25+
26+
- uses: actions/setup-node@v6
27+
with:
28+
node-version-file: .nvmrc
29+
cache: pnpm
30+
31+
- run: pnpm install
32+
33+
- run: pnpm --filter @superdoc-dev/superdoc-yjs-collaboration build
34+
35+
- run: pnpm --filter superdoc exec vitest run --coverage
36+
37+
- uses: codecov/codecov-action@v5
38+
with:
39+
token: ${{ secrets.CODECOV_TOKEN }}
40+
files: packages/superdoc/coverage/lcov.info
41+
flags: superdoc

.github/workflows/ci-superdoc.yml

Lines changed: 33 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ jobs:
4040

4141
- uses: oven-sh/setup-bun@v2
4242
with:
43-
bun-version: 1.3.11
43+
bun-version: 1.3.12
4444

4545
- name: Install canvas system dependencies
4646
run: |
@@ -108,7 +108,7 @@ jobs:
108108

109109
- uses: oven-sh/setup-bun@v2
110110
with:
111-
bun-version: 1.3.11
111+
bun-version: 1.3.12
112112

113113
- name: Install canvas system dependencies
114114
run: |
@@ -171,13 +171,13 @@ jobs:
171171
if: matrix.name == 'other-packages'
172172
run: pnpm test:slow
173173

174-
- name: Install Playwright for UMD smoke test
174+
- name: Install Playwright for CDN smoke test
175175
if: matrix.name == 'other-packages'
176-
run: pnpm --filter @superdoc/umd-smoke-test exec playwright install --with-deps chromium
176+
run: pnpm --filter @superdoc/cdn-smoke-test exec playwright install --with-deps chromium
177177

178-
- name: Run UMD smoke test
178+
- name: Run CDN smoke test
179179
if: matrix.name == 'other-packages'
180-
working-directory: packages/superdoc/tests/umd-smoke
180+
working-directory: packages/superdoc/tests/cdn-smoke
181181
run: pnpm test
182182

183183
cli-tests:
@@ -195,7 +195,7 @@ jobs:
195195

196196
- uses: oven-sh/setup-bun@v2
197197
with:
198-
bun-version: 1.3.11
198+
bun-version: 1.3.12
199199

200200
- name: Install dependencies
201201
run: pnpm install
@@ -206,9 +206,34 @@ jobs:
206206
- name: Run CLI tests
207207
run: pnpm run test:cli
208208

209+
coverage:
210+
needs: build
211+
runs-on: ubuntu-latest
212+
steps:
213+
- uses: actions/checkout@v6
214+
215+
- uses: pnpm/action-setup@v4
216+
217+
- uses: actions/setup-node@v6
218+
with:
219+
node-version-file: .nvmrc
220+
cache: pnpm
221+
222+
- run: pnpm install
223+
224+
- run: pnpm --filter @superdoc-dev/superdoc-yjs-collaboration build
225+
226+
- run: pnpm --filter superdoc exec vitest run --coverage
227+
228+
- uses: codecov/codecov-action@v5
229+
with:
230+
token: ${{ secrets.CODECOV_TOKEN }}
231+
files: packages/superdoc/coverage/lcov.info
232+
flags: superdoc
233+
209234
validate:
210235
if: always()
211-
needs: [build, unit-tests, cli-tests]
236+
needs: [build, unit-tests, cli-tests, coverage]
212237
runs-on: ubuntu-latest
213238
steps:
214239
- name: Check results

.github/workflows/release-cli.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -64,7 +64,7 @@ jobs:
6464

6565
- uses: oven-sh/setup-bun@v2
6666
with:
67-
bun-version: 1.3.11
67+
bun-version: 1.3.12
6868

6969
- name: Cache apt packages
7070
uses: actions/cache@v5

.github/workflows/release-sdk.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ jobs:
8989

9090
- uses: oven-sh/setup-bun@v2
9191
with:
92-
bun-version: 1.3.11
92+
bun-version: 1.3.12
9393

9494
- uses: actions/setup-python@v5
9595
with:
@@ -237,7 +237,7 @@ jobs:
237237

238238
- uses: oven-sh/setup-bun@v2
239239
with:
240-
bun-version: 1.3.11
240+
bun-version: 1.3.12
241241

242242
- uses: actions/setup-python@v5
243243
with:

AGENTS.md

Lines changed: 51 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -118,22 +118,70 @@ Many packages use `.js` files with JSDoc `@typedef` for type definitions (e.g.,
118118

119119
## AI Eval Suite
120120

121-
The `evals/` directory contains a Promptfoo-based evaluation suite for validating AI tool call quality.
121+
The `evals/` directory contains a Promptfoo-based evaluation suite with three levels of evaluation.
122+
123+
### Level 1: Deterministic Evals (tool selection + argument accuracy)
122124

123125
| Command | What it does | Cost |
124126
|---------|-------------|------|
125127
| `pnpm --filter @superdoc-testing/evals run eval` | Run deterministic evals (reading + argument tests) | ~$0.30 |
126128
| `pnpm --filter @superdoc-testing/evals run eval:reading` | Run reading tool tests only | ~$0.15 |
127-
| `pnpm --filter @superdoc-testing/evals run eval:gdpval` | Run GDPval benchmark (Model+SuperDoc vs Model-Only) | ~$1-2 |
128129
| `pnpm --filter @superdoc-testing/evals run eval:view` | Open Promptfoo web UI with results | Free |
129130
| `pnpm --filter @superdoc-testing/evals run baseline:save <label>` | Save versioned results snapshot | Free |
130131

131132
Tool definitions are extracted from `packages/sdk/tools/` via `evals/tools/extract.mjs`. Run `pnpm run generate:all` first if SDK artifacts are missing.
132133

133-
Test files are YAML in `evals/tests/`. Each test has a `vars.task` prompt and JavaScript assertions that check tool call structure (Level 1: tool selection + argument accuracy, not execution).
134+
Test files are YAML in `evals/tests/`. Each test has a `vars.task` prompt and JavaScript assertions that check tool call structure (tool selection + argument accuracy, not execution).
134135

135136
The system prompt at `evals/prompts/agent.txt` is a copy of the proven prompt from `examples/eval-demo/lib/agent.ts`. Update both when changing the prompt.
136137

138+
### Level 2: GDPval Benchmark (Model+SuperDoc vs Model-Only)
139+
140+
| Command | What it does | Cost |
141+
|---------|-------------|------|
142+
| `pnpm --filter @superdoc-testing/evals run eval:gdpval` | Run GDPval benchmark | ~$1-2 |
143+
144+
### Level 3: DOCX Agent Benchmark (real agents, real documents)
145+
146+
Runs actual Claude Code and Codex CLIs against DOCX tasks, comparing their performance with and without SuperDoc tools. 4 conditions x 2 agents x N tasks.
147+
148+
**Conditions:**
149+
150+
| Condition | What the agent gets |
151+
|-----------|-------------------|
152+
| baseline | No skill; the agent figures out DOCX on its own |
153+
| baseline-with-docx-skill | Anthropic's DOCX skill (unzip + XML editing) |
154+
| superdoc-mcp | SuperDoc MCP server (`superdoc_open`, `superdoc_get_content`, etc.) |
155+
| superdoc-cli | SuperDoc CLI on PATH |
156+
157+
**Tasks:** 3 reading (extract headings, entity names, financial figures) + 3 editing (replace entity name, insert section, fill placeholders).
158+
159+
**Metrics per task:** correctness (pass/fail), collateral (no unintended changes), steps (agent turn count), latency (seconds), tokens (input + output), path (which DOCX approach was used).
160+
161+
| Command | What it does | Cost / Duration |
162+
|---------|-------------|------|
163+
| `pnpm --filter @superdoc-testing/evals run eval:benchmark` | Run full benchmark | ~15 min |
164+
| `pnpm --filter @superdoc-testing/evals run eval:benchmark:codex` | Run Codex conditions only | ~8 min |
165+
| `pnpm --filter @superdoc-testing/evals run eval:benchmark:claude` | Run Claude Code conditions only | ~8 min |
166+
| `pnpm --filter @superdoc-testing/evals run eval:benchmark:report` | Generate comparison report (Markdown + CSV) | Free |
167+
168+
**Prerequisites:**
169+
- `OPENAI_API_KEY` in `evals/.env` (for Codex; use `codex login --with-api-key` for API key auth)
170+
- Claude Code installed locally (uses local auth, no API key needed in `.env`)
171+
- MCP server built: `cd apps/mcp && pnpm run build`
172+
- CLI built: check that `apps/cli/dist/index.js` exists
173+
174+
**Key files:**
175+
176+
| File | Purpose |
177+
|------|---------|
178+
| `evals/config/benchmark.promptfoo.yaml` | Level 3 Promptfoo config (8 providers) |
179+
| `evals/suites/benchmark/tests/agent-benchmark-v2.yaml` | Benchmark tasks with assertions |
180+
| `evals/providers/claude-code-agent.mjs` | Claude Agent SDK provider |
181+
| `evals/providers/codex-agent.mjs` | Codex SDK provider |
182+
| `evals/suites/benchmark/reports/benchmark-report.mjs` | Markdown + CSV report generator |
183+
| `evals/fixtures/vendor/vendor-docx-skill.md` | Anthropic's DOCX skill for baseline-with-docx-skill condition |
184+
137185
## Generated Artifacts
138186

139187
These directories are produced by `pnpm run generate:all`:

README.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
<div align="center">
1212
<a href="https://www.npmjs.com/package/superdoc" target="_blank"><img src="https://img.shields.io/npm/v/superdoc.svg?color=1355ff" height="22px"></a>
1313
<a href="https://www.npmjs.com/package/superdoc" target="_blank"><img src="https://img.shields.io/npm/dm/superdoc.svg?color=1355ff" height="22px"></a>
14+
<a href="https://codecov.io/gh/superdoc-dev/superdoc" target="_blank"><img src="https://codecov.io/gh/superdoc-dev/superdoc/branch/main/graph/badge.svg" height="22px"></a>
1415
<a href="https://www.gnu.org/licenses/agpl-3.0" target="_blank"><img src="https://img.shields.io/badge/License-AGPL%20v3-1355ff.svg?color=1355ff" height="22px"></a>
1516
<a href="https://github.com/superdoc-dev/superdoc" target="_blank"><img src="https://img.shields.io/github/stars/superdoc-dev/superdoc?style=flat&color=1355ff" height="22px"></a>
1617
<a href="https://discord.com/invite/b9UuaZRyaB" target="_blank"><img src="https://img.shields.io/badge/discord-join-1355ff" height="22px"></a>
@@ -162,6 +163,8 @@ Special thanks to these community members who have contributed code to SuperDoc:
162163
<a href="https://github.com/iguit0"><img src="https://github.com/iguit0.png" width="50" height="50" alt="iguit0" title="Igor Alves" /></a>
163164
<a href="https://github.com/PeterHollens"><img src="https://github.com/PeterHollens.png" width="50" height="50" alt="PeterHollens" title="Peter Hollens" /></a>
164165
<a href="https://github.com/baristaGeek"><img src="https://github.com/baristaGeek.png" width="50" height="50" alt="baristaGeek" title="Esteban Vargas" /></a>
166+
<a href="https://github.com/Anuj52"><img src="https://github.com/Anuj52.png" width="50" height="50" alt="Anuj52" title="Anuj Chaudhary" /></a>
167+
<a href="https://github.com/Abdeltoto"><img src="https://github.com/Abdeltoto.png" width="50" height="50" alt="Abdeltoto" title="Abdel ATIA" /></a>
165168

166169
Want to see your avatar here? Check the [Contributing Guide](CONTRIBUTING.md) to get started.
167170

apps/cli/src/__tests__/lib/context.test.ts

Lines changed: 62 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,68 @@ describe('normalizeContextMetadata', () => {
102102
expect(result.collaboration).toBeUndefined();
103103
});
104104

105+
test('preserves websocket params on rehydration', () => {
106+
const metadata = makeMetadata({
107+
sessionType: 'collab',
108+
collaboration: {
109+
providerType: 'y-websocket',
110+
url: 'ws://localhost:4000',
111+
documentId: 'test-doc',
112+
params: { customAttributions: 'agent_id:abc', region: 'us-east-1' },
113+
} as any,
114+
});
115+
const result = normalizeContextMetadata(metadata);
116+
expect(result.sessionType).toBe('collab');
117+
expect(result.collaboration).toMatchObject({
118+
params: { customAttributions: 'agent_id:abc', region: 'us-east-1' },
119+
});
120+
});
121+
122+
test('preserves websocket profile when params is absent', () => {
123+
const metadata = makeMetadata({
124+
sessionType: 'collab',
125+
collaboration: {
126+
providerType: 'y-websocket',
127+
url: 'ws://localhost:4000',
128+
documentId: 'test-doc',
129+
},
130+
});
131+
const result = normalizeContextMetadata(metadata);
132+
expect(result.sessionType).toBe('collab');
133+
expect(result.collaboration).toBeDefined();
134+
expect((result.collaboration as any).params).toBeUndefined();
135+
});
136+
137+
test('rejects websocket profile with non-object params', () => {
138+
const metadata = makeMetadata({
139+
sessionType: 'collab',
140+
collaboration: {
141+
providerType: 'y-websocket',
142+
url: 'ws://localhost:4000',
143+
documentId: 'test-doc',
144+
params: 'not-an-object',
145+
} as any,
146+
});
147+
const result = normalizeContextMetadata(metadata);
148+
expect(result.sessionType).toBe('local');
149+
expect(result.collaboration).toBeUndefined();
150+
});
151+
152+
test('rejects websocket profile with non-string param values', () => {
153+
const metadata = makeMetadata({
154+
sessionType: 'collab',
155+
collaboration: {
156+
providerType: 'y-websocket',
157+
url: 'ws://localhost:4000',
158+
documentId: 'test-doc',
159+
params: { count: 42 },
160+
} as any,
161+
});
162+
const result = normalizeContextMetadata(metadata);
163+
expect(result.sessionType).toBe('local');
164+
expect(result.collaboration).toBeUndefined();
165+
});
166+
105167
test('preserves Liveblocks collab profile with publicApiKey', () => {
106168
const metadata = makeMetadata({
107169
sessionType: 'collab',

0 commit comments

Comments
 (0)