superdoc-dev
diff --git a/‎.github/scripts/package-lock.json‎
Lines changed: 11 additions & 11 deletions b/‎.github/scripts/package-lock.json‎
Lines changed: 11 additions & 11 deletions
diff --git a/‎.github/workflows/ci-coverage.yml‎
Lines changed: 41 additions & 0 deletions b/‎.github/workflows/ci-coverage.yml‎
Lines changed: 41 additions & 0 deletions
diff --git a/‎.github/workflows/ci-superdoc.yml‎
Lines changed: 33 additions & 8 deletions b/‎.github/workflows/ci-superdoc.yml‎
Lines changed: 33 additions & 8 deletions
diff --git a/‎.github/workflows/release-cli.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/release-cli.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎.github/workflows/release-sdk.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/release-sdk.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎AGENTS.md‎
Lines changed: 51 additions & 3 deletions b/‎AGENTS.md‎
Lines changed: 51 additions & 3 deletions
diff --git a/‎README.md‎
Lines changed: 3 additions & 0 deletions b/‎README.md‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎apps/cli/src/__tests__/lib/context.test.ts‎
Lines changed: 62 additions & 0 deletions b/‎apps/cli/src/__tests__/lib/context.test.ts‎
Lines changed: 62 additions & 0 deletions
@@ -0,0 +1,41 @@
+name: Coverage  # runs the superdoc Vitest suite with coverage and uploads the LCOV report to Codecov
+
+permissions:
+  contents: read  # only read access to repository contents is requested
+
+on:
+  push:
+    branches: [main]  # triggered by pushes to main...
+    paths:  # ...and only when one of these paths changes
+      - 'packages/superdoc/src/**'
+      - 'packages/superdoc/vite.config.js'
+  workflow_dispatch:  # also allows manual runs
+
+concurrency:
+  group: coverage-${{ github.sha }}  # keyed by commit SHA, so only runs for the same commit share a group
+  cancel-in-progress: true  # a newer run in the same group cancels the one in progress
+
+jobs:
+  coverage:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: pnpm/action-setup@v4
+
+      - uses: actions/setup-node@v6
+        with:
+          node-version-file: .nvmrc  # Node version is read from .nvmrc
+          cache: pnpm  # reuse the pnpm cache between runs
+
+      - run: pnpm install
+
+      - run: pnpm --filter @superdoc-dev/superdoc-yjs-collaboration build  # NOTE(review): presumably a build prerequisite of the superdoc tests; confirm
+
+      - run: pnpm --filter superdoc exec vitest run --coverage  # single (non-watch) Vitest run with coverage enabled
+
+      - uses: codecov/codecov-action@v5
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}  # upload token taken from repository secrets
+          files: packages/superdoc/coverage/lcov.info  # assumes Vitest writes its LCOV report here; confirm against vite.config.js
+          flags: superdoc  # Codecov flag attached to this upload
@@ -40,7 +40,7 @@ jobs:
 
       - uses: oven-sh/setup-bun@v2
         with:
-          bun-version: 1.3.11
+          bun-version: 1.3.12
 
       - name: Install canvas system dependencies
         run: |
@@ -108,7 +108,7 @@ jobs:
 
       - uses: oven-sh/setup-bun@v2
         with:
-          bun-version: 1.3.11
+          bun-version: 1.3.12
 
       - name: Install canvas system dependencies
         run: |
@@ -171,13 +171,13 @@ jobs:
         if: matrix.name == 'other-packages'
         run: pnpm test:slow
 
-      - name: Install Playwright for UMD smoke test
+      - name: Install Playwright for CDN smoke test
         if: matrix.name == 'other-packages'
-        run: pnpm --filter @superdoc/umd-smoke-test exec playwright install --with-deps chromium
+        run: pnpm --filter @superdoc/cdn-smoke-test exec playwright install --with-deps chromium
 
-      - name: Run UMD smoke test
+      - name: Run CDN smoke test
         if: matrix.name == 'other-packages'
-        working-directory: packages/superdoc/tests/umd-smoke
+        working-directory: packages/superdoc/tests/cdn-smoke
         run: pnpm test
 
   cli-tests:
@@ -195,7 +195,7 @@ jobs:
 
       - uses: oven-sh/setup-bun@v2
         with:
-          bun-version: 1.3.11
+          bun-version: 1.3.12
 
       - name: Install dependencies
         run: pnpm install
@@ -206,9 +206,34 @@ jobs:
       - name: Run CLI tests
         run: pnpm run test:cli
 
+  coverage:  # runs the superdoc Vitest suite with coverage and uploads the LCOV report to Codecov
+    needs: build  # depends on the build job
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v6
+
+      - uses: pnpm/action-setup@v4
+
+      - uses: actions/setup-node@v6
+        with:
+          node-version-file: .nvmrc  # Node version is read from .nvmrc
+          cache: pnpm  # reuse the pnpm cache between runs
+
+      - run: pnpm install
+
+      - run: pnpm --filter @superdoc-dev/superdoc-yjs-collaboration build  # NOTE(review): presumably a build prerequisite of the superdoc tests; confirm
+
+      - run: pnpm --filter superdoc exec vitest run --coverage  # single (non-watch) Vitest run with coverage enabled
+
+      - uses: codecov/codecov-action@v5
+        with:
+          token: ${{ secrets.CODECOV_TOKEN }}  # upload token taken from repository secrets
+          files: packages/superdoc/coverage/lcov.info  # assumes Vitest writes its LCOV report here; confirm against vite.config.js
+          flags: superdoc  # Codecov flag attached to this upload
+
   validate:
     if: always()
-    needs: [build, unit-tests, cli-tests]
+    needs: [build, unit-tests, cli-tests, coverage]
     runs-on: ubuntu-latest
     steps:
       - name: Check results
 
@@ -64,7 +64,7 @@ jobs:
 
       - uses: oven-sh/setup-bun@v2
         with:
-          bun-version: 1.3.11
+          bun-version: 1.3.12
 
       - name: Cache apt packages
         uses: actions/cache@v5
 
@@ -89,7 +89,7 @@ jobs:
 
       - uses: oven-sh/setup-bun@v2
         with:
-          bun-version: 1.3.11
+          bun-version: 1.3.12
 
       - uses: actions/setup-python@v5
         with:
@@ -237,7 +237,7 @@ jobs:
 
       - uses: oven-sh/setup-bun@v2
         with:
-          bun-version: 1.3.11
+          bun-version: 1.3.12
 
       - uses: actions/setup-python@v5
         with:
 
@@ -118,22 +118,70 @@ Many packages use `.js` files with JSDoc `@typedef` for type definitions (e.g.,
 
 ## AI Eval Suite
 
-The `evals/` directory contains a Promptfoo-based evaluation suite for validating AI tool call quality.
+The `evals/` directory contains a Promptfoo-based evaluation suite with three levels of evaluation.
+
+### Level 1: Deterministic Evals (tool selection + argument accuracy)
 
 | Command | What it does | Cost |
 |---------|-------------|------|
 | `pnpm --filter @superdoc-testing/evals run eval` | Run deterministic evals (reading + argument tests) | ~$0.30 |
 | `pnpm --filter @superdoc-testing/evals run eval:reading` | Run reading tool tests only | ~$0.15 |
-| `pnpm --filter @superdoc-testing/evals run eval:gdpval` | Run GDPval benchmark (Model+SuperDoc vs Model-Only) | ~$1-2 |
 | `pnpm --filter @superdoc-testing/evals run eval:view` | Open Promptfoo web UI with results | Free |
 | `pnpm --filter @superdoc-testing/evals run baseline:save <label>` | Save versioned results snapshot | Free |
 
 Tool definitions are extracted from `packages/sdk/tools/` via `evals/tools/extract.mjs`. Run `pnpm run generate:all` first if SDK artifacts are missing.
 
-Test files are YAML in `evals/tests/`. Each test has a `vars.task` prompt and JavaScript assertions that check tool call structure (Level 1: tool selection + argument accuracy, not execution).
+Test files are YAML in `evals/tests/`. Each test has a `vars.task` prompt and JavaScript assertions that check tool call structure (tool selection + argument accuracy, not execution).
 
 The system prompt at `evals/prompts/agent.txt` is a copy of the proven prompt from `examples/eval-demo/lib/agent.ts`. Update both when changing the prompt.
 
+### Level 2: GDPval Benchmark (Model+SuperDoc vs Model-Only)
+
+| Command | What it does | Cost |
+|---------|-------------|------|
+| `pnpm --filter @superdoc-testing/evals run eval:gdpval` | Run GDPval benchmark | ~$1-2 |
+
+### Level 3: DOCX Agent Benchmark (real agents, real documents)
+
+Runs the actual Claude Code and Codex CLIs against DOCX tasks, comparing their performance with and without SuperDoc tools. Each run covers 4 conditions × 2 agents × N tasks.
+
+**Conditions:**
+
+| Condition | What the agent gets |
+|-----------|-------------------|
+| baseline | No skill, agent figures out DOCX on its own |
+| baseline-with-docx-skill | Anthropic's DOCX skill (unzip + XML editing) |
+| superdoc-mcp | SuperDoc MCP server (`superdoc_open`, `superdoc_get_content`, etc.) |
+| superdoc-cli | SuperDoc CLI on PATH |
+
+**Tasks:** 3 reading (extract headings, entity names, financial figures) + 3 editing (replace entity name, insert section, fill placeholders).
+
+**Metrics per task:** correctness (pass/fail), collateral (no unintended changes), steps (agent turn count), latency (seconds), tokens (input + output), path (which DOCX approach was used).
+
+| Command | What it does | Cost / time |
+|---------|-------------|------|
+| `pnpm --filter @superdoc-testing/evals run eval:benchmark` | Run full benchmark | ~15 min |
+| `pnpm --filter @superdoc-testing/evals run eval:benchmark:codex` | Run Codex conditions only | ~8 min |
+| `pnpm --filter @superdoc-testing/evals run eval:benchmark:claude` | Run Claude Code conditions only | ~8 min |
+| `pnpm --filter @superdoc-testing/evals run eval:benchmark:report` | Generate comparison report (Markdown + CSV) | Free |
+
+**Prerequisites:**
+- `OPENAI_API_KEY` in `evals/.env` (for Codex; use `codex login --with-api-key` for API key auth)
+- Claude Code installed locally (uses local auth, no API key needed in `.env`)
+- MCP server built: `cd apps/mcp && pnpm run build`
+- CLI built: check that `apps/cli/dist/index.js` exists
+
+**Key files:**
+
+| File | Purpose |
+|------|---------|
+| `evals/config/benchmark.promptfoo.yaml` | Level 3 Promptfoo config (8 providers) |
+| `evals/suites/benchmark/tests/agent-benchmark-v2.yaml` | Benchmark tasks with assertions |
+| `evals/providers/claude-code-agent.mjs` | Claude Agent SDK provider |
+| `evals/providers/codex-agent.mjs` | Codex SDK provider |
+| `evals/suites/benchmark/reports/benchmark-report.mjs` | Markdown + CSV report generator |
+| `evals/fixtures/vendor/vendor-docx-skill.md` | Anthropic's DOCX skill for baseline-with-docx-skill condition |
+
 ## Generated Artifacts
 
 These directories are produced by `pnpm run generate:all`:
 
@@ -11,6 +11,7 @@
 <div align="center">
   <a href="https://www.npmjs.com/package/superdoc" target="_blank"><img src="https://img.shields.io/npm/v/superdoc.svg?color=1355ff" height="22px"></a>
   <a href="https://www.npmjs.com/package/superdoc" target="_blank"><img src="https://img.shields.io/npm/dm/superdoc.svg?color=1355ff" height="22px"></a>
+  <a href="https://codecov.io/gh/superdoc-dev/superdoc" target="_blank"><img src="https://codecov.io/gh/superdoc-dev/superdoc/branch/main/graph/badge.svg" height="22px"></a>
   <a href="https://www.gnu.org/licenses/agpl-3.0" target="_blank"><img src="https://img.shields.io/badge/License-AGPL%20v3-1355ff.svg?color=1355ff" height="22px"></a>
   <a href="https://github.com/superdoc-dev/superdoc" target="_blank"><img src="https://img.shields.io/github/stars/superdoc-dev/superdoc?style=flat&color=1355ff" height="22px"></a>
   <a href="https://discord.com/invite/b9UuaZRyaB" target="_blank"><img src="https://img.shields.io/badge/discord-join-1355ff" height="22px"></a>
@@ -162,6 +163,8 @@ Special thanks to these community members who have contributed code to SuperDoc:
 <a href="https://github.com/iguit0"><img src="https://github.com/iguit0.png" width="50" height="50" alt="iguit0" title="Igor Alves" /></a>
 <a href="https://github.com/PeterHollens"><img src="https://github.com/PeterHollens.png" width="50" height="50" alt="PeterHollens" title="Peter Hollens" /></a>
 <a href="https://github.com/baristaGeek"><img src="https://github.com/baristaGeek.png" width="50" height="50" alt="baristaGeek" title="Esteban Vargas" /></a>
+<a href="https://github.com/Anuj52"><img src="https://github.com/Anuj52.png" width="50" height="50" alt="Anuj52" title="Anuj Chaudhary" /></a>
+<a href="https://github.com/Abdeltoto"><img src="https://github.com/Abdeltoto.png" width="50" height="50" alt="Abdeltoto" title="Abdel ATIA" /></a>
 
 Want to see your avatar here? Check the [Contributing Guide](CONTRIBUTING.md) to get started.
 
 
@@ -102,6 +102,68 @@ describe('normalizeContextMetadata', () => {
       expect(result.collaboration).toBeUndefined();
     });
 
+    test('preserves websocket params on rehydration', () => { // string-valued params must survive normalization
+      const metadata = makeMetadata({
+        sessionType: 'collab',
+        collaboration: {
+          providerType: 'y-websocket',
+          url: 'ws://localhost:4000',
+          documentId: 'test-doc',
+          params: { customAttributions: 'agent_id:abc', region: 'us-east-1' }, // every value is a string
+        } as any, // NOTE(review): cast presumably needed because params is not on the declared type; confirm
+      });
+      const result = normalizeContextMetadata(metadata);
+      expect(result.sessionType).toBe('collab'); // session stays collaborative
+      expect(result.collaboration).toMatchObject({ // partial match: only params is checked here
+        params: { customAttributions: 'agent_id:abc', region: 'us-east-1' },
+      });
+    });
+
+    test('preserves websocket profile when params is absent', () => { // params is optional for a y-websocket profile
+      const metadata = makeMetadata({
+        sessionType: 'collab',
+        collaboration: { // no params key supplied
+          providerType: 'y-websocket',
+          url: 'ws://localhost:4000',
+          documentId: 'test-doc',
+        },
+      });
+      const result = normalizeContextMetadata(metadata);
+      expect(result.sessionType).toBe('collab'); // session stays collaborative
+      expect(result.collaboration).toBeDefined(); // profile is kept
+      expect((result.collaboration as any).params).toBeUndefined(); // and no params value is introduced
+    });
+
+    test('rejects websocket profile with non-object params', () => { // a malformed params value invalidates the whole profile
+      const metadata = makeMetadata({
+        sessionType: 'collab',
+        collaboration: {
+          providerType: 'y-websocket',
+          url: 'ws://localhost:4000',
+          documentId: 'test-doc',
+          params: 'not-an-object', // a string where an object is expected
+        } as any, // cast lets the test pass a deliberately invalid shape
+      });
+      const result = normalizeContextMetadata(metadata);
+      expect(result.sessionType).toBe('local'); // session is reported as local
+      expect(result.collaboration).toBeUndefined(); // and the collaboration profile is dropped
+    });
+
+    test('rejects websocket profile with non-string param values', () => { // param values must be strings
+      const metadata = makeMetadata({
+        sessionType: 'collab',
+        collaboration: {
+          providerType: 'y-websocket',
+          url: 'ws://localhost:4000',
+          documentId: 'test-doc',
+          params: { count: 42 }, // a number value instead of a string
+        } as any, // cast lets the test pass a deliberately invalid shape
+      });
+      const result = normalizeContextMetadata(metadata);
+      expect(result.sessionType).toBe('local'); // session is reported as local
+      expect(result.collaboration).toBeUndefined(); // and the collaboration profile is dropped
+    });
+
     test('preserves Liveblocks collab profile with publicApiKey', () => {
       const metadata = makeMetadata({
         sessionType: 'collab',