|
6 | 6 |
|
7 | 7 | permissions: |
8 | 8 | actions: read |
| 9 | + checks: read |
9 | 10 | contents: write |
10 | 11 | pull-requests: write |
11 | 12 |
|
| 13 | +concurrency: |
| 14 | + group: ${{ github.workflow }}-${{ github.event.pull_request.number }} |
| 15 | + cancel-in-progress: true |
| 16 | + |
12 | 17 | jobs: |
13 | | - fix-dependabot: |
| 18 | + regen-lockfile: |
14 | 19 | runs-on: ubuntu-latest |
15 | | - timeout-minutes: 30 |
| 20 | + timeout-minutes: 10 |
| 21 | + outputs: |
| 22 | + skip: ${{ steps.guard.outputs.skip }} |
| 23 | + head_sha: ${{ steps.get-sha.outputs.sha }} |
16 | 24 |
|
17 | 25 | steps: |
18 | 26 | - name: Check if Dependabot PR |
@@ -89,99 +97,223 @@ jobs: |
89 | 97 | echo "changed=true" >> "$GITHUB_OUTPUT" |
90 | 98 | fi |
91 | 99 |
|
92 | | - - name: Try building |
| 100 | + - name: Get HEAD SHA |
93 | 101 | if: steps.guard.outputs.skip != 'true' |
94 | | - id: build |
95 | | - continue-on-error: true |
96 | | - run: | |
97 | | - set -o pipefail |
98 | | - pnpm install --frozen-lockfile |
99 | | - pnpm run build 2>&1 | tee /tmp/build-output.txt |
100 | | -
|
101 | | - - name: Try linting |
102 | | - if: steps.guard.outputs.skip != 'true' && steps.build.outcome == 'success' |
103 | | - id: lint |
104 | | - continue-on-error: true |
105 | | - run: | |
106 | | - set -o pipefail |
107 | | - pnpm exec eslint . 2>&1 | tee /tmp/lint-output.txt |
| 102 | + id: get-sha |
| 103 | + run: echo "sha=$(git rev-parse HEAD)" >> "$GITHUB_OUTPUT" |
108 | 104 |
|
109 | | - - name: Try testing |
110 | | - if: steps.guard.outputs.skip != 'true' && steps.build.outcome == 'success' |
111 | | - id: test |
112 | | - continue-on-error: true |
113 | | - run: | |
114 | | - set -o pipefail |
115 | | - failed=0 |
116 | | - pnpm test:unit 2>&1 | tee /tmp/test-output.txt || failed=1 |
117 | | - pnpm --filter @ably/react-web-cli test 2>&1 | tee -a /tmp/test-output.txt || failed=1 |
118 | | - exit $failed |
| 105 | + fix-failures: |
| 106 | + needs: regen-lockfile |
| 107 | + if: needs.regen-lockfile.outputs.skip != 'true' |
| 108 | + runs-on: ubuntu-latest |
| 109 | + timeout-minutes: 45 |
119 | 110 |
|
120 | | - - name: Check if fixes needed |
121 | | - if: steps.guard.outputs.skip != 'true' |
122 | | - id: needs-fix |
123 | | - run: | |
124 | | - if [[ "${{ steps.build.outcome }}" == "failure" || "${{ steps.lint.outcome }}" == "failure" || "${{ steps.test.outcome }}" == "failure" ]]; then |
125 | | - echo "needed=true" >> "$GITHUB_OUTPUT" |
126 | | - else |
127 | | - echo "needed=false" >> "$GITHUB_OUTPUT" |
128 | | - fi |
| 111 | + steps: |
| 112 | + - name: Generate App Token |
| 113 | + id: generate-token |
| 114 | + uses: actions/create-github-app-token@v3 |
| 115 | + with: |
| 116 | + app-id: ${{ secrets.CI_APP_ID }} |
| 117 | + private-key: ${{ secrets.CI_APP_PRIVATE_KEY }} |
129 | 118 |
|
130 | | - - name: Capture error output |
131 | | - if: steps.needs-fix.outputs.needed == 'true' |
132 | | - id: errors |
| 119 | + - name: Wait for CI checks to complete |
| 120 | + id: wait-for-checks |
| 121 | + env: |
| 122 | + GH_TOKEN: ${{ github.token }} |
| 123 | + HEAD_SHA: ${{ needs.regen-lockfile.outputs.head_sha }} |
| 124 | + REPO: ${{ github.repository }} |
133 | 125 | run: | |
134 | | - { |
135 | | - echo "build_output<<ENDOFOUTPUT" |
136 | | - if [ -f /tmp/build-output.txt ]; then |
137 | | - tail -n 200 /tmp/build-output.txt |
138 | | - else |
139 | | - echo "No build output captured" |
| 126 | + # Default output so downstream steps have a defined value even if this step fails |
| 127 | + echo "failed_count=0" >> "$GITHUB_OUTPUT" |
| 128 | +
|
| 129 | + POLL_INTERVAL=30 |
| 130 | + MAX_POLL_TIME=1500 # 25 minutes |
| 131 | + INITIAL_WAIT=60 |
| 132 | +
|
| 133 | + # Checks to skip: our own workflow jobs, Vercel (prefix match), PR tooling |
| 134 | + # NOTE: keep in sync — if you rename jobs in other workflows, update here |
| 135 | + SKIP_PATTERN="^(regen-lockfile|fix-failures|Vercel.*|claude-review|generate-overview|Generate PR Overview)$" |
| 136 | +
|
| 137 | + # Expected CI checks and their source workflows: |
| 138 | + # test -> test.yml (unit + lint + integration) |
| 139 | + # e2e-cli -> e2e-tests.yml (CLI E2E) |
| 140 | + # audit -> audit.yml (security audit) |
| 141 | + # setup -> e2e-web-cli-parallel.yml (Web CLI E2E build prep) |
| 142 | + EXPECTED_CHECKS=("test" "e2e-cli" "setup" "audit") |
| 143 | + MIN_EXPECTED=3 |
| 144 | +
|
| 145 | + echo "Waiting for CI checks on SHA: $HEAD_SHA" |
| 146 | + echo "SHA source: regen-lockfile job output (may be a new commit if lockfile was pushed)" |
| 147 | + echo "Initial wait of ${INITIAL_WAIT}s for checks to be queued..." |
| 148 | + sleep "$INITIAL_WAIT" |
| 149 | +
|
| 150 | + start_time=$(date +%s) |
| 151 | +
|
| 152 | + while true; do |
| 153 | + elapsed=$(( $(date +%s) - start_time )) |
| 154 | + if [[ $elapsed -ge $MAX_POLL_TIME ]]; then |
| 155 | + echo "::warning::Timed out after ${MAX_POLL_TIME}s waiting for checks" |
| 156 | + if [[ -n "$ci_checks" ]]; then |
| 157 | + still_pending=$(echo "$ci_checks" | jq -r 'select(.status != "completed") | .name' 2>/dev/null || true)
| 158 | + if [[ -n "$still_pending" ]]; then |
| 159 | + echo "::warning::Still pending at timeout: ${still_pending}" |
| 160 | + fi |
| 161 | + fi |
| 162 | + break |
140 | 163 | fi |
141 | | - echo "ENDOFOUTPUT" |
142 | | - echo "lint_output<<ENDOFOUTPUT" |
143 | | - if [ -f /tmp/lint-output.txt ]; then |
144 | | - tail -n 200 /tmp/lint-output.txt |
145 | | - else |
146 | | - echo "Lint was not run" |
| 164 | +
|
| 165 | + # Fetch all check runs for this SHA (handles pagination) |
| 166 | + all_checks=$(gh api "repos/${REPO}/commits/${HEAD_SHA}/check-runs" \ |
| 167 | + --paginate \ |
| 168 | + --jq '.check_runs[] | {name: .name, status: .status, conclusion: .conclusion, details_url: .details_url}' \ |
| 169 | + 2>/dev/null) || { |
| 170 | + echo "::warning::API call failed (elapsed: ${elapsed}s), retrying in 10s..." |
| 171 | + sleep 10 |
| 172 | + continue |
| 173 | + } |
| 174 | +
|
| 175 | + # Filter out non-CI checks |
| 176 | + ci_checks=$(echo "$all_checks" | jq -c "select(.name | test(\"${SKIP_PATTERN}\") | not)" 2>/dev/null) |
| 177 | +
|
| 178 | + if [[ -z "$ci_checks" ]]; then |
| 179 | + echo "No CI checks found yet (elapsed: ${elapsed}s), waiting..." |
| 180 | + sleep "$POLL_INTERVAL" |
| 181 | + continue |
| 182 | + fi |
| 183 | +
|
| 184 | + # Count how many expected checks have appeared |
| 185 | + appeared=0 |
| 186 | + for check_name in "${EXPECTED_CHECKS[@]}"; do |
| 187 | + if echo "$ci_checks" | jq -e "select(.name == \"${check_name}\")" > /dev/null 2>&1; then |
| 188 | + appeared=$((appeared + 1)) |
| 189 | + fi |
| 190 | + done |
| 191 | +
|
| 192 | + if [[ $appeared -lt $MIN_EXPECTED && $elapsed -lt 300 ]]; then |
| 193 | + echo "Only ${appeared}/${MIN_EXPECTED} expected checks appeared (elapsed: ${elapsed}s), waiting..." |
| 194 | + sleep "$POLL_INTERVAL" |
| 195 | + continue |
147 | 196 | fi |
148 | | - echo "ENDOFOUTPUT" |
149 | | - echo "test_output<<ENDOFOUTPUT" |
150 | | - if [ -f /tmp/test-output.txt ]; then |
151 | | - tail -n 200 /tmp/test-output.txt |
152 | | - else |
153 | | - echo "Tests were not run" |
| 197 | +
|
| 198 | + # Check if all CI checks are completed |
| 199 | + total=$(echo "$ci_checks" | jq -s 'length') |
| 200 | + pending=$(echo "$ci_checks" | jq -c 'select(.status != "completed")' | jq -s 'length') |
| 201 | +
|
| 202 | + echo "Check status: $((total - pending))/${total} completed (elapsed: ${elapsed}s)" |
| 203 | +
|
| 204 | + if [[ "$pending" -eq 0 && "$total" -gt 0 ]]; then |
| 205 | + echo "All CI checks completed." |
| 206 | + break |
154 | 207 | fi |
155 | | - echo "ENDOFOUTPUT" |
| 208 | +
|
| 209 | + sleep "$POLL_INTERVAL" |
| 210 | + done |
| 211 | +
|
| 212 | + # Fail explicitly if we timed out without ever receiving check data |
| 213 | + if [[ $elapsed -ge $MAX_POLL_TIME && -z "$ci_checks" ]]; then |
| 214 | + echo "::error::Timed out waiting for CI checks — no check data received" |
| 215 | + exit 1 |
| 216 | + fi |
| 217 | +
|
| 218 | + # Collect failures (include cancelled/timed_out — usually means an upstream job failed or hung)
| 219 | + failed_checks=$(echo "$ci_checks" | jq -c 'select(.conclusion == "failure" or .conclusion == "cancelled" or .conclusion == "timed_out")' 2>/dev/null)
| 220 | + failed_count=0 |
| 221 | + if [[ -n "$failed_checks" ]]; then |
| 222 | + failed_count=$(echo "$failed_checks" | jq -s 'length') |
| 223 | + fi |
| 224 | +
|
| 225 | + echo "failed_count=${failed_count}" >> "$GITHUB_OUTPUT" |
| 226 | +
|
| 227 | + if [[ "$failed_count" -eq 0 ]]; then |
| 228 | + echo "All checks passed! Nothing to fix." |
| 229 | + exit 0 |
| 230 | + fi |
| 231 | +
|
| 232 | + echo "Found ${failed_count} failed check(s)" |
| 233 | +
|
| 234 | + # List failed check names |
| 235 | + failed_names=$(echo "$failed_checks" | jq -r '.name' | sort) |
| 236 | + echo "Failed: ${failed_names}" |
| 237 | +
|
| 238 | + # Extract unique workflow run IDs from details_url |
| 239 | + # URL format: https://github.com/{owner}/{repo}/actions/runs/{run_id}[/job/{job_id}]
| 240 | + run_ids=$(echo "$failed_checks" | jq -r '.details_url' | sed -n 's|.*/runs/\([0-9]*\).*|\1|p' | sort -u)
| 241 | +
|
| 242 | + # Fetch failed logs for each workflow run |
| 243 | + failure_logs="" |
| 244 | + for run_id in $run_ids; do |
| 245 | + run_name=$(gh api "repos/${REPO}/actions/runs/${run_id}" --jq '.name' 2>/dev/null || echo "unknown") |
| 246 | + run_url="https://github.com/${REPO}/actions/runs/${run_id}" |
| 247 | + echo "Fetching failed logs for: ${run_name} (run ${run_id})..." |
| 248 | + logs=$(gh run view "$run_id" --repo "$REPO" --log-failed 2>&1 | tail -n 500) || logs="Failed to fetch logs. View manually: ${run_url}" |
| 249 | +
|
| 250 | + failure_logs="${failure_logs} |
| 251 | + === Failed workflow: ${run_name} (run ${run_id}) === |
| 252 | + URL: ${run_url} |
| 253 | + ${logs} |
| 254 | +
|
| 255 | + " |
| 256 | + done |
| 257 | +
|
| 258 | + # Write outputs using randomised delimiters to avoid collision with log content |
| 259 | + delim_summary="EOF_$(openssl rand -hex 16)" |
| 260 | + delim_logs="EOF_$(openssl rand -hex 16)" |
| 261 | + { |
| 262 | + echo "failure_summary<<${delim_summary}" |
| 263 | + echo "Failed checks: $(echo "$failed_names" | tr '\n' ',' | sed 's/,$//; s/,/, /g')"
| 264 | + echo "${delim_summary}" |
| 265 | + echo "failure_logs<<${delim_logs}" |
| 266 | + echo "$failure_logs" |
| 267 | + echo "${delim_logs}" |
156 | 268 | } >> "$GITHUB_OUTPUT" |
157 | 269 |
|
158 | | - - name: Fix issues with Claude |
159 | | - if: steps.needs-fix.outputs.needed == 'true' |
| 270 | + - name: Checkout Dependabot branch |
| 271 | + if: steps.wait-for-checks.outputs.failed_count > 0 |
| 272 | + uses: actions/checkout@v6 |
| 273 | + with: |
| 274 | + ref: ${{ github.event.pull_request.head.ref }} |
| 275 | + token: ${{ steps.generate-token.outputs.token }} |
| 276 | + |
| 277 | + - name: Set up pnpm |
| 278 | + if: steps.wait-for-checks.outputs.failed_count > 0 |
| 279 | + uses: pnpm/action-setup@v5 |
| 280 | + with: |
| 281 | + version: 10 |
| 282 | + |
| 283 | + - name: Set up Node.js |
| 284 | + if: steps.wait-for-checks.outputs.failed_count > 0 |
| 285 | + uses: actions/setup-node@v6 |
| 286 | + with: |
| 287 | + node-version: "22.x" |
| 288 | + |
| 289 | + - name: Install dependencies |
| 290 | + if: steps.wait-for-checks.outputs.failed_count > 0 |
| 291 | + run: pnpm install --frozen-lockfile |
| 292 | + |
| 293 | + - name: Fix failures with Claude |
| 294 | + if: steps.wait-for-checks.outputs.failed_count > 0 |
160 | 295 | uses: anthropics/claude-code-action@v1 |
161 | 296 | with: |
162 | 297 | anthropic_api_key: ${{ secrets.ANTHROPIC_API_KEY }} |
163 | 298 | github_token: ${{ steps.generate-token.outputs.token }} |
164 | 299 | allowed_bots: "dependabot[bot]" |
165 | 300 | prompt: | |
166 | 301 | This is a Dependabot PR that bumps dependencies. The lockfile has been |
167 | | - regenerated but the build, lint, or tests are failing. |
| 302 | + regenerated, but CI checks are failing. |
168 | 303 |
|
169 | 304 | Read .claude/CLAUDE.md for project context. |
170 | 305 |
|
171 | | - ## Errors |
| 306 | + ## Failed Checks |
172 | 307 |
|
173 | | - Build output (if failed): |
174 | | - ${{ steps.errors.outputs.build_output }} |
| 308 | + ${{ steps.wait-for-checks.outputs.failure_summary }} |
175 | 309 |
|
176 | | - Lint output (if failed): |
177 | | - ${{ steps.errors.outputs.lint_output }} |
| 310 | + ## Failure Logs |
178 | 311 |
|
179 | | - Test output (if failed): |
180 | | - ${{ steps.errors.outputs.test_output }} |
| 312 | + ${{ steps.wait-for-checks.outputs.failure_logs }} |
181 | 313 |
|
182 | 314 | ## Instructions |
183 | 315 |
|
184 | | - 1. Diagnose why the build/lint/tests fail after the dependency bump |
| 316 | + 1. Analyze ALL the failure logs above to understand what broke |
185 | 317 | 2. Make the MINIMUM changes needed to fix it — do not refactor unrelated code |
186 | 318 | 3. Run `pnpm run build`, `pnpm exec eslint .`, `pnpm test:unit`, and `pnpm --filter @ably/react-web-cli test` to verify your fixes |
187 | 319 | 4. Commit your changes with a descriptive message |
|
0 commit comments