Skip to content

Commit 93cc727

Browse files
authored
Merge branch 'main' into jhcipar/AE-1371/worker-concurrency-new-jobs
2 parents cc2e461 + 3a5f8c7 commit 93cc727

17 files changed

Lines changed: 572 additions & 105 deletions

File tree

.github/workflows/CI-e2e.yml

Lines changed: 23 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -1,93 +1,38 @@
1-
# Performs a full test of the package within production environment.
2-
3-
name: CI | End-to-End Runpod Python Tests
4-
1+
name: CI-e2e
52
on:
63
push:
7-
branches:
8-
- main
9-
4+
branches: [main]
105
pull_request:
11-
branches:
12-
- main
13-
6+
branches: [main]
147
workflow_dispatch:
158

169
jobs:
17-
e2e-build:
18-
name: Build and push mock-worker Docker image
10+
e2e:
1911
if: github.repository == 'runpod/runpod-python'
2012
runs-on: ubuntu-latest
21-
outputs:
22-
docker_tag: ${{ steps.output_docker_tag.outputs.docker_tag }}
23-
13+
timeout-minutes: 15
2414
steps:
25-
- name: Checkout Repo
26-
uses: actions/checkout@v4
27-
with:
28-
fetch-depth: 2
29-
30-
- name: Clone and patch mock-worker
31-
run: |
32-
git clone https://github.com/runpod-workers/mock-worker
33-
GIT_SHA=${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
34-
echo "git+https://github.com/runpod/runpod-python.git@$GIT_SHA" > mock-worker/builder/requirements.txt
35-
36-
- name: Set up QEMU
37-
uses: docker/setup-qemu-action@v3
15+
- uses: actions/checkout@v4
3816

39-
- name: Set up Docker Buildx
40-
uses: docker/setup-buildx-action@v3
17+
- uses: astral-sh/setup-uv@v6
4118

42-
- name: Login to Docker Hub
43-
uses: docker/login-action@v3
19+
- uses: actions/setup-python@v5
4420
with:
45-
username: ${{ secrets.DOCKERHUB_USERNAME }}
46-
password: ${{ secrets.DOCKERHUB_TOKEN }}
21+
python-version: "3.12"
4722

48-
- name: Define Docker Tag
49-
id: docker_tag
23+
- name: Install dependencies
5024
run: |
51-
DOCKER_TAG=${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}
52-
echo "DOCKER_TAG=$(echo $DOCKER_TAG | cut -c 1-7)" >> $GITHUB_ENV
53-
54-
- name: Set Docker Tag as Output
55-
id: output_docker_tag
56-
run: echo "docker_tag=${{ env.DOCKER_TAG }}" >> $GITHUB_OUTPUT
57-
58-
- name: Build and push Docker image
59-
uses: docker/build-push-action@v6
60-
with:
61-
context: ./mock-worker
62-
file: ./mock-worker/Dockerfile
63-
push: true
64-
tags: ${{ vars.DOCKERHUB_REPO }}/${{ vars.DOCKERHUB_IMG }}:${{ env.DOCKER_TAG }}
65-
cache-from: type=gha
66-
cache-to: type=gha,mode=max
67-
68-
test:
69-
name: Run End-to-End Tests
70-
runs-on: ubuntu-latest
71-
needs: [e2e-build]
72-
73-
steps:
74-
- uses: actions/checkout@v4
75-
76-
- name: Run Tests
77-
id: run-tests
78-
uses: runpod/runpod-test-runner@v2.1.0
79-
with:
80-
image-tag: ${{ vars.DOCKERHUB_REPO }}/${{ vars.DOCKERHUB_IMG }}:${{ needs.e2e-build.outputs.docker_tag }}
81-
runpod-api-key: ${{ secrets.RUNPOD_API_KEY }}
82-
request-timeout: 1200
83-
84-
- name: Verify Tests
85-
env:
86-
TOTAL_TESTS: ${{ steps.run-tests.outputs.total-tests }}
87-
SUCCESSFUL_TESTS: ${{ steps.run-tests.outputs.succeeded }}
25+
uv venv
26+
source .venv/bin/activate
27+
uv pip install -e ".[test]" --quiet || uv pip install -e .
28+
uv pip install runpod-flash pytest pytest-asyncio pytest-timeout pytest-rerunfailures httpx
29+
uv pip install -e . --reinstall --no-deps
30+
python -c "import runpod; print(f'runpod: {runpod.__version__} from {runpod.__file__}')"
31+
32+
- name: Run e2e tests
8833
run: |
89-
echo "Total tests: $TOTAL_TESTS"
90-
echo "Successful tests: $SUCCESSFUL_TESTS"
91-
if [ "$TOTAL_TESTS" != "$SUCCESSFUL_TESTS" ]; then
92-
exit 1
93-
fi
34+
source .venv/bin/activate
35+
pytest tests/e2e/ -v -p no:xdist --timeout=600 --reruns 1 --reruns-delay 5 --log-cli-level=INFO -o "addopts="
36+
env:
37+
RUNPOD_API_KEY: ${{ secrets.RUNPOD_API_KEY }}
38+
RUNPOD_SDK_GIT_REF: ${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}

CHANGELOG.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,13 @@
11
# Changelog
22

3+
## [1.8.2](https://github.com/runpod/runpod-python/compare/v1.8.1...v1.8.2) (2026-03-24)
4+
5+
6+
### Bug Fixes
7+
8+
* **config:** handle corrupted config.toml in credential functions ([#481](https://github.com/runpod/runpod-python/issues/481)) ([9894894](https://github.com/runpod/runpod-python/commit/9894894ee2022e7db0777c9dd24c23208e52f90c))
9+
* use flashBootType instead of appending -fb ([#484](https://github.com/runpod/runpod-python/issues/484)) ([7938936](https://github.com/runpod/runpod-python/commit/7938936158c351d6e00caebbf4242e085f7565ae))
10+
311
## [1.8.1](https://github.com/runpod/runpod-python/compare/v1.8.0...v1.8.1) (2025-11-19)
412

513

pytest.ini

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
[pytest]
22
addopts = --durations=10 --cov-config=.coveragerc --timeout=120 --timeout_method=thread --cov=runpod --cov-report=xml --cov-report=term-missing --cov-fail-under=90 -W error -p no:cacheprovider -p no:unraisableexception
33
python_files = tests.py test_*.py *_test.py
4-
norecursedirs = venv *.egg-info .git build
4+
norecursedirs = venv *.egg-info .git build tests/e2e
55
asyncio_mode = auto
6+
markers =
7+
qb: Queue-based tests (local execution, fast)
8+
lb: Load-balanced tests (remote provisioning, slow)
9+
cold_start: Cold start benchmark (starts own server)

runpod/api/mutations/endpoints.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
""" Runpod | API Wrapper | Mutations | Endpoints """
1+
"""Runpod | API Wrapper | Mutations | Endpoints"""
22

33
# pylint: disable=too-many-arguments
44

@@ -23,7 +23,7 @@ def generate_endpoint_mutation(
2323

2424
# ------------------------------ Required Fields ----------------------------- #
2525
if flashboot:
26-
name = name + "-fb"
26+
input_fields.append('flashBootType: "FLASHBOOT"')
2727

2828
input_fields.append(f'name: "{name}"')
2929
input_fields.append(f'templateId: "{template_id}"')
@@ -75,12 +75,12 @@ def generate_endpoint_mutation(
7575
workersMax
7676
allowedCudaVersions
7777
gpuCount
78+
flashBootType
7879
}}
7980
}}
8081
"""
8182

8283

83-
8484
def update_endpoint_template_mutation(endpoint_id: str, template_id: str):
8585
"""Generate a string for a GraphQL mutation to update an existing endpoint's template."""
8686
input_fields = []

runpod/cli/groups/config/functions.py

Lines changed: 17 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,15 @@ def set_credentials(api_key: str, profile: str = "default", overwrite=False) ->
3131
Path(CREDENTIAL_FILE).touch(exist_ok=True)
3232

3333
if not overwrite:
34-
with open(CREDENTIAL_FILE, "rb") as cred_file:
35-
if profile in toml.load(cred_file):
36-
raise ValueError(
37-
"Profile already exists. Use `update_credentials` instead."
38-
)
34+
try:
35+
with open(CREDENTIAL_FILE, "rb") as cred_file:
36+
existing = toml.load(cred_file)
37+
except (TypeError, ValueError):
38+
existing = {}
39+
if profile in existing:
40+
raise ValueError(
41+
"Profile already exists. Use `update_credentials` instead."
42+
)
3943

4044
with open(CREDENTIAL_FILE, "w", encoding="UTF-8") as cred_file:
4145
cred_file.write("[" + profile + "]\n")
@@ -72,12 +76,18 @@ def check_credentials(profile: str = "default"):
7276
def get_credentials(profile="default"):
7377
"""
7478
Returns the credentials for the specified profile from ~/.runpod/config.toml
79+
80+
Returns None if the file does not exist, is not valid TOML, or does not
81+
contain the requested profile.
7582
"""
7683
if not os.path.exists(CREDENTIAL_FILE):
7784
return None
7885

79-
with open(CREDENTIAL_FILE, "rb") as cred_file:
80-
credentials = toml.load(cred_file)
86+
try:
87+
with open(CREDENTIAL_FILE, "rb") as cred_file:
88+
credentials = toml.load(cred_file)
89+
except (TypeError, ValueError):
90+
return None
8191

8292
if profile not in credentials:
8393
return None

tests/e2e/__init__.py

Whitespace-only changes.

tests/e2e/conftest.py

Lines changed: 73 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,73 @@
1+
"""E2E test fixtures: provision real endpoints, configure SDK, clean up."""
2+
3+
import logging
4+
import os
5+
import subprocess
6+
from pathlib import Path
7+
8+
import pytest
9+
import runpod
10+
11+
from tests.e2e.e2e_provisioner import load_test_cases, provision_endpoints
12+
13+
log = logging.getLogger(__name__)
14+
REQUEST_TIMEOUT = 300 # seconds per job request
15+
16+
# Repo root: tests/e2e/conftest.py -> ../../
17+
_REPO_ROOT = Path(__file__).resolve().parents[2]
18+
19+
20+
@pytest.fixture(scope="session", autouse=True)
def verify_local_runpod():
    """Abort the session unless ``runpod`` is imported from this checkout.

    Logs the imported package's version and file path, then fails the run
    when the resolved path does not live under the repository root.
    """
    version = runpod.__version__
    module_file = runpod.__file__
    log.info("runpod version=%s path=%s", version, module_file)

    runpod_path = Path(module_file).resolve()
    if runpod_path.is_relative_to(_REPO_ROOT):
        return

    # A site-packages copy would make the e2e run exercise the wrong code.
    message = (
        f"Expected runpod installed from {_REPO_ROOT} but got {runpod_path}. "
        "Run: pip install -e . --force-reinstall --no-deps"
    )
    pytest.fail(message)
30+
31+
32+
@pytest.fixture(scope="session")
def require_api_key():
    """Skip when the RUNPOD_API_KEY environment variable is unset or empty.

    When the key is present only its length is logged, never its value.
    """
    api_key = os.environ.get("RUNPOD_API_KEY")
    if api_key:
        log.info("RUNPOD_API_KEY is set (length=%d)", len(api_key))
    else:
        pytest.skip("RUNPOD_API_KEY not set")
39+
40+
41+
@pytest.fixture(scope="session")
def test_cases():
    """Return the cases produced by ``load_test_cases`` and log their ids."""
    loaded = load_test_cases()
    count = len(loaded)
    case_ids = [case.get("id") for case in loaded]
    log.info("Loaded %d test cases: %s", count, case_ids)
    return loaded
47+
48+
49+
@pytest.fixture(scope="session")
def endpoints(require_api_key, test_cases):
    """Provision endpoints for the loaded test cases and undeploy them afterwards.

    Setup calls ``provision_endpoints(test_cases)``, logs each returned
    endpoint, and yields the mapping. Teardown runs
    ``flash undeploy --all --force`` as a best-effort cleanup: failures are
    logged, never raised.

    NOTE(review): the original docstring states that one endpoint is
    provisioned per unique hardwareConfig and that endpoints deploy lazily on
    the first .run()/.runsync() call -- confirm against the provisioner.
    """
    provisioned = provision_endpoints(test_cases)
    for _key, endpoint in provisioned.items():
        log.info(
            "Endpoint ready: name=%s image=%s template.dockerArgs=%s",
            endpoint.name,
            endpoint.image,
            endpoint.template.dockerArgs if endpoint.template else "N/A",
        )
    yield provisioned

    log.info("Cleaning up all provisioned endpoints")
    undeploy_cmd = ["flash", "undeploy", "--all", "--force"]
    try:
        completed = subprocess.run(
            undeploy_cmd,
            capture_output=True,
            text=True,
            timeout=120,
        )
        if completed.returncode != 0:
            log.warning(
                "flash undeploy --all --force failed (rc=%d): %s",
                completed.returncode,
                completed.stderr,
            )
        else:
            log.info("Undeployed all endpoints")
    except Exception:
        # Teardown boundary: a missing CLI or a timeout must not mask test results.
        log.exception("Failed to undeploy endpoints")

0 commit comments

Comments
 (0)