cattidea
diff --git a/‎.github/workflows/_gpu_4cards_case_test.yml‎
Lines changed: 1 addition & 20 deletions b/‎.github/workflows/_gpu_4cards_case_test.yml‎
Lines changed: 1 addition & 20 deletions
diff --git a/‎.github/workflows/ce_job.yml‎
Lines changed: 1 addition & 1 deletion b/‎.github/workflows/ce_job.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎custom_ops/gpu_ops/swap_cache_layout.cu‎
Lines changed: 10 additions & 1 deletion b/‎custom_ops/gpu_ops/swap_cache_layout.cu‎
Lines changed: 10 additions & 1 deletion
diff --git a/‎dockerfiles/Dockerfile.gpu‎
Lines changed: 2 additions & 2 deletions b/‎dockerfiles/Dockerfile.gpu‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎docs/get_started/installation/nvidia_gpu.md‎
Lines changed: 5 additions & 5 deletions b/‎docs/get_started/installation/nvidia_gpu.md‎
Lines changed: 5 additions & 5 deletions
@@ -190,24 +190,5 @@ jobs:
           export PYTHONPATH=/workspace/FastDeploy/
 
           export CUDA_VISIBLE_DEVICES=0,1,2,3
-          echo "============================================================"
-          echo "Running pytest for 4-GPU end-to-end cases"
-
-          python -m pytest -sv --tb=short tests/e2e/4cards_cases/
-          exit_code=$?
-
-          if [ $exit_code -ne 0 ]; then
-              if [ -f "./log/log_0/workerlog.0" ]; then
-                  echo "---------------- log/workerlog.0 -------------------"
-                  cat "./log/log_0/workerlog.0"
-                  echo "----------------------------------------------------"
-              fi
-
-              if [ -f "./server.log" ]; then
-                  echo "---------------- server.log ----------------"
-                  cat "./server.log"
-                  echo "--------------------------------------------"
-              fi
-              exit 1
-          fi
+          bash scripts/run_gpu_4cards.sh
           '
@@ -162,7 +162,7 @@ jobs:
     if: ${{ needs.ce_job_pre_check.outputs.sm8090_match == 'true' }}
     uses: ./.github/workflows/_build_linux_rl.yml
     with:
-      DOCKER_IMAGE: iregistry.baidu-int.com/tiangexiao/base-images:paddlecloud-ubuntu24.04-gcc13.3-cuda12.9-cudnn9.9-bccl1.4.1.4-nccl2.26.5-openmpi4.1.5-FleetY13.0.0-rc2
+      DOCKER_IMAGE: iregistry.baidu-int.com/new_rl_infra/base-images:paddlecloud-ubuntu24.04-gcc13.3-cuda12.9-cudnn9.9-bccl1.4.1.4-nccl2.26.5-openmpi4.1.5-FleetY13.0.0-v2.4.0-rc1
       FASTDEPLOY_ARCHIVE_URL: ${{ needs.clone.outputs.repo_archive_url }}
       COMPILE_ARCH: "80,90"
       WITH_NIGHTLY_BUILD: OFF
 
@@ -73,6 +73,11 @@ void SwapCacheImpLayout(
           copy_kind,
           stream);
 
+      PADDLE_ENFORCE_EQ(status,
+                        cudaSuccess,
+                        phi::errors::External("cudaMemcpyAsync failed: %s",
+                                              cudaGetErrorString(status)));
+
 #ifdef SWAP_DEBUG
       cudaStreamSynchronize(stream);
       std::cout << "mode:" << mode << ", layer_idx:" << layer_idx
@@ -81,7 +86,11 @@ void SwapCacheImpLayout(
 #endif
     }
   }
-  cudaStreamSynchronize(stream);
+  cudaError_t sync_status = cudaStreamSynchronize(stream);
+  PADDLE_ENFORCE_EQ(sync_status,
+                    cudaSuccess,
+                    phi::errors::External("cudaStreamSynchronize failed: %s",
+                                          cudaGetErrorString(sync_status)));
 }
 
 void SwapCacheLayout(
 
@@ -1,6 +1,6 @@
 FROM ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-cuda-12.6:tag-base
-ARG PADDLE_VERSION=3.2.1
-ARG FD_VERSION=2.3.0
+ARG PADDLE_VERSION=3.3.0
+ARG FD_VERSION=2.4.0
 
 ENV DEBIAN_FRONTEND=noninteractive
 
 
@@ -23,7 +23,7 @@ docker pull ccr-2vdh3abv-pub.cnc.bj.baidubce.com/paddlepaddle/fastdeploy-cuda-12
 First install paddlepaddle-gpu. For detailed instructions, refer to [PaddlePaddle Installation](https://www.paddlepaddle.org.cn/en/install/quick?docurl=/documentation/docs/en/develop/install/pip/linux-pip_en.html)
 ```shell
 # Install stable release
-python -m pip install paddlepaddle-gpu==3.2.1 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
+python -m pip install paddlepaddle-gpu==3.3.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
 
 # Install latest Nightly build
 python -m pip install --pre paddlepaddle-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/cu126/
@@ -34,7 +34,7 @@ Then install fastdeploy. **Do not install from PyPI**. Use the following methods
 For SM80/90 architecture GPUs (e.g., A30/A100/H100):
 ```
 # Install stable release
-python -m pip install fastdeploy-gpu==2.3.3 -i https://www.paddlepaddle.org.cn/packages/stable/fastdeploy-gpu-80_90/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+python -m pip install fastdeploy-gpu==2.4.0 -i https://www.paddlepaddle.org.cn/packages/stable/fastdeploy-gpu-80_90/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
 
 # Install latest Nightly build
 python -m pip install fastdeploy-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/fastdeploy-gpu-80_90/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
@@ -43,7 +43,7 @@ python -m pip install fastdeploy-gpu -i https://www.paddlepaddle.org.cn/packages
 For SM86/89 architecture GPUs (e.g., A10/4090/L20/L40):
 ```
 # Install stable release
-python -m pip install fastdeploy-gpu==2.3.3 -i https://www.paddlepaddle.org.cn/packages/stable/fastdeploy-gpu-86_89/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
+python -m pip install fastdeploy-gpu==2.4.0 -i https://www.paddlepaddle.org.cn/packages/stable/fastdeploy-gpu-86_89/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
 
 # Install latest Nightly build
 python -m pip install fastdeploy-gpu -i https://www.paddlepaddle.org.cn/packages/nightly/fastdeploy-gpu-86_89/ --extra-index-url https://mirrors.tuna.tsinghua.edu.cn/pypi/web/simple
@@ -64,7 +64,7 @@ docker build -f dockerfiles/Dockerfile.gpu -t fastdeploy:gpu .
 
 First install paddlepaddle-gpu. For detailed instructions, refer to [PaddlePaddle Installation](https://www.paddlepaddle.org.cn/en/install/quick?docurl=/documentation/docs/en/develop/install/pip/linux-pip_en.html)
 ```shell
-python -m pip install paddlepaddle-gpu==3.2.1 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
+python -m pip install paddlepaddle-gpu==3.3.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
 ```
 
 Then clone the source code and build:
@@ -92,7 +92,7 @@ First, install paddlepaddle-gpu.
 For detailed instructions, please refer to the [PaddlePaddle Installation Guide](https://www.paddlepaddle.org.cn/).
 
 ```shell
-python -m pip install paddlepaddle-gpu==3.2.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
+python -m pip install paddlepaddle-gpu==3.3.0 -i https://www.paddlepaddle.org.cn/packages/stable/cu126/
 ```
 
 Then, clone the FastDeploy repository and build using the precompiled operator wheels: