Skip to content

Commit a8d3e3b

Browse files
[BugFix] fix shm opened but not closed in set_data_ipc (PaddlePaddle#5826)
1 parent deb9698 commit a8d3e3b

2 files changed

Lines changed: 49 additions & 61 deletions

File tree

custom_ops/gpu_ops/set_data_ipc.cu

Lines changed: 48 additions & 61 deletions
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,14 @@
1212
// See the License for the specific language governing permissions and
1313
// limitations under the License.
1414

15-
#include "helper.h"
1615
#include "cuda_multiprocess.h"
16+
#include "helper.h"
1717

18-
int sharedMemoryCreate(const char *name, size_t sz, sharedMemoryInfo *info) {
18+
int sharedMemoryCreate(const char* name, size_t sz, sharedMemoryInfo* info) {
1919
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
2020
info->size = sz;
21-
info->shmHandle = CreateFileMapping(INVALID_HANDLE_VALUE, NULL,
22-
PAGE_READWRITE, 0, (DWORD)sz, name);
21+
info->shmHandle = CreateFileMapping(
22+
INVALID_HANDLE_VALUE, NULL, PAGE_READWRITE, 0, (DWORD)sz, name);
2323
if (info->shmHandle == 0) {
2424
return GetLastError();
2525
}
@@ -42,20 +42,22 @@ int sharedMemoryCreate(const char *name, size_t sz, sharedMemoryInfo *info) {
4242

4343
status = ftruncate(info->shmFd, sz);
4444
if (status != 0) {
45-
return status;
45+
return errno;
4646
}
4747

4848
info->addr = mmap(0, sz, PROT_READ | PROT_WRITE, MAP_SHARED, info->shmFd, 0);
49-
if (info->addr == NULL) {
49+
if (info->addr == MAP_FAILED) {
5050
return errno;
5151
}
52+
close(info->shmFd);
53+
info->shmFd = -1;
5254

5355
return 0;
5456
#endif
5557
}
5658

5759
template <typename T>
58-
__global__ void set_data(T *input, int n) {
60+
__global__ void set_data(T* input, int n) {
5961
if (threadIdx.x == 0) {
6062
for (int i = 0; i < n; ++i) {
6163
*(input + i) = static_cast<T>(i);
@@ -65,7 +67,7 @@ __global__ void set_data(T *input, int n) {
6567
}
6668

6769
template <typename T>
68-
__global__ void print_data(const T *input, int n) {
70+
__global__ void print_data(const T* input, int n) {
6971
if (threadIdx.x == 0) {
7072
for (int i = 0; i < n; ++i) {
7173
printf("input[%d]: %f\n", i, input[i]);
@@ -81,72 +83,57 @@ void set_data_ipc(const paddle::Tensor& tmp_input,
8183
typedef typename traits_::data_t data_t;
8284

8385
sharedMemoryInfo info;
84-
volatile shmStruct *shm = NULL;
86+
volatile shmStruct* shm = NULL;
8587
if (sharedMemoryCreate(shm_name.c_str(), sizeof(*shm), &info) != 0) {
86-
printf("Failed to create shared memory slab\n");
87-
printf("Func sharedMemoryCreate. Shm_name: %s\n", shm_name.c_str());
88-
exit(EXIT_FAILURE);
88+
printf("Failed to create shared memory slab\n");
89+
printf("Func sharedMemoryCreate. Shm_name: %s\n", shm_name.c_str());
90+
exit(EXIT_FAILURE);
8991
}
90-
shm = (volatile shmStruct *)info.addr;
91-
memset((void *)shm, 0, sizeof(*shm));
92+
shm = (volatile shmStruct*)info.addr;
93+
memset((void*)shm, 0, sizeof(*shm));
9294

93-
void *data_ptr_now = reinterpret_cast<void*>(const_cast<data_t*>(tmp_input.data<data_t>()));
95+
void* data_ptr_now =
96+
reinterpret_cast<void*>(const_cast<data_t*>(tmp_input.data<data_t>()));
9497
#ifdef PADDLE_WITH_HIP
95-
checkCudaErrors(hipIpcGetMemHandle((hipIpcMemHandle_t *)&shm->memHandle, data_ptr_now));
98+
checkCudaErrors(
99+
hipIpcGetMemHandle((hipIpcMemHandle_t*)&shm->memHandle, data_ptr_now));
96100
#else
97-
checkCudaErrors(cudaIpcGetMemHandle((cudaIpcMemHandle_t *)&shm->memHandle, data_ptr_now));
101+
checkCudaErrors(
102+
cudaIpcGetMemHandle((cudaIpcMemHandle_t*)&shm->memHandle, data_ptr_now));
98103
#endif
99-
100-
101104
}
102105

103-
void SetDataIpc(const paddle::Tensor& tmp_input,
104-
const std::string& shm_name) {
105-
std::vector<int64_t> shape = tmp_input.shape();
106-
107-
switch (tmp_input.type()) {
108-
case paddle::DataType::BFLOAT16: {
109-
return set_data_ipc<paddle::DataType::BFLOAT16>(
110-
tmp_input,
111-
shm_name
112-
);
113-
}
114-
case paddle::DataType::FLOAT16: {
115-
return set_data_ipc<paddle::DataType::FLOAT16>(
116-
tmp_input,
117-
shm_name
118-
);
119-
}
120-
case paddle::DataType::FLOAT32: {
121-
return set_data_ipc<paddle::DataType::FLOAT32>(
122-
tmp_input,
123-
shm_name
124-
);
125-
}
126-
case paddle::DataType::INT8: {
127-
return set_data_ipc<paddle::DataType::INT8>(
128-
tmp_input,
129-
shm_name
130-
);
131-
}
132-
case paddle::DataType::UINT8: {
133-
return set_data_ipc<paddle::DataType::UINT8>(
134-
tmp_input,
135-
shm_name
136-
);
137-
}
138-
default: {
139-
PD_THROW(
140-
"NOT supported data type. "
141-
"Only float16, bfloat16 and float32 are supported. ");
142-
break;
143-
}
106+
void SetDataIpc(const paddle::Tensor& tmp_input, const std::string& shm_name) {
107+
std::vector<int64_t> shape = tmp_input.shape();
108+
109+
switch (tmp_input.type()) {
110+
case paddle::DataType::BFLOAT16: {
111+
return set_data_ipc<paddle::DataType::BFLOAT16>(tmp_input, shm_name);
144112
}
113+
case paddle::DataType::FLOAT16: {
114+
return set_data_ipc<paddle::DataType::FLOAT16>(tmp_input, shm_name);
115+
}
116+
case paddle::DataType::FLOAT32: {
117+
return set_data_ipc<paddle::DataType::FLOAT32>(tmp_input, shm_name);
118+
}
119+
case paddle::DataType::INT8: {
120+
return set_data_ipc<paddle::DataType::INT8>(tmp_input, shm_name);
121+
}
122+
case paddle::DataType::UINT8: {
123+
return set_data_ipc<paddle::DataType::UINT8>(tmp_input, shm_name);
124+
}
125+
default: {
126+
PD_THROW(
127+
"NOT supported data type. "
128+
"Only float16, bfloat16 and float32 are supported. ");
129+
break;
130+
}
131+
}
145132
}
146133

147134
PD_BUILD_STATIC_OP(set_data_ipc)
148135
.Inputs({"tmp_input"})
149-
.Attrs({ "shm_name: std::string"})
136+
.Attrs({"shm_name: std::string"})
150137
.Outputs({"tmp_input_out"})
151138
.SetInplaceMap({{"tmp_input", "tmp_input_out"}})
152139
.SetKernelFn(PD_KERNEL(SetDataIpc));

tests/ce/stable_cases/launch_model.sh

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ python -m fastdeploy.entrypoints.openai.api_server \
3737
--max-num-seqs 1 \
3838
--gpu-memory-utilization 0.9 \
3939
--model "$MODEL_PATH" \
40+
--no-shutdown-comm-group-if-worker-idle \
4041
--load-strategy ipc_snapshot \
4142
--dynamic-load-weight &
4243

0 commit comments

Comments (0)