Skip to content

Commit 60caaa6

Browse files
authored
Merge pull request #143 from nyurik/portable
Add support for configurable SIMD modes in CMake build system
2 parents aa1a6c3 + eaa8da3 commit 60caaa6

File tree

7 files changed

+125
-32
lines changed

7 files changed

+125
-32
lines changed

.github/workflows/macos-ci.yml

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,23 @@ on: [push, pull_request]
44

55
jobs:
66
macos-build:
7+
name: macos (${{ matrix.simd_mode }})
78
runs-on: macos-latest
9+
strategy:
10+
fail-fast: false
11+
matrix:
12+
simd_mode: [portable, native]
13+
814
steps:
9-
- uses: actions/checkout@v2
10-
- name: Use cmake (default)
15+
- uses: actions/checkout@v6
16+
- name: Build and test (Release, ${{ matrix.simd_mode }})
1117
run: |
12-
cmake -B build -D CMAKE_BUILD_TYPE=Release
18+
cmake -B build -D CMAKE_BUILD_TYPE=Release -D FASTPFOR_SIMD_MODE=${{ matrix.simd_mode }}
1319
cmake --build build
1420
ctest --test-dir build --output-on-failure
15-
- name: Use cmake (debug)
21+
- name: Build and test (Debug, ${{ matrix.simd_mode }})
1622
run: |
17-
cmake -B build -D CMAKE_BUILD_TYPE=Debug
23+
rm -rf build
24+
cmake -B build -D CMAKE_BUILD_TYPE=Debug -D FASTPFOR_SIMD_MODE=${{ matrix.simd_mode }}
1825
cmake --build build
19-
ctest --test-dir build --output-on-failure
26+
ctest --test-dir build --output-on-failure

.github/workflows/ubuntu-ci.yml

Lines changed: 27 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -2,31 +2,46 @@ name: Ubuntu-CI
22

33
on: [push, pull_request]
44

5-
6-
75
jobs:
86
ci:
9-
name: ubuntu-gcc
7+
name: ubuntu-gcc (${{ matrix.simd_mode }})
108
runs-on: ubuntu-latest
9+
strategy:
10+
fail-fast: false
11+
matrix:
12+
simd_mode: [portable, native]
1113

1214
env:
1315
CC: gcc
1416
CXX: g++
1517

16-
steps:
17-
- uses: actions/checkout@v2
18-
- name: Use cmake (default)
18+
steps:
19+
- uses: actions/checkout@v6
20+
- name: Build and test (Release, ${{ matrix.simd_mode }})
1921
run: |
20-
cmake -B build -D CMAKE_BUILD_TYPE=Release
22+
cmake -B build -D CMAKE_BUILD_TYPE=Release -D FASTPFOR_SIMD_MODE=${{ matrix.simd_mode }}
2123
cmake --build build
2224
ctest --test-dir build --output-on-failure
23-
- name: Use cmake (debug)
25+
- name: Build and test (Debug, ${{ matrix.simd_mode }})
2426
run: |
25-
cmake -B build -D CMAKE_BUILD_TYPE=Debug
27+
rm -rf build
28+
cmake -B build -D CMAKE_BUILD_TYPE=Debug -D FASTPFOR_SIMD_MODE=${{ matrix.simd_mode }}
2629
cmake --build build
2730
ctest --test-dir build --output-on-failure
28-
- name: Use cmake (sanitizers)
31+
32+
33+
ci-sanitizers:
34+
name: ubuntu-gcc (sanitizers)
35+
runs-on: ubuntu-latest
36+
37+
env:
38+
CC: gcc
39+
CXX: g++
40+
41+
steps:
42+
- uses: actions/checkout@v6
43+
- name: Build and test with sanitizers
2944
run: |
30-
cmake -B build -D FASTPFOR_SANITIZE=ON
45+
cmake -B build -D FASTPFOR_SANITIZE=ON -D FASTPFOR_SIMD_MODE=portable
3146
cmake --build build
32-
ctest --test-dir build --output-on-failure
47+
ctest --test-dir build --output-on-failure

.github/workflows/vs-ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ jobs:
88
runs-on: windows-latest
99
steps:
1010
- name: checkout
11-
uses: actions/checkout@v2
11+
uses: actions/checkout@v6
1212
- name: Configure
1313
run: |
1414
cmake -B build
@@ -23,4 +23,4 @@ jobs:
2323
- name: Run Debug tests
2424
run: |
2525
cd build
26-
ctest -C Debug -LE explicitonly --output-on-failure
26+
ctest -C Debug -LE explicitonly --output-on-failure

.github/workflows/vs17-arm-ci.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ jobs:
1313
- {gen: Visual Studio 17 2022, arch: ARM64}
1414
steps:
1515
- name: checkout
16-
uses: actions/checkout@v2
16+
uses: actions/checkout@v6
1717
- name: Configure
1818
run: |
1919
cmake -B build
@@ -28,4 +28,4 @@ jobs:
2828
- name: Run Debug tests
2929
run: |
3030
cd build
31-
ctest -C Debug -LE explicitonly --output-on-failure
31+
ctest -C Debug -LE explicitonly --output-on-failure

CMakeLists.txt

Lines changed: 28 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -87,13 +87,35 @@ if(${CMAKE_CXX_COMPILER_ID} STREQUAL "GNU" OR ${CMAKE_CXX_COMPILER_ID} STREQUAL
8787
target_compile_options(FastPFOR PRIVATE -Wall -Wextra -Weffc++ -Wsign-compare -Wshadow -Wwrite-strings -Wpointer-arith -Winit-self -Wconversion -Wno-sign-conversion)
8888
endif()
8989
include(CheckCXXCompilerFlag)
90-
unset(FASTPFOR_COMPILER_SUPPORTS_MARCH_NATIVE CACHE)
91-
CHECK_CXX_COMPILER_FLAG(-march=native FASTPFOR_COMPILER_SUPPORTS_MARCH_NATIVE)
92-
if(FASTPFOR_COMPILER_SUPPORTS_MARCH_NATIVE)
93-
target_compile_options(FastPFOR PRIVATE -march=native)
90+
91+
# SIMD mode: native, portable, or runtime
92+
# - native: Use -march=native for maximum performance on the build machine (not portable; default)
93+
# - portable: Use baseline SSE4.2 only for maximum compatibility
94+
# - runtime: Use function multi-versioning for runtime CPU dispatch (future)
95+
set(FASTPFOR_SIMD_MODE "native" CACHE STRING "SIMD compilation mode: native, portable, or runtime")
96+
set_property(CACHE FASTPFOR_SIMD_MODE PROPERTY STRINGS native portable runtime)
97+
98+
if(FASTPFOR_SIMD_MODE STREQUAL "native")
99+
unset(FASTPFOR_COMPILER_SUPPORTS_MARCH_NATIVE CACHE)
100+
CHECK_CXX_COMPILER_FLAG(-march=native FASTPFOR_COMPILER_SUPPORTS_MARCH_NATIVE)
101+
if(FASTPFOR_COMPILER_SUPPORTS_MARCH_NATIVE)
102+
target_compile_options(FastPFOR PRIVATE -march=native)
103+
else()
104+
message(STATUS "native target not supported, falling back to portable mode")
105+
target_compile_options(FastPFOR PRIVATE -msse4.2)
106+
endif()
107+
elseif(FASTPFOR_SIMD_MODE STREQUAL "portable")
108+
# Baseline: SSE4.2 is required by FastPFOR SIMD code
109+
target_compile_options(FastPFOR PRIVATE -msse4.2)
110+
elseif(FASTPFOR_SIMD_MODE STREQUAL "runtime")
111+
# Runtime dispatch: compile with baseline SSE4.2 and enable multi-versioning
112+
target_compile_options(FastPFOR PRIVATE -msse4.2)
113+
target_compile_definitions(FastPFOR PRIVATE FASTPFOR_RUNTIME_DISPATCH)
114+
message(STATUS "Runtime dispatch mode is experimental")
94115
else()
95-
message(STATUS "native target not supported")
116+
message(FATAL_ERROR "Invalid FASTPFOR_SIMD_MODE: ${FASTPFOR_SIMD_MODE}. Use native, portable, or runtime.")
96117
endif()
118+
message(STATUS "FASTPFOR_SIMD_MODE: ${FASTPFOR_SIMD_MODE}")
97119

98120
MESSAGE( STATUS "CMAKE_CXX_FLAGS_DEBUG: " ${CMAKE_CXX_FLAGS_DEBUG} )
99121
MESSAGE( STATUS "CMAKE_CXX_FLAGS_RELEASE: " ${CMAKE_CXX_FLAGS_RELEASE} )
@@ -217,4 +239,4 @@ if (SUPPORT_NEON)
217239
message(WARNING "Building with emulation with SIMDE for ARM NEON support.")
218240
message(WARNING "We do not actually support ARM NEON natively.")
219241
message(WARNING "If you actually want native ARM NEON support, please consider providing a patch.")
220-
endif()
242+
endif()

README.md

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,53 @@ It may be necessary to set the CXX variable. The project is installable (`make i
152152

153153
To create project files for Microsoft Visual Studio, it might be useful to target 64-bit Windows (e.g., see http://www.cmake.org/cmake/help/v3.0/generator/Visual%20Studio%2012%202013.html).
154154

155+
### CMake Options
156+
157+
#### FASTPFOR_SIMD_MODE
158+
159+
Controls how SIMD instructions are compiled. This affects portability and performance:
160+
161+
| Mode | Flag | Description |
162+
|------|------|-------------|
163+
| `portable` | `-msse4.2` | Compiles with SSE4.2 baseline only. Binaries will run on any x86-64 CPU from ~2008 onwards. Best for distributable libraries and CI builds. |
164+
| `native` | `-march=native` | **Default.** Compiles with all SIMD instructions supported by the build machine (may include AVX, AVX2, AVX-512, etc.). Maximum performance but binaries may crash with `SIGILL` on CPUs that lack the required instructions. |
165+
| `runtime` | `-msse4.2` + `FASTPFOR_RUNTIME_DISPATCH` | Experimental. Intended for future runtime CPU dispatch using function multi-versioning. |
166+
167+
**Usage:**
168+
169+
```bash
170+
# Portable build - safe for distribution
171+
cmake -B build -DFASTPFOR_SIMD_MODE=portable
172+
173+
# Native build - maximum performance on build machine
174+
cmake -B build -DFASTPFOR_SIMD_MODE=native
175+
176+
# Check which mode is active in CMake output
177+
cmake -B build
178+
# Look for: "FASTPFOR_SIMD_MODE: <mode>" in the output (native unless overridden or already cached)
179+
```
180+
181+
**When to use each mode:**
182+
183+
- Use `portable` when building binaries that will run on different machines, in CI/CD pipelines, or when distributing pre-built libraries.
184+
- Use `native` when building for a specific machine where maximum performance is needed and you know the binary won't be moved to a different CPU.
185+
186+
#### FASTPFOR_SANITIZE
187+
188+
Enable address sanitizer for debugging memory issues:
189+
190+
```bash
191+
cmake -B build -DFASTPFOR_SANITIZE=ON
192+
```
193+
194+
#### FASTPFOR_WITH_TEST
195+
196+
Build with Google Test (enabled by default):
197+
198+
```bash
199+
cmake -B build -DFASTPFOR_WITH_TEST=OFF # Disable tests
200+
```
201+
155202
### Multithreaded context
156203

157204
You should not assume that our objects are thread safe.

cmake_modules/DetectCPUFeatures.cmake

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ return _mm256_movemask_epi8(x);
3030
}")
3131

3232
if(MSVC)
33-
message(STATUS "TEST 2")
33+
message(STATUS "Detecting CPU features (MSVC)")
3434
set(CMAKE_REQUIRED_FLAGS "/EHsc /arch:SSE2")
3535
check_cxx_source_compiles("${SSE4PROG}" SUPPORT_SSE42)
3636
message(STATUS "SUPPORT_SSE42 ${SUPPORT_SSE42}")
@@ -41,11 +41,13 @@ if(MSVC)
4141
check_cxx_source_compiles("${AVX2PROG}" SUPPORT_AVX2)
4242
message(STATUS "SUPPORT_AVX2 ${SUPPORT_AVX2}")
4343
else()
44-
set(CMAKE_REQUIRED_FLAGS "-march=native -msse4.2")
44+
# Test compiler support for SIMD instruction sets
45+
# Note: This tests compiler capability, not runtime CPU support
46+
set(CMAKE_REQUIRED_FLAGS "-msse4.2")
4547
check_cxx_source_compiles("${SSE4PROG}" SUPPORT_SSE42)
46-
set(CMAKE_REQUIRED_FLAGS "-march=native -mavx")
48+
set(CMAKE_REQUIRED_FLAGS "-mavx")
4749
check_cxx_source_compiles("${AVXPROG}" SUPPORT_AVX)
48-
set(CMAKE_REQUIRED_FLAGS "-march=native -mavx2")
50+
set(CMAKE_REQUIRED_FLAGS "-mavx2")
4951
check_cxx_source_compiles("${AVX2PROG}" SUPPORT_AVX2)
5052
endif()
5153

0 commit comments

Comments
 (0)