Skip to content

Commit 4629fcb

Browse files
authored
Merge pull request #58 from Forceflow/develop
Pull 0.2.6 from develop
2 parents b87379b + 489a6b4 commit 4629fcb

8 files changed

+50
-33
lines changed

.gitignore

+2
Original file line numberDiff line numberDiff line change
@@ -156,3 +156,5 @@ $RECYCLE.BIN/
156156

157157
# Mac desktop service store files
158158
.DS_Store
159+
*.db-shm
160+
*.db-wal

README.md

+8-5
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
# Libmorton v0.2.5
1+
# Libmorton v0.2.6
22
[![Build Status](https://travis-ci.org/Forceflow/libmorton.svg?branch=master)](https://travis-ci.org/Forceflow/libmorton) [![license](https://img.shields.io/github/license/mashape/apistatus.svg)](https://opensource.org/licenses/MIT) [![Donate](https://img.shields.io/badge/Donate-PayPal-green.svg)](https://www.paypal.me/forceflow)
33

44
* Libmorton is a **C++ header-only library** with methods to efficiently encode/decode 64, 32 and 16-bit Morton codes and coordinates, in 2D and 3D. *Morton order* is also known as *Z-order* or *[the Z-order curve](https://en.wikipedia.org/wiki/Z-order_curve)*.
@@ -21,13 +21,16 @@ inline void morton3D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uin
2121
inline void morton3D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y, uint_fast32_t& z);
2222
</pre>
2323

24+
## Installation
25+
No installation is required (just download the headers and include them). Libmorton is also packaged for [Microsoft's vcpkg](https://github.com/Microsoft/vcpkg), if you prefer a more controlled environment for installing C++ packages.
26+
2427
## Instruction sets
2528
In the standard case, libmorton only uses operations that are supported on pretty much any CPU you can throw it at. If you know you're compiling for a specific architecture, you might gain a speed boost in encoding/decoding operations by enabling implementations for a specific instruction set. Libmorton ships with support for:
26-
* **BMI2 instruction set**: Intel Haswell CPU's and newer. Define `__BMI2__` before including `morton.h`. This is definitely a faster method when compared to the standard case.
29+
* **BMI2 instruction set**: Intel: Haswell CPUs and newer. AMD: Ryzen CPUs and newer. Define `__BMI2__` before including `morton.h`. This is definitely a faster method when compared to the standard case.
2730
* **AVX512 instruction set (experimental)**: Intel Ice Lake CPUs and newer. Uses `_mm512_bitshuffle_epi64_mask`. Define `__AVX512BITALG__` before including `morton.h`. For more info on performance, see [this PR](https://github.com/Forceflow/libmorton/pull/40).
28-
29-
## Installation
30-
No installation is required (just download the headers and include them), but I was informed libmorton is packaged for [Microsoft's VCPKG system](https://github.com/Microsoft/vcpkg) as well, if you want a more controlled environment to install C++ packages in.
31+
32+
When using MSVC, these options can be found under _Project Properties -> C/C++ -> Code Generation -> Enable Enhanced Instruction Set_.
33+
When using GCC (version 9.0 or higher), you can use `-march=haswell` (or `-march=znver2`) for BMI2 support and `-march=icelake-client` for AVX512 support.
3134

3235
## Testing
3336
The `test` folder contains tools I use to test correctness and performance of the libmorton implementation. You can regard them as unit tests. This section is being heavily rewritten, but might contain some useful code for advanced usage.

libmorton/include/morton.h

+24-25
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,9 @@
99
#include "morton2D.h"
1010
#include "morton3D.h"
1111

12-
#if defined(__BMI2__) || defined(__AVX2__)
12+
#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER))
1313
#include "morton_BMI.h"
14-
#endif
15-
16-
#if defined(__AVX512BITALG__)
14+
#elif defined(__AVX512BITALG__)
1715
#include "morton_AVX512BITALG.h"
1816
#endif
1917

@@ -22,31 +20,31 @@ namespace libmorton {
2220
//-----------------------------------------------------------------------------------------------
2321

2422
// ENCODING
25-
#if defined(__AVX512BITALG__)
23+
#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER))
2624
inline uint_fast32_t morton2D_32_encode(const uint_fast16_t x, const uint_fast16_t y) {
27-
return m2D_e_BITALG<uint_fast32_t, uint_fast16_t>(x, y);
25+
return m2D_e_BMI<uint_fast32_t, uint_fast16_t>(x, y);
2826
}
2927
inline uint_fast64_t morton2D_64_encode(const uint_fast32_t x, const uint_fast32_t y) {
30-
return m2D_e_BITALG<uint_fast64_t, uint_fast32_t>(x, y);
28+
return m2D_e_BMI<uint_fast64_t, uint_fast32_t>(x, y);
3129
}
3230
inline uint_fast32_t morton3D_32_encode(const uint_fast16_t x, const uint_fast16_t y, const uint_fast16_t z) {
33-
return m3D_e_BITALG<uint_fast32_t, uint_fast16_t>(x, y, z);
31+
return m3D_e_BMI<uint_fast32_t, uint_fast16_t>(x, y, z);
3432
}
3533
inline uint_fast64_t morton3D_64_encode(const uint_fast32_t x, const uint_fast32_t y, const uint_fast32_t z) {
36-
return m3D_e_BITALG<uint_fast64_t, uint_fast32_t>(x, y, z);
34+
return m3D_e_BMI<uint_fast64_t, uint_fast32_t>(x, y, z);
3735
}
38-
#elif defined(__BMI2__) || defined(__AVX2__)
36+
#elif defined(__AVX512BITALG__)
3937
inline uint_fast32_t morton2D_32_encode(const uint_fast16_t x, const uint_fast16_t y) {
40-
return m2D_e_BMI<uint_fast32_t, uint_fast16_t>(x, y);
38+
return m2D_e_BITALG<uint_fast32_t, uint_fast16_t>(x, y);
4139
}
4240
inline uint_fast64_t morton2D_64_encode(const uint_fast32_t x, const uint_fast32_t y) {
43-
return m2D_e_BMI<uint_fast64_t, uint_fast32_t>(x, y);
41+
return m2D_e_BITALG<uint_fast64_t, uint_fast32_t>(x, y);
4442
}
4543
inline uint_fast32_t morton3D_32_encode(const uint_fast16_t x, const uint_fast16_t y, const uint_fast16_t z) {
46-
return m3D_e_BMI<uint_fast32_t, uint_fast16_t>(x, y, z);
44+
return m3D_e_BITALG<uint_fast32_t, uint_fast16_t>(x, y, z);
4745
}
4846
inline uint_fast64_t morton3D_64_encode(const uint_fast32_t x, const uint_fast32_t y, const uint_fast32_t z) {
49-
return m3D_e_BMI<uint_fast64_t, uint_fast32_t>(x, y, z);
47+
return m3D_e_BITALG<uint_fast64_t, uint_fast32_t>(x, y, z);
5048
}
5149
#else
5250
inline uint_fast32_t morton2D_32_encode(const uint_fast16_t x, const uint_fast16_t y) {
@@ -64,32 +62,33 @@ namespace libmorton {
6462
#endif
6563

6664
// DECODING
67-
#if defined(__AVX512BITALG__)
65+
66+
#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER))
6867
inline void morton2D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y) {
69-
m2D_d_BITALG<uint_fast32_t, uint_fast16_t>(morton, x, y);
68+
m2D_d_BMI<uint_fast32_t, uint_fast16_t>(morton, x, y);
7069
}
7170
inline void morton2D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y) {
72-
m2D_d_BITALG<uint_fast64_t, uint_fast32_t>(morton, x, y);
71+
m2D_d_BMI<uint_fast64_t, uint_fast32_t>(morton, x, y);
7372
}
7473
inline void morton3D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y, uint_fast16_t& z) {
75-
m3D_d_BITALG<uint_fast32_t, uint_fast16_t>(morton, x, y, z);
74+
m3D_d_BMI<uint_fast32_t, uint_fast16_t>(morton, x, y, z);
7675
}
7776
inline void morton3D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y, uint_fast32_t& z) {
78-
m3D_d_BITALG<uint_fast64_t, uint_fast32_t>(morton, x, y, z);
77+
m3D_d_BMI<uint_fast64_t, uint_fast32_t>(morton, x, y, z);
7978
}
80-
#elif defined(__BMI2__) || defined(__AVX2__)
79+
#elif defined(__AVX512BITALG__)
8180
inline void morton2D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y) {
82-
m2D_d_BMI<uint_fast32_t, uint_fast16_t>(morton, x, y);
81+
m2D_d_BITALG<uint_fast32_t, uint_fast16_t>(morton, x, y);
8382
}
8483
inline void morton2D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y) {
85-
m2D_d_BMI<uint_fast64_t, uint_fast32_t>(morton, x, y);
84+
m2D_d_BITALG<uint_fast64_t, uint_fast32_t>(morton, x, y);
8685
}
8786
inline void morton3D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y, uint_fast16_t& z) {
88-
m3D_d_BMI<uint_fast32_t, uint_fast16_t>(morton, x, y, z);
87+
m3D_d_BITALG<uint_fast32_t, uint_fast16_t>(morton, x, y, z);
8988
}
9089
inline void morton3D_64_decode(const uint_fast64_t morton, uint_fast32_t& x, uint_fast32_t& y, uint_fast32_t& z) {
91-
m3D_d_BMI<uint_fast64_t, uint_fast32_t>(morton, x, y, z);
92-
}
90+
m3D_d_BITALG<uint_fast64_t, uint_fast32_t>(morton, x, y, z);
91+
}
9392
#else
9493
inline void morton2D_32_decode(const uint_fast32_t morton, uint_fast16_t& x, uint_fast16_t& y) {
9594
m2D_d_sLUT<uint_fast32_t, uint_fast16_t>(morton, x, y);

libmorton/include/morton_AVX512BITALG.h

+3-1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
#pragma once
2+
#if defined(__AVX512BITALG__)
23
#include <immintrin.h>
34
#include <stdint.h>
45

@@ -218,4 +219,5 @@ namespace libmorton {
218219
inline void m3D_d_BITALG(const morton m, coord& x, coord& y, coord& z) {
219220
bitalg_detail::bitunzip3D(m, x, y, z);
220221
}
221-
}
222+
}
223+
#endif

libmorton/include/morton_BMI.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
#pragma once
2-
#if defined(__BMI2__) || defined(__AVX2__)
2+
#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER))
33
#include <immintrin.h>
44
#include <stdint.h>
55

test/libmorton_test.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -145,7 +145,7 @@ void registerFunctions() {
145145
f3D_32_decode.push_back(decode_3D_32_wrapper("LUT Shifted ET", &m3D_d_sLUT_ET<uint_fast32_t, uint_fast16_t>));
146146

147147
// Register 3D BMI intrinsics if available
148-
#if defined(__BMI2__)
148+
#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER))
149149
f3D_64_encode.push_back(encode_3D_64_wrapper("BMI2 instruction set", &m3D_e_BMI<uint_fast64_t, uint_fast32_t>));
150150
f3D_32_encode.push_back(encode_3D_32_wrapper("BMI2 instruction set", &m3D_e_BMI<uint_fast32_t, uint_fast16_t>));
151151
f3D_64_decode.push_back(decode_3D_64_wrapper("BMI2 Instruction set", &m3D_d_BMI<uint_fast64_t, uint_fast32_t>));

test/libmorton_test.h

+8
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,14 @@
2222
#include "morton_LUT_generators.h"
2323
#include "../libmorton/include/morton2D.h"
2424
#include "../libmorton/include/morton3D.h"
25+
#if defined(__BMI2__) || (defined(__AVX2__) && defined(_MSC_VER))
26+
#include "morton_BMI.h"
27+
#endif
28+
#if defined(__AVX512BITALG__)
29+
#include "morton_AVX512BITALG.h"
30+
#endif
31+
32+
// Load main morton include file (should be unnecessary)
2533
#include "../libmorton/include/morton.h"
2634

2735
using std::string;

test/makefile

+3
Original file line numberDiff line numberDiff line change
@@ -14,3 +14,6 @@ avx512:
1414

1515
clean:
1616
rm -f libmorton_test libmorton_test_bmi2 libmorton_test_avx512
17+
18+
zen2:
19+
$(CXX) $(CFLAGS) -march=znver2 libmorton_test.cpp -o libmorton_test_zen2

0 commit comments

Comments
 (0)