32#if defined( __SSE2__ )
34#elif defined( __ARM_NEON ) || defined( __ARM_NEON__ )
53inline uint64_t
HilbertXY2D(
int aOrder, uint32_t aX, uint32_t aY )
59 for( uint32_t s = ( 1U << ( aOrder - 1 ) ); s > 0; s >>= 1 )
61 uint32_t rx = ( x & s ) ? 1 : 0;
62 uint32_t ry = ( y & s ) ? 1 : 0;
64 d +=
static_cast<uint64_t
>( s ) * s * ( ( 3 * rx ) ^ ry );
97inline uint64_t
HilbertND2D(
int aOrder,
const uint32_t aCoords[NUMDIMS] )
99 if constexpr( NUMDIMS == 2 )
101 return HilbertXY2D( aOrder, aCoords[0], aCoords[1] );
110 for(
int bit = aOrder - 1; bit >= 0; --bit )
112 for(
int dim = NUMDIMS - 1; dim >= 0; --dim )
115 d |= ( aCoords[dim] >> bit ) & 1;
143template <
class ELEMTYPE,
int FANOUT>
144inline uint32_t
OverlapMask2D(
const ELEMTYPE* aBoundsMin0,
const ELEMTYPE* aBoundsMax0,
145 const ELEMTYPE* aBoundsMin1,
const ELEMTYPE* aBoundsMax1,
147 const ELEMTYPE aMin[2],
const ELEMTYPE aMax[2] )
149 static_assert( FANOUT <= 31,
"OverlapMask2D requires FANOUT <= 31" );
150 static_assert( FANOUT % 4 == 0,
"OverlapMask2D requires FANOUT divisible by 4 for SIMD" );
152#if defined( __SSE2__ )
153 if constexpr(
sizeof( ELEMTYPE ) == 4 )
156 __m128i qmin0 = _mm_set1_epi32(
static_cast<int>( aMin[0] ) );
157 __m128i qmax0 = _mm_set1_epi32(
static_cast<int>( aMax[0] ) );
158 __m128i qmin1 = _mm_set1_epi32(
static_cast<int>( aMin[1] ) );
159 __m128i qmax1 = _mm_set1_epi32(
static_cast<int>( aMax[1] ) );
167 for(
int j = 0; j < FANOUT; j += 4 )
169 __m128i bmin0 = _mm_loadu_si128(
170 reinterpret_cast<const __m128i*
>( &aBoundsMin0[j] ) );
171 __m128i bmax0 = _mm_loadu_si128(
172 reinterpret_cast<const __m128i*
>( &aBoundsMax0[j] ) );
173 __m128i bmin1 = _mm_loadu_si128(
174 reinterpret_cast<const __m128i*
>( &aBoundsMin1[j] ) );
175 __m128i bmax1 = _mm_loadu_si128(
176 reinterpret_cast<const __m128i*
>( &aBoundsMax1[j] ) );
179 __m128i fail = _mm_or_si128(
180 _mm_or_si128( _mm_cmpgt_epi32( bmin0, qmax0 ),
181 _mm_cmpgt_epi32( qmin0, bmax0 ) ),
182 _mm_or_si128( _mm_cmpgt_epi32( bmin1, qmax1 ),
183 _mm_cmpgt_epi32( qmin1, bmax1 ) ) );
186 int failBits = _mm_movemask_ps( _mm_castsi128_ps( fail ) );
187 mask |=
static_cast<uint32_t
>( ~failBits & 0xF ) << j;
190 return mask & ( ( 1U << aCount ) - 1 );
193#elif defined( __ARM_NEON ) || defined( __ARM_NEON__ )
194 if constexpr(
sizeof( ELEMTYPE ) == 4 )
197 int32x4_t qmin0 = vdupq_n_s32(
static_cast<int32_t
>( aMin[0] ) );
198 int32x4_t qmax0 = vdupq_n_s32(
static_cast<int32_t
>( aMax[0] ) );
199 int32x4_t qmin1 = vdupq_n_s32(
static_cast<int32_t
>( aMin[1] ) );
200 int32x4_t qmax1 = vdupq_n_s32(
static_cast<int32_t
>( aMax[1] ) );
204 static const int32_t kBitShifts[4] = { 0, 1, 2, 3 };
205 int32x4_t shifts = vld1q_s32( kBitShifts );
213 for(
int j = 0; j < FANOUT; j += 4 )
215 int32x4_t bmin0 = vld1q_s32(
216 reinterpret_cast<const int32_t*
>( &aBoundsMin0[j] ) );
217 int32x4_t bmax0 = vld1q_s32(
218 reinterpret_cast<const int32_t*
>( &aBoundsMax0[j] ) );
219 int32x4_t bmin1 = vld1q_s32(
220 reinterpret_cast<const int32_t*
>( &aBoundsMin1[j] ) );
221 int32x4_t bmax1 = vld1q_s32(
222 reinterpret_cast<const int32_t*
>( &aBoundsMax1[j] ) );
225 uint32x4_t fail = vorrq_u32(
226 vorrq_u32( vcgtq_s32( bmin0, qmax0 ),
227 vcgtq_s32( qmin0, bmax0 ) ),
228 vorrq_u32( vcgtq_s32( bmin1, qmax1 ),
229 vcgtq_s32( qmin1, bmax1 ) ) );
232 uint32x4_t bits = vshrq_n_u32( fail, 31 );
233 uint32x4_t positioned = vshlq_u32( bits, shifts );
236 uint32x2_t half = vorr_u32( vget_low_u32( positioned ),
237 vget_high_u32( positioned ) );
238 uint32_t failMask = vget_lane_u32( half, 0 ) | vget_lane_u32( half, 1 );
240 mask |=
static_cast<uint32_t
>( ~failMask & 0xF ) << j;
243 return mask & ( ( 1U << aCount ) - 1 );
259 for(
int i = 0; i < FANOUT; ++i )
261 result[i] = ( aBoundsMin0[i] <= aMax[0] )
262 & ( aBoundsMax0[i] >= aMin[0] )
263 & ( aBoundsMin1[i] <= aMax[1] )
264 & ( aBoundsMax1[i] >= aMin[1] );
269 for(
int i = 0; i < FANOUT; ++i )
270 mask |= (
static_cast<uint32_t
>(
result[i] ) << i );
272 return mask & ( ( 1U << aCount ) - 1 );
293template <
class DATATYPE,
class ELEMTYPE,
int NUMDIMS,
int MAXNODES>
334 for(
int d = 0; d < NUMDIMS; ++d )
336 aMin[d] = std::numeric_limits<ELEMTYPE>::max();
337 aMax[d] = std::numeric_limits<ELEMTYPE>::lowest();
340 for(
int i = 0; i <
count; ++i )
342 for(
int d = 0; d < NUMDIMS; ++d )
344 if(
bounds[d * 2][i] < aMin[d] )
345 aMin[d] =
bounds[d * 2][i];
347 if(
bounds[d * 2 + 1][i] > aMax[d] )
348 aMax[d] =
bounds[d * 2 + 1][i];
360 for(
int d = 0; d < NUMDIMS; ++d )
361 area *=
static_cast<int64_t
>(
bounds[d * 2 + 1][i] ) -
bounds[d * 2][i];
369 bool ChildOverlaps(
int i,
const ELEMTYPE aMin[NUMDIMS],
const ELEMTYPE aMax[NUMDIMS] )
const
371 for(
int d = 0; d < NUMDIMS; ++d )
373 if(
bounds[d * 2][i] > aMax[d] ||
bounds[d * 2 + 1][i] < aMin[d] )
385 const ELEMTYPE aMax[NUMDIMS] )
const
387 if constexpr( NUMDIMS == 2 )
396 for(
int i = 0; i <
count; ++i )
410 const ELEMTYPE aMax[NUMDIMS] )
const
414 for(
int d = 0; d < NUMDIMS; ++d )
416 ELEMTYPE lo = std::max(
bounds[d * 2][i], aMin[d] );
417 ELEMTYPE hi = std::min(
bounds[d * 2 + 1][i], aMax[d] );
422 overlap *=
static_cast<int64_t
>( hi ) - lo;
433 const ELEMTYPE aMax[NUMDIMS] )
const
436 int64_t enlargedArea = 1;
438 for(
int d = 0; d < NUMDIMS; ++d )
440 ELEMTYPE lo = std::min(
bounds[d * 2][i], aMin[d] );
441 ELEMTYPE hi = std::max(
bounds[d * 2 + 1][i], aMax[d] );
442 enlargedArea *=
static_cast<int64_t
>( hi ) - lo;
445 return enlargedArea - originalArea;
454 int64_t perimeter = 0;
456 for(
int d = 0; d < NUMDIMS; ++d )
457 perimeter +=
static_cast<int64_t
>(
bounds[d * 2 + 1][i] ) -
bounds[d * 2][i];
459 return 2 * perimeter;
465 void SetChildBounds(
int i,
const ELEMTYPE aMin[NUMDIMS],
const ELEMTYPE aMax[NUMDIMS] )
467 for(
int d = 0; d < NUMDIMS; ++d )
469 bounds[d * 2][i] = aMin[d];
470 bounds[d * 2 + 1][i] = aMax[d];
477 void GetChildBounds(
int i, ELEMTYPE aMin[NUMDIMS], ELEMTYPE aMax[NUMDIMS] )
const
479 for(
int d = 0; d < NUMDIMS; ++d )
481 aMin[d] =
bounds[d * 2][i];
482 aMax[d] =
bounds[d * 2 + 1][i];
489 void SetInsertBounds(
int i,
const ELEMTYPE aMin[NUMDIMS],
const ELEMTYPE aMax[NUMDIMS] )
491 for(
int d = 0; d < NUMDIMS; ++d )
503 for(
int d = 0; d < NUMDIMS; ++d )
516 int last =
count - 1;
520 for(
int d = 0; d < NUMDIMS * 2; ++d )
555 std::bitset<NODES_PER_PAGE>
used;
570 m_pages( std::move( aOther.m_pages ) ),
577 if(
this != &aOther )
579 m_pages = std::move( aOther.m_pages );
613 NODE* node = &page->nodes[i];
622 m_pages.push_back( std::make_unique<PAGE>() );
625 NODE* node = &page->
nodes[0];
657 bool Owns(
const NODE* aNode )
const
670 const auto addr =
reinterpret_cast<uintptr_t
>( aNode );
674 const auto begin =
reinterpret_cast<uintptr_t
>( &page->nodes[0] );
677 if( addr >= begin && addr <
end )
680 aOffset =
static_cast<size_t>( aNode - &page->nodes[0] );
694 page->
used.set( offset );
703 page->
used.reset( offset );
NODE * Allocate()
Allocate a new node, either from the free list or from a new page.
static constexpr size_t NODES_PER_PAGE
size_t MemoryUsage() const
Return approximate memory usage in bytes.
SLAB_ALLOCATOR & operator=(SLAB_ALLOCATOR &&aOther) noexcept
std::vector< std::unique_ptr< PAGE > > m_pages
SLAB_ALLOCATOR & operator=(const SLAB_ALLOCATOR &)=delete
void markUnused(NODE *aNode)
bool Owns(const NODE *aNode) const
Check if a node was allocated from one of this allocator's pages.
~SLAB_ALLOCATOR()=default
SLAB_ALLOCATOR(SLAB_ALLOCATOR &&aOther) noexcept
void Free(NODE *aNode)
Return a node to the free list for reuse.
SLAB_ALLOCATOR(const SLAB_ALLOCATOR &)=delete
PAGE * findOwningPage(const NODE *aNode, size_t &aOffset) const
Find the page that contains aNode using address-range comparison (no UB).
std::vector< NODE * > m_freeList
void markUsed(NODE *aNode)
uint64_t HilbertND2D(int aOrder, const uint32_t aCoords[NUMDIMS])
Compute Hilbert index for N-dimensional coordinates.
uint64_t HilbertXY2D(int aOrder, uint32_t aX, uint32_t aY)
Compute a 64-bit Hilbert curve index from 2D unsigned coordinates.
uint32_t OverlapMask2D(const ELEMTYPE *aBoundsMin0, const ELEMTYPE *aBoundsMax0, const ELEMTYPE *aBoundsMin1, const ELEMTYPE *aBoundsMax1, int aCount, const ELEMTYPE aMin[2], const ELEMTYPE aMax[2])
Compute a bitmask of which child slots overlap a 2D query rectangle.
static constexpr int MINNODES
int64_t ChildOverlapArea(int i, const ELEMTYPE aMin[NUMDIMS], const ELEMTYPE aMax[NUMDIMS]) const
Compute the overlap area between child slot i and the given box.
ELEMTYPE insertBounds[NUMDIMS *2][MAXNODES]
ELEMTYPE bounds[NUMDIMS *2][MAXNODES]
std::atomic< int > refcount
int64_t ChildEnlargement(int i, const ELEMTYPE aMin[NUMDIMS], const ELEMTYPE aMax[NUMDIMS]) const
Compute how much child slot i's area would increase if it were enlarged to include the given bounding...
int64_t ChildPerimeter(int i) const
Compute the perimeter (or margin for 3D) of child slot i's bounding box.
int64_t ChildArea(int i) const
Compute the area (or volume for 3D) of child slot i's bounding box.
void GetChildBounds(int i, ELEMTYPE aMin[NUMDIMS], ELEMTYPE aMax[NUMDIMS]) const
Get the bounding box for child slot i.
RTREE_NODE * children[MAXNODES]
uint32_t ChildOverlapMask(const ELEMTYPE aMin[NUMDIMS], const ELEMTYPE aMax[NUMDIMS]) const
Bitmask of children whose bounding boxes overlap the query rectangle.
bool ChildOverlaps(int i, const ELEMTYPE aMin[NUMDIMS], const ELEMTYPE aMax[NUMDIMS]) const
Test whether child slot i's bounding box overlaps with the given query box.
void SetChildBounds(int i, const ELEMTYPE aMin[NUMDIMS], const ELEMTYPE aMax[NUMDIMS])
Set the bounding box for child slot i.
void SetInsertBounds(int i, const ELEMTYPE aMin[NUMDIMS], const ELEMTYPE aMax[NUMDIMS])
Store the insertion bounding box for leaf entry i.
void GetInsertBounds(int i, ELEMTYPE aMin[NUMDIMS], ELEMTYPE aMax[NUMDIMS]) const
Get the stored insertion bounding box for leaf entry i.
void ComputeEnclosingBounds(ELEMTYPE aMin[NUMDIMS], ELEMTYPE aMax[NUMDIMS]) const
Compute the bounding box that encloses all children in this node.
void RemoveChild(int i)
Remove child at slot i by swapping with last entry.
NODE nodes[NODES_PER_PAGE]
std::bitset< NODES_PER_PAGE > used
wxString result
Test unit parsing edge cases and error handling.