36#if defined( __SSE2__ )
38#elif defined( __ARM_NEON ) || defined( __ARM_NEON__ )
57inline uint64_t
HilbertXY2D(
int aOrder, uint32_t aX, uint32_t aY )
63 for( uint32_t s = ( 1U << ( aOrder - 1 ) ); s > 0; s >>= 1 )
65 uint32_t rx = ( x & s ) ? 1 : 0;
66 uint32_t ry = ( y & s ) ? 1 : 0;
68 d +=
static_cast<uint64_t
>( s ) * s * ( ( 3 * rx ) ^ ry );
100template <
int NUMDIMS>
101inline uint64_t
HilbertND2D(
int aOrder,
const uint32_t aCoords[NUMDIMS] )
103 if constexpr( NUMDIMS == 2 )
105 return HilbertXY2D( aOrder, aCoords[0], aCoords[1] );
114 for(
int bit = aOrder - 1; bit >= 0; --bit )
116 for(
int dim = NUMDIMS - 1; dim >= 0; --dim )
119 d |= ( aCoords[dim] >> bit ) & 1;
147template <
class ELEMTYPE,
int FANOUT>
148inline uint32_t
OverlapMask2D(
const ELEMTYPE* aBoundsMin0,
const ELEMTYPE* aBoundsMax0,
149 const ELEMTYPE* aBoundsMin1,
const ELEMTYPE* aBoundsMax1,
151 const ELEMTYPE aMin[2],
const ELEMTYPE aMax[2] )
153 static_assert( FANOUT <= 31,
"OverlapMask2D requires FANOUT <= 31" );
154 static_assert( FANOUT % 4 == 0,
"OverlapMask2D requires FANOUT divisible by 4 for SIMD" );
156#if defined( __SSE2__ )
157 if constexpr(
sizeof( ELEMTYPE ) == 4 )
160 __m128i qmin0 = _mm_set1_epi32(
static_cast<int>( aMin[0] ) );
161 __m128i qmax0 = _mm_set1_epi32(
static_cast<int>( aMax[0] ) );
162 __m128i qmin1 = _mm_set1_epi32(
static_cast<int>( aMin[1] ) );
163 __m128i qmax1 = _mm_set1_epi32(
static_cast<int>( aMax[1] ) );
171 for(
int j = 0; j < FANOUT; j += 4 )
173 __m128i bmin0 = _mm_loadu_si128(
174 reinterpret_cast<const __m128i*
>( &aBoundsMin0[j] ) );
175 __m128i bmax0 = _mm_loadu_si128(
176 reinterpret_cast<const __m128i*
>( &aBoundsMax0[j] ) );
177 __m128i bmin1 = _mm_loadu_si128(
178 reinterpret_cast<const __m128i*
>( &aBoundsMin1[j] ) );
179 __m128i bmax1 = _mm_loadu_si128(
180 reinterpret_cast<const __m128i*
>( &aBoundsMax1[j] ) );
183 __m128i fail = _mm_or_si128(
184 _mm_or_si128( _mm_cmpgt_epi32( bmin0, qmax0 ),
185 _mm_cmpgt_epi32( qmin0, bmax0 ) ),
186 _mm_or_si128( _mm_cmpgt_epi32( bmin1, qmax1 ),
187 _mm_cmpgt_epi32( qmin1, bmax1 ) ) );
190 int failBits = _mm_movemask_ps( _mm_castsi128_ps( fail ) );
191 mask |=
static_cast<uint32_t
>( ~failBits & 0xF ) << j;
194 return mask & ( ( 1U << aCount ) - 1 );
197#elif defined( __ARM_NEON ) || defined( __ARM_NEON__ )
198 if constexpr(
sizeof( ELEMTYPE ) == 4 )
201 int32x4_t qmin0 = vdupq_n_s32(
static_cast<int32_t
>( aMin[0] ) );
202 int32x4_t qmax0 = vdupq_n_s32(
static_cast<int32_t
>( aMax[0] ) );
203 int32x4_t qmin1 = vdupq_n_s32(
static_cast<int32_t
>( aMin[1] ) );
204 int32x4_t qmax1 = vdupq_n_s32(
static_cast<int32_t
>( aMax[1] ) );
208 static const int32_t kBitShifts[4] = { 0, 1, 2, 3 };
209 int32x4_t shifts = vld1q_s32( kBitShifts );
217 for(
int j = 0; j < FANOUT; j += 4 )
219 int32x4_t bmin0 = vld1q_s32(
220 reinterpret_cast<const int32_t*
>( &aBoundsMin0[j] ) );
221 int32x4_t bmax0 = vld1q_s32(
222 reinterpret_cast<const int32_t*
>( &aBoundsMax0[j] ) );
223 int32x4_t bmin1 = vld1q_s32(
224 reinterpret_cast<const int32_t*
>( &aBoundsMin1[j] ) );
225 int32x4_t bmax1 = vld1q_s32(
226 reinterpret_cast<const int32_t*
>( &aBoundsMax1[j] ) );
229 uint32x4_t fail = vorrq_u32(
230 vorrq_u32( vcgtq_s32( bmin0, qmax0 ),
231 vcgtq_s32( qmin0, bmax0 ) ),
232 vorrq_u32( vcgtq_s32( bmin1, qmax1 ),
233 vcgtq_s32( qmin1, bmax1 ) ) );
236 uint32x4_t bits = vshrq_n_u32( fail, 31 );
237 uint32x4_t positioned = vshlq_u32( bits, shifts );
240 uint32x2_t half = vorr_u32( vget_low_u32( positioned ),
241 vget_high_u32( positioned ) );
242 uint32_t failMask = vget_lane_u32( half, 0 ) | vget_lane_u32( half, 1 );
244 mask |=
static_cast<uint32_t
>( ~failMask & 0xF ) << j;
247 return mask & ( ( 1U << aCount ) - 1 );
263 for(
int i = 0; i < FANOUT; ++i )
265 result[i] = ( aBoundsMin0[i] <= aMax[0] )
266 & ( aBoundsMax0[i] >= aMin[0] )
267 & ( aBoundsMin1[i] <= aMax[1] )
268 & ( aBoundsMax1[i] >= aMin[1] );
273 for(
int i = 0; i < FANOUT; ++i )
274 mask |= (
static_cast<uint32_t
>(
result[i] ) << i );
276 return mask & ( ( 1U << aCount ) - 1 );
297template <
class DATATYPE,
class ELEMTYPE,
int NUMDIMS,
int MAXNODES>
338 for(
int d = 0; d < NUMDIMS; ++d )
340 aMin[d] = std::numeric_limits<ELEMTYPE>::max();
341 aMax[d] = std::numeric_limits<ELEMTYPE>::lowest();
344 for(
int i = 0; i <
count; ++i )
346 for(
int d = 0; d < NUMDIMS; ++d )
348 if(
bounds[d * 2][i] < aMin[d] )
349 aMin[d] =
bounds[d * 2][i];
351 if(
bounds[d * 2 + 1][i] > aMax[d] )
352 aMax[d] =
bounds[d * 2 + 1][i];
364 for(
int d = 0; d < NUMDIMS; ++d )
365 area *=
static_cast<int64_t
>(
bounds[d * 2 + 1][i] ) -
bounds[d * 2][i];
373 bool ChildOverlaps(
int i,
const ELEMTYPE aMin[NUMDIMS],
const ELEMTYPE aMax[NUMDIMS] )
const
375 for(
int d = 0; d < NUMDIMS; ++d )
377 if(
bounds[d * 2][i] > aMax[d] ||
bounds[d * 2 + 1][i] < aMin[d] )
389 const ELEMTYPE aMax[NUMDIMS] )
const
391 if constexpr( NUMDIMS == 2 )
400 for(
int i = 0; i <
count; ++i )
414 const ELEMTYPE aMax[NUMDIMS] )
const
418 for(
int d = 0; d < NUMDIMS; ++d )
420 ELEMTYPE lo = std::max(
bounds[d * 2][i], aMin[d] );
421 ELEMTYPE hi = std::min(
bounds[d * 2 + 1][i], aMax[d] );
426 overlap *=
static_cast<int64_t
>( hi ) - lo;
437 const ELEMTYPE aMax[NUMDIMS] )
const
440 int64_t enlargedArea = 1;
442 for(
int d = 0; d < NUMDIMS; ++d )
444 ELEMTYPE lo = std::min(
bounds[d * 2][i], aMin[d] );
445 ELEMTYPE hi = std::max(
bounds[d * 2 + 1][i], aMax[d] );
446 enlargedArea *=
static_cast<int64_t
>( hi ) - lo;
449 return enlargedArea - originalArea;
458 int64_t perimeter = 0;
460 for(
int d = 0; d < NUMDIMS; ++d )
461 perimeter +=
static_cast<int64_t
>(
bounds[d * 2 + 1][i] ) -
bounds[d * 2][i];
463 return 2 * perimeter;
469 void SetChildBounds(
int i,
const ELEMTYPE aMin[NUMDIMS],
const ELEMTYPE aMax[NUMDIMS] )
471 for(
int d = 0; d < NUMDIMS; ++d )
473 bounds[d * 2][i] = aMin[d];
474 bounds[d * 2 + 1][i] = aMax[d];
481 void GetChildBounds(
int i, ELEMTYPE aMin[NUMDIMS], ELEMTYPE aMax[NUMDIMS] )
const
483 for(
int d = 0; d < NUMDIMS; ++d )
485 aMin[d] =
bounds[d * 2][i];
486 aMax[d] =
bounds[d * 2 + 1][i];
493 void SetInsertBounds(
int i,
const ELEMTYPE aMin[NUMDIMS],
const ELEMTYPE aMax[NUMDIMS] )
495 for(
int d = 0; d < NUMDIMS; ++d )
507 for(
int d = 0; d < NUMDIMS; ++d )
520 int last =
count - 1;
524 for(
int d = 0; d < NUMDIMS * 2; ++d )
559 std::bitset<NODES_PER_PAGE>
used;
574 m_pages( std::move( aOther.m_pages ) ),
581 if(
this != &aOther )
583 m_pages = std::move( aOther.m_pages );
617 NODE* node = &page->nodes[i];
626 m_pages.push_back( std::make_unique<PAGE>() );
629 NODE* node = &page->
nodes[0];
661 bool Owns(
const NODE* aNode )
const
674 const auto addr =
reinterpret_cast<uintptr_t
>( aNode );
678 const auto begin =
reinterpret_cast<uintptr_t
>( &page->nodes[0] );
681 if( addr >= begin && addr <
end )
684 aOffset =
static_cast<size_t>( aNode - &page->nodes[0] );
698 page->
used.set( offset );
707 page->
used.reset( offset );
NODE * Allocate()
Allocate a new node, either from the free list or from a new page.
static constexpr size_t NODES_PER_PAGE
size_t MemoryUsage() const
Return approximate memory usage in bytes.
SLAB_ALLOCATOR & operator=(SLAB_ALLOCATOR &&aOther) noexcept
std::vector< std::unique_ptr< PAGE > > m_pages
SLAB_ALLOCATOR & operator=(const SLAB_ALLOCATOR &)=delete
void markUnused(NODE *aNode)
bool Owns(const NODE *aNode) const
Check if a node was allocated from one of this allocator's pages.
~SLAB_ALLOCATOR()=default
SLAB_ALLOCATOR(SLAB_ALLOCATOR &&aOther) noexcept
void Free(NODE *aNode)
Return a node to the free list for reuse.
SLAB_ALLOCATOR(const SLAB_ALLOCATOR &)=delete
PAGE * findOwningPage(const NODE *aNode, size_t &aOffset) const
Find the page that contains aNode using address-range comparison (no UB).
std::vector< NODE * > m_freeList
void markUsed(NODE *aNode)
uint64_t HilbertND2D(int aOrder, const uint32_t aCoords[NUMDIMS])
Compute Hilbert index for N-dimensional coordinates.
uint64_t HilbertXY2D(int aOrder, uint32_t aX, uint32_t aY)
Compute a 64-bit Hilbert curve index from 2D unsigned coordinates.
uint32_t OverlapMask2D(const ELEMTYPE *aBoundsMin0, const ELEMTYPE *aBoundsMax0, const ELEMTYPE *aBoundsMin1, const ELEMTYPE *aBoundsMax1, int aCount, const ELEMTYPE aMin[2], const ELEMTYPE aMax[2])
Compute a bitmask of which child slots overlap a 2D query rectangle.
static constexpr int MINNODES
int64_t ChildOverlapArea(int i, const ELEMTYPE aMin[NUMDIMS], const ELEMTYPE aMax[NUMDIMS]) const
Compute the overlap area between child slot i and the given box.
ELEMTYPE insertBounds[NUMDIMS *2][MAXNODES]
ELEMTYPE bounds[NUMDIMS *2][MAXNODES]
std::atomic< int > refcount
int64_t ChildEnlargement(int i, const ELEMTYPE aMin[NUMDIMS], const ELEMTYPE aMax[NUMDIMS]) const
Compute how much child slot i's area would increase if it were enlarged to include the given bounding box.
int64_t ChildPerimeter(int i) const
Compute the perimeter (or margin for 3D) of child slot i's bounding box.
int64_t ChildArea(int i) const
Compute the area (or volume for 3D) of child slot i's bounding box.
void GetChildBounds(int i, ELEMTYPE aMin[NUMDIMS], ELEMTYPE aMax[NUMDIMS]) const
Get the bounding box for child slot i.
RTREE_NODE * children[MAXNODES]
uint32_t ChildOverlapMask(const ELEMTYPE aMin[NUMDIMS], const ELEMTYPE aMax[NUMDIMS]) const
Bitmask of children whose bounding boxes overlap the query rectangle.
bool ChildOverlaps(int i, const ELEMTYPE aMin[NUMDIMS], const ELEMTYPE aMax[NUMDIMS]) const
Test whether child slot i's bounding box overlaps with the given query box.
void SetChildBounds(int i, const ELEMTYPE aMin[NUMDIMS], const ELEMTYPE aMax[NUMDIMS])
Set the bounding box for child slot i.
void SetInsertBounds(int i, const ELEMTYPE aMin[NUMDIMS], const ELEMTYPE aMax[NUMDIMS])
Store the insertion bounding box for leaf entry i.
void GetInsertBounds(int i, ELEMTYPE aMin[NUMDIMS], ELEMTYPE aMax[NUMDIMS]) const
Get the stored insertion bounding box for leaf entry i.
void ComputeEnclosingBounds(ELEMTYPE aMin[NUMDIMS], ELEMTYPE aMax[NUMDIMS]) const
Compute the bounding box that encloses all children in this node.
void RemoveChild(int i)
Remove child at slot i by swapping with last entry.
NODE nodes[NODES_PER_PAGE]
std::bitset< NODES_PER_PAGE > used
wxString result
Test unit parsing edge cases and error handling.