1 ///
2 module stdx.allocator.building_blocks.bitmapped_block;
3 
4 import stdx.allocator.building_blocks.null_allocator;
5 import stdx.allocator.common;
6 
7 /**
8 
9 $(D BitmappedBlock) implements a simple heap consisting of one contiguous area
10 of memory organized in blocks, each of size $(D theBlockSize). A block is a unit
11 of allocation. A bitmap serves as bookkeeping data, more precisely one bit per
12 block indicating whether that block is currently allocated or not.
13 
14 Passing $(D NullAllocator) as $(D ParentAllocator) (the default) means user code
15 manages allocation of the memory block from the outside; in that case
16 $(D BitmappedBlock) must be constructed with a $(D void[]) preallocated block and
17 has no responsibility regarding the lifetime of its support underlying storage.
18 If another allocator type is passed, $(D BitmappedBlock) defines a destructor that
19 uses the parent allocator to release the memory block. That makes the combination of $(D AllocatorList), $(D BitmappedBlock), and a back-end allocator such as $(D MmapAllocator) a simple and scalable solution for memory allocation.
20 
21 There are advantages to storing bookkeeping data separated from the payload
22 (as opposed to e.g. using $(D AffixAllocator) to store metadata together with
23 each allocation). The layout is more compact (overhead is one bit per block),
24 searching for a free block during allocation enjoys better cache locality, and
25 deallocation does not touch memory around the payload being deallocated (which
26 is often cold).
27 
28 Allocation requests are handled on a first-fit basis. Although linear in
29 complexity, allocation is in practice fast because of the compact bookkeeping
30 representation, use of simple and fast bitwise routines, and caching of the
31 first available block position. A known issue with this general approach is
32 fragmentation, partially mitigated by coalescing. Since $(D BitmappedBlock) does
33 not need to maintain the allocated size, freeing memory implicitly coalesces
34 free blocks together. Also, tuning $(D blockSize) has a considerable impact on
35 both internal and external fragmentation.
36 
37 The size of each block can be selected either during compilation or at run
38 time. Statically-known block sizes are frequent in practice and yield slightly
39 better performance. To choose a block size statically, pass it as the $(D
40 blockSize) parameter as in $(D BitmappedBlock!(Allocator, 4096)). To choose a block
41 size parameter, use $(D BitmappedBlock!(Allocator, chooseAtRuntime)) and pass the
42 block size to the constructor.
43 
44 */
45 struct BitmappedBlock(size_t theBlockSize, uint theAlignment = platformAlignment,
46     ParentAllocator = NullAllocator)
47 {
48     import stdx.allocator.internal : Ternary;
49     import mir.functional : RefTuple;
50     // for internal API only
51     private alias Tuple = RefTuple!(size_t, uint);
52 
53     @system unittest
54     {
55         import mir.utility : max;
56         import stdx.allocator.mallocator : AlignedMallocator;
57         auto m = cast(ubyte[])(AlignedMallocator.instance.alignedAllocate(1024 * 64,
58                                 max(theAlignment, cast(uint) size_t.sizeof)));
59         scope(exit) AlignedMallocator.instance.deallocate(m);
60         static if (theBlockSize == chooseAtRuntime)
61         {
62             testAllocator!(() => BitmappedBlock!(theBlockSize, theAlignment, NullAllocator)(m, 64));
63         }
64         else
65         {
66             testAllocator!(() => BitmappedBlock!(theBlockSize, theAlignment, NullAllocator)(m));
67         }
68     }
69     static assert(theBlockSize > 0 && theAlignment.isGoodStaticAlignment);
70     static assert(theBlockSize == chooseAtRuntime
71         || theBlockSize % theAlignment == 0,
72         "Block size must be a multiple of the alignment");
73 
74     /**
75     If $(D blockSize == chooseAtRuntime), $(D BitmappedBlock) offers a read/write
76     property $(D blockSize). It must be set before any use of the allocator.
77     Otherwise (i.e. $(D theBlockSize) is a legit constant), $(D blockSize) is
78     an alias for $(D theBlockSize). Whether constant or variable, must also be
79     a multiple of $(D alignment). This constraint is $(D assert)ed statically
80     and dynamically.
81     */
82     static if (theBlockSize != chooseAtRuntime)
83     {
84         alias blockSize = theBlockSize;
85     }
86     else
87     {
88         @property uint blockSize() { return _blockSize; }
89         @property void blockSize(uint s)
90         {
91             assert(_control.allAre0() && s % alignment == 0);
92             _blockSize = s;
93         }
94         private uint _blockSize;
95     }
96 
97     static if (is(ParentAllocator == NullAllocator))
98     {
99         private enum parentAlignment = platformAlignment;
100     }
101     else
102     {
103         private alias parentAlignment = ParentAllocator.alignment;
104         static assert(parentAlignment >= ulong.alignof);
105     }
106 
107     /**
108     The _alignment offered is user-configurable statically through parameter
109     $(D theAlignment), defaulted to $(D platformAlignment).
110     */
111     alias alignment = theAlignment;
112 
113     // state {
114     /**
115     The _parent allocator. Depending on whether $(D ParentAllocator) holds state
116     or not, this is a member variable or an alias for
117     `ParentAllocator.instance`.
118     */
119     static if (stateSize!ParentAllocator)
120     {
121         ParentAllocator parent;
122     }
123     else
124     {
125         alias parent = ParentAllocator.instance;
126     }
127     private size_t _blocks;
128     private BitVector _control;
129     private void[] _payload;
130     private size_t _startIdx;
131     // }
132 
133     private size_t totalAllocation(size_t capacity)
134     {
135         auto blocks = capacity.divideRoundUp(blockSize);
136         auto leadingUlongs = blocks.divideRoundUp(64);
137         import mir.utility : min;
138         immutable initialAlignment = min(parentAlignment,
139             1U << trailingZeros(leadingUlongs * 8));
140         auto maxSlack = alignment <= initialAlignment
141             ? 0
142             : alignment - initialAlignment;
143         //writeln(maxSlack);
144         return leadingUlongs * 8 + maxSlack + blockSize * blocks;
145     }
146 
147     /**
148     Constructs a block allocator given a hunk of memory, or a desired capacity
149     in bytes.
150 
151     $(UL
152     $(LI If $(D ParentAllocator) is $(D NullAllocator), only the constructor
153     taking $(D data) is defined and the user is responsible for freeing $(D
154     data) if desired.)
155     $(LI Otherwise, both constructors are defined. The $(D data)-based
156     constructor assumes memory has been allocated with the parent allocator.
157     The $(D capacity)-based constructor uses $(D ParentAllocator) to allocate
158     an appropriate contiguous hunk of memory. Regardless of the constructor
159     used, the destructor releases the memory by using $(D
160     ParentAllocator.deallocate).)
161     )
162     */
163     this(ubyte[] data)
164     {
165         immutable a = data.ptr.effectiveAlignment;
166         assert(a >= size_t.alignof || !data.ptr,
167             "Data must be aligned properly");
168 
169         immutable ulong totalBits = data.length * 8;
170         immutable ulong bitsPerBlock = blockSize * 8 + 1;
171         // Get a first estimate
172         _blocks = cast(size_t)(totalBits / bitsPerBlock);
173 
174         // Reality is a bit more complicated, iterate until a good number of
175         // blocks found.
176         for (; _blocks; --_blocks)
177         {
178             immutable controlWords = _blocks.divideRoundUp(64);
179             auto payload = data[controlWords * 8 .. $].roundStartToMultipleOf(
180                 alignment);
181             if (payload.length < _blocks * blockSize)
182             {
183                 // Overestimated
184                 continue;
185             }
186             _control = BitVector((cast(ulong*) data.ptr)[0 .. controlWords]);
187             _control[] = 0;
188             _payload = payload;
189             break;
190         }
191     }
192 
193     /// Ditto
194     static if (!is(ParentAllocator == NullAllocator))
195     this(size_t capacity)
196     {
197         size_t toAllocate = totalAllocation(capacity);
198         auto data = cast(ubyte[])(parent.allocate(toAllocate));
199         this(data);
200         assert(_blocks * blockSize >= capacity);
201     }
202 
203     static if (chooseAtRuntime == theBlockSize)
204     this(ubyte[] data, uint blockSize)
205     {
206         this._blockSize = blockSize;
207         this(data);
208     }
209 
210     /**
211     If $(D ParentAllocator) is not $(D NullAllocator) and defines $(D
212     deallocate), the destructor is defined to deallocate the block held.
213     */
214     static if (!is(ParentAllocator == NullAllocator)
215         && __traits(hasMember, ParentAllocator, "deallocate"))
216     ~this()
217     {
218         auto start = _control.rep.ptr, end = cast(ulong*)(_payload.ptr + _payload.length);
219         parent.deallocate(start[0 .. end - start]);
220     }
221 
222     /*
223     Adjusts the memoized _startIdx to the leftmost control word that has at
224     least one zero bit. Assumes all control words to the left of $(D
225     _control[_startIdx]) are already occupied.
226     */
227     private void adjustStartIdx()
228     {
229         while (_startIdx < _control.rep.length
230             && _control.rep[_startIdx] == ulong.max)
231         {
232             ++_startIdx;
233         }
234     }
235 
236     /*
237     Returns the blocks corresponding to the control bits starting at word index
238     wordIdx and bit index msbIdx (MSB=0) for a total of howManyBlocks.
239     */
240     private void[] blocksFor(size_t wordIdx, uint msbIdx, size_t howManyBlocks)
241     {
242         assert(msbIdx <= 63);
243         const start = (wordIdx * 64 + msbIdx) * blockSize;
244         const end = start + blockSize * howManyBlocks;
245         if (end <= _payload.length) return _payload[start .. end];
246         // This could happen if we have more control bits than available memory.
247         // That's possible because the control bits are rounded up to fit in
248         // 64-bit words.
249         return null;
250     }
251 
252     /**
253     Returns the actual bytes allocated when $(D n) bytes are requested, i.e.
254     $(D n.roundUpToMultipleOf(blockSize)).
255     */
256     size_t goodAllocSize(size_t n)
257     {
258         return n.roundUpToMultipleOf(blockSize);
259     }
260 
261     /**
262     Allocates $(D s) bytes of memory and returns it, or $(D null) if memory
263     could not be allocated.
264 
265     The following information might be of help with choosing the appropriate
266     block size. Actual allocation occurs in sizes multiple of the block size.
267     Allocating one block is the fastest because only one 0 bit needs to be
268     found in the metadata. Allocating 2 through 64 blocks is the next cheapest
269     because it affects a maximum of two $(D ulong)s in the metadata.
270     Allocations greater than 64 blocks require a multiword search through the
271     metadata.
272     */
273     @trusted void[] allocate(const size_t s)
274     {
275         const blocks = s.divideRoundUp(blockSize);
276         void[] result = void;
277 
278     switcharoo:
279         switch (blocks)
280         {
281         case 1:
282             // inline code here for speed
283             // find the next available block
284             foreach (i; _startIdx .. _control.rep.length)
285             {
286                 const w = _control.rep[i];
287                 if (w == ulong.max) continue;
288                 uint j = leadingOnes(w);
289                 assert(j < 64);
290                 assert((_control.rep[i] & ((1UL << 63) >> j)) == 0);
291                 _control.rep[i] |= (1UL << 63) >> j;
292                 if (i == _startIdx)
293                 {
294                     adjustStartIdx();
295                 }
296                 result = blocksFor(i, j, 1);
297                 break switcharoo;
298             }
299             goto case 0; // fall through
300         case 0:
301             return null;
302         case 2: .. case 64:
303             result = smallAlloc(cast(uint) blocks);
304             break;
305         default:
306             result = hugeAlloc(blocks);
307             break;
308         }
309         return result.ptr ? result.ptr[0 .. s] : null;
310     }
311 
312     /**
313     Allocates a block with specified alignment $(D a). The alignment must be a
314     power of 2. If $(D a <= alignment), function forwards to $(D allocate).
315     Otherwise, it attempts to overallocate and then adjust the result for
316     proper alignment. In the worst case the slack memory is around two blocks.
317     */
318     void[] alignedAllocate(size_t n, uint a)
319     {
320         import stdx.allocator.internal : isPowerOf2;
321         assert(a.isPowerOf2);
322         if (a <= alignment) return allocate(n);
323 
324         // Overallocate to make sure we can get an aligned block
325         auto b = allocate((n + a - alignment).roundUpToMultipleOf(blockSize));
326         if (!b.ptr) return null;
327         auto result = b.roundStartToMultipleOf(a);
328         assert(result.length >= n);
329         result = result.ptr[0 .. n]; // final result
330 
331         // Free any blocks that might be slack at the beginning
332         auto slackHeadingBlocks = (result.ptr - b.ptr) / blockSize;
333         if (slackHeadingBlocks)
334         {
335             deallocate(b[0 .. slackHeadingBlocks * blockSize]);
336         }
337 
338         // Free any blocks that might be slack at the end
339         auto slackTrailingBlocks = ((b.ptr + b.length)
340             - (result.ptr + result.length)) / blockSize;
341         if (slackTrailingBlocks)
342         {
343             deallocate(b[$ - slackTrailingBlocks * blockSize .. $]);
344         }
345 
346         return result;
347     }
348 
349     /**
350     If the $(D BitmappedBlock) object is empty (has no active allocation), allocates
351     all memory within and returns a slice to it. Otherwise, returns $(D null)
352     (i.e. no attempt is made to allocate the largest available block).
353     */
354     void[] allocateAll()
355     {
356         if (empty != Ternary.yes) return null;
357         _control[] = 1;
358         return _payload;
359     }
360 
361     /**
362     Returns `Ternary.yes` if `b` belongs to the `BitmappedBlock` object,
363     `Ternary.no` otherwise. Never returns `Ternary.unkown`. (This
364     method is somewhat tolerant in that accepts an interior slice.)
365     */
366     Ternary owns(void[] b) const
367     {
368         //if (!b.ptr) return Ternary.no;
369         assert(b.ptr !is null || b.length == 0, "Corrupt block.");
370         return Ternary(b.ptr >= _payload.ptr
371             && b.ptr + b.length <= _payload.ptr + _payload.length);
372     }
373 
374     /*
375     Tries to allocate "blocks" blocks at the exact position indicated by the
376     position wordIdx/msbIdx (msbIdx counts from MSB, i.e. MSB has index 0). If
377     it succeeds, fills "result" with the result and returns tuple(size_t.max,
378     0). Otherwise, returns a tuple with the next position to search.
379     */
380     private Tuple allocateAt(size_t wordIdx, uint msbIdx,
381             size_t blocks, ref void[] result)
382     {
383         assert(blocks > 0);
384         assert(wordIdx < _control.rep.length);
385         assert(msbIdx <= 63);
386         if (msbIdx + blocks <= 64)
387         {
388             // Allocation should fit this control word
389             if (setBitsIfZero(_control.rep[wordIdx],
390                     cast(uint) (64 - msbIdx - blocks), 63 - msbIdx))
391             {
392                 // Success
393                 result = blocksFor(wordIdx, msbIdx, blocks);
394                 return Tuple(size_t.max, 0u);
395             }
396             // Can't allocate, make a suggestion
397             return msbIdx + blocks == 64
398                 ? Tuple(wordIdx + 1, 0u)
399                 : Tuple(wordIdx, cast(uint) (msbIdx + blocks));
400         }
401         // Allocation spans two control words or more
402         immutable mask = ulong.max >> msbIdx;
403         if (_control.rep[wordIdx] & mask)
404         {
405             // We can't allocate the rest of this control word,
406             // return a suggestion.
407             return Tuple(wordIdx + 1, 0u);
408         }
409         // We can allocate the rest of this control word, but we first need to
410         // make sure we can allocate the tail.
411         if (wordIdx + 1 == _control.rep.length)
412         {
413             // No more memory
414             return Tuple(_control.rep.length, 0u);
415         }
416         auto hint = allocateAt(wordIdx + 1, 0, blocks - 64 + msbIdx, result);
417         if (hint[0] == size_t.max)
418         {
419             // We did it!
420             _control.rep[wordIdx] |= mask;
421             result = blocksFor(wordIdx, msbIdx, blocks);
422             return Tuple(size_t.max, 0u);
423         }
424         // Failed, return a suggestion that skips this whole run.
425         return hint;
426     }
427 
428     /* Allocates as many blocks as possible at the end of the blocks indicated
429     by wordIdx. Returns the number of blocks allocated. */
430     private uint allocateAtTail(size_t wordIdx)
431     {
432         assert(wordIdx < _control.rep.length);
433         const available = trailingZeros(_control.rep[wordIdx]);
434         _control.rep[wordIdx] |= ulong.max >> available;
435         return available;
436     }
437 
438     private void[] smallAlloc(uint blocks)
439     {
440         assert(blocks >= 2 && blocks <= 64);
441         foreach (i; _startIdx .. _control.rep.length)
442         {
443             // Test within the current 64-bit word
444             const v = _control.rep[i];
445             if (v == ulong.max) continue;
446             auto j = findContigOnes(~v, blocks);
447             if (j < 64)
448             {
449                 // yay, found stuff
450                 setBits(_control.rep[i], 64 - j - blocks, 63 - j);
451                 return blocksFor(i, j, blocks);
452             }
453             // Next, try allocations that cross a word
454             auto available = trailingZeros(v);
455             if (available == 0) continue;
456             if (i + 1 >= _control.rep.length) break;
457             assert(available < blocks); // otherwise we should have found it
458             auto needed = blocks - available;
459             assert(needed > 0 && needed < 64);
460             if (allocateAtFront(i + 1, needed))
461             {
462                 // yay, found a block crossing two words
463                 _control.rep[i] |= (1UL << available) - 1;
464                 return blocksFor(i, 64 - available, blocks);
465             }
466         }
467         return null;
468     }
469 
470     private void[] hugeAlloc(size_t blocks)
471     {
472         assert(blocks > 64);
473         if (_startIdx == _control._rep.length)
474         {
475             assert(_control.allAre1);
476             return null;
477         }
478         auto i = _control.findZeros(blocks, _startIdx * 64);
479         if (i == i.max) return null;
480         // Allocate those bits
481         _control[i .. i + blocks] = 1;
482         return _payload[cast(size_t) (i * blockSize)
483             .. cast(size_t) ((i + blocks) * blockSize)];
484     }
485 
486     // Rounds sizeInBytes to a multiple of blockSize.
487     private size_t bytes2blocks(size_t sizeInBytes)
488     {
489         return (sizeInBytes + blockSize - 1) / blockSize;
490     }
491 
492     /* Allocates given blocks at the beginning blocks indicated by wordIdx.
493     Returns true if allocation was possible, false otherwise. */
494     private bool allocateAtFront(size_t wordIdx, uint blocks)
495     {
496         assert(wordIdx < _control.rep.length && blocks >= 1 && blocks <= 64);
497         const mask = (1UL << (64 - blocks)) - 1;
498         if (_control.rep[wordIdx] > mask) return false;
499         // yay, works
500         _control.rep[wordIdx] |= ~mask;
501         return true;
502     }
503 
504     /**
505     Expands an allocated block in place.
506     */
507     @trusted bool expand(ref void[] b, immutable size_t delta)
508     {
509         // Dispose with trivial corner cases
510         if (delta == 0) return true;
511         if (b is null) return false;
512 
513         /* To simplify matters, refuse to expand buffers that don't start at a block start (this may be the case for blocks allocated with alignedAllocate).
514         */
515         if ((b.ptr - _payload.ptr) % blockSize) return false;
516 
517         const blocksOld = bytes2blocks(b.length);
518         const blocksNew = bytes2blocks(b.length + delta);
519         assert(blocksOld <= blocksNew);
520 
521         // Possibly we have enough slack at the end of the block!
522         if (blocksOld == blocksNew)
523         {
524             b = b.ptr[0 .. b.length + delta];
525             return true;
526         }
527 
528         assert((b.ptr - _payload.ptr) % blockSize == 0);
529         const blockIdx = (b.ptr - _payload.ptr) / blockSize;
530         const blockIdxAfter = blockIdx + blocksOld;
531 
532         // Try the maximum
533         const wordIdx = blockIdxAfter / 64,
534             msbIdx = cast(uint) (blockIdxAfter % 64);
535         void[] p;
536         auto hint = allocateAt(wordIdx, msbIdx,  blocksNew - blocksOld, p);
537         if (hint[0] != size_t.max)
538         {
539             return false;
540         }
541         // Expansion successful
542         assert(p.ptr == b.ptr + blocksOld * blockSize);
543         b = b.ptr[0 .. b.length + delta];
544         return true;
545     }
546 
547     /**
548     Reallocates a previously-allocated block. Contractions occur in place.
549     */
550     @system bool reallocate(ref void[] b, size_t newSize)
551     {
552         if (!b.ptr)
553         {
554             b = allocate(newSize);
555             return b.length == newSize;
556         }
557         if (newSize == 0)
558         {
559             deallocate(b);
560             b = null;
561             return true;
562         }
563         if (newSize < b.length)
564         {
565             // Shrink. Will shrink in place by deallocating the trailing part.
566             auto newCapacity = bytes2blocks(newSize) * blockSize;
567             deallocate(b[newCapacity .. $]);
568             b = b[0 .. newSize];
569             return true;
570         }
571         // Go the slow route
572         return .reallocate(this, b, newSize);
573     }
574 
575     /**
576     Reallocates a block previously allocated with $(D alignedAllocate). Contractions do not occur in place.
577     */
578     @system bool alignedReallocate(ref void[] b, size_t newSize, uint a)
579     {
580         if (newSize == 0)
581         {
582             deallocate(b);
583             b = null;
584             return true;
585         }
586         // Go the slow route
587         return .alignedReallocate(this, b, newSize, a);
588     }
589 
590     /**
591     Deallocates a block previously allocated with this allocator.
592     */
593     bool deallocate(void[] b)
594     {
595         if (b is null) return true;
596 
597         // Locate position
598         immutable pos = b.ptr - _payload.ptr;
599         immutable blockIdx = pos / blockSize;
600 
601         // Adjust pointer, might be inside a block due to alignedAllocate
602         auto begin = _payload.ptr + blockIdx * blockSize,
603             end = b.ptr + b.length;
604         b = begin[0 .. end - begin];
605         // Round up size to multiple of block size
606         auto blocks = b.length.divideRoundUp(blockSize);
607 
608         // Get into details
609         auto wordIdx = blockIdx / 64, msbIdx = cast(uint) (blockIdx % 64);
610         if (_startIdx > wordIdx) _startIdx = wordIdx;
611 
612         // Three stages: heading bits, full words, leftover bits
613         if (msbIdx)
614         {
615             if (blocks + msbIdx <= 64)
616             {
617                 resetBits(_control.rep[wordIdx],
618                     cast(uint) (64 - msbIdx - blocks),
619                     63 - msbIdx);
620                 return true;
621             }
622             else
623             {
624                 _control.rep[wordIdx] &= ulong.max << 64 - msbIdx;
625                 blocks -= 64 - msbIdx;
626                 ++wordIdx;
627                 msbIdx = 0;
628             }
629         }
630 
631         // Stage 2: reset one word at a time
632         for (; blocks >= 64; blocks -= 64)
633         {
634             _control.rep[wordIdx++] = 0;
635         }
636 
637         // Stage 3: deal with leftover bits, if any
638         assert(wordIdx <= _control.rep.length);
639         if (blocks)
640         {
641             _control.rep[wordIdx] &= ulong.max >> blocks;
642         }
643         return true;
644     }
645 
646     /**
647     Forcibly deallocates all memory allocated by this allocator, making it
648     available for further allocations. Does not return memory to $(D
649     ParentAllocator).
650     */
651     bool deallocateAll()
652     {
653         _control[] = 0;
654         _startIdx = 0;
655         return true;
656     }
657 
658     /**
659     Returns `Ternary.yes` if no memory is currently allocated with this
660     allocator, otherwise `Ternary.no`. This method never returns
661     `Ternary.unknown`.
662     */
663     Ternary empty()
664     {
665         return Ternary(_control.allAre0());
666     }
667 
668     debug(std_experimental_allocator_bitmapped_block)
669     void dump()()
670     {
671         import std.stdio : writefln, writeln;
672         writefln("%s @ %s {", typeid(this), cast(void*) _control._rep.ptr);
673         scope(exit) writeln("}");
674         assert(_payload.length == blockSize * _blocks);
675         assert(_control.length >= _blocks);
676         writefln("  _startIdx=%s; blockSize=%s; blocks=%s",
677             _startIdx, blockSize, _blocks);
678         if (!_control.length) return;
679         uint blockCount = 1;
680         bool inAllocatedStore = _control[0];
681         void* start = _payload.ptr;
682         for (size_t i = 1;; ++i)
683         {
684             if (i >= _blocks || _control[i] != inAllocatedStore)
685             {
686                 writefln("  %s block at 0x%s, length: %s (%s*%s)",
687                     inAllocatedStore ? "Busy" : "Free",
688                     cast(void*) start,
689                     blockCount * blockSize,
690                     blockCount, blockSize);
691                 if (i >= _blocks) break;
692                 assert(i < _control.length);
693                 inAllocatedStore = _control[i];
694                 start = _payload.ptr + blockCount * blockSize;
695                 blockCount = 1;
696             }
697             else
698             {
699                 ++blockCount;
700             }
701         }
702     }
703 }
704 
705 ///
706 @nogc @system unittest
707 {
708     // Create a block allocator on top of a 10KB stack region.
709     import stdx.allocator.building_blocks.region : InSituRegion;
710     InSituRegion!(10_240, 64) r;
711     auto a = BitmappedBlock!(64, 64)(cast(ubyte[])(r.allocateAll()));
712     static assert(__traits(hasMember, InSituRegion!(10_240, 64), "allocateAll"));
713     const b = a.allocate(100);
714     assert(b.length == 100);
715 }
716 
717 @system unittest
718 {
719     import stdx.allocator.gc_allocator : GCAllocator;
720     testAllocator!(() => BitmappedBlock!(64, 8, GCAllocator)(1024 * 64));
721 }
722 
723 @system unittest
724 {
725     static void testAllocateAll(size_t bs)(size_t blocks, uint blocksAtATime)
726     {
727         import mir.utility : min;
728         assert(bs);
729         import stdx.allocator.gc_allocator : GCAllocator;
730         auto a = BitmappedBlock!(bs, min(bs, platformAlignment))(
731             cast(ubyte[])(GCAllocator.instance.allocate((blocks * bs * 8 +
732                         blocks) / 8))
733         );
734         import std.conv : text;
735         assert(blocks >= a._blocks, text(blocks, " < ", a._blocks));
736         blocks = a._blocks;
737 
738         // test allocation of 0 bytes
739         auto x = a.allocate(0);
740         assert(x is null);
741         // test allocation of 1 byte
742         x = a.allocate(1);
743         assert(x.length == 1 || blocks == 0,
744             text(x.ptr, " ", x.length, " ", a));
745         a.deallocateAll();
746 
747         bool twice = true;
748 
749     begin:
750         foreach (i; 0 .. blocks / blocksAtATime)
751         {
752             auto b = a.allocate(bs * blocksAtATime);
753             assert(b.length == bs * blocksAtATime, text(i, ": ", b.length));
754         }
755         assert(a.allocate(bs * blocksAtATime) is null);
756         assert(a.allocate(1) is null);
757 
758         // Now deallocate all and do it again!
759         a.deallocateAll();
760 
761         // Test deallocation
762 
763         auto v = new void[][blocks / blocksAtATime];
764         foreach (i; 0 .. blocks / blocksAtATime)
765         {
766             auto b = a.allocate(bs * blocksAtATime);
767             assert(b.length == bs * blocksAtATime, text(i, ": ", b.length));
768             v[i] = b;
769         }
770         assert(a.allocate(bs * blocksAtATime) is null);
771         assert(a.allocate(1) is null);
772 
773         foreach (i; 0 .. blocks / blocksAtATime)
774         {
775             a.deallocate(v[i]);
776         }
777 
778         foreach (i; 0 .. blocks / blocksAtATime)
779         {
780             auto b = a.allocate(bs * blocksAtATime);
781             assert(b.length == bs * blocksAtATime, text(i, ": ", b.length));
782             v[i] = b;
783         }
784 
785         foreach (i; 0 .. v.length)
786         {
787             a.deallocate(v[i]);
788         }
789 
790         if (twice)
791         {
792             twice = false;
793             goto begin;
794         }
795 
796         a.deallocateAll;
797 
798         // test expansion
799         if (blocks >= blocksAtATime)
800         {
801             foreach (i; 0 .. blocks / blocksAtATime - 1)
802             {
803                 auto b = a.allocate(bs * blocksAtATime);
804                 assert(b.length == bs * blocksAtATime, text(i, ": ", b.length));
805                 (cast(ubyte[]) b)[] = 0xff;
806                 a.expand(b, blocksAtATime * bs)
807                     || assert(0, text(i));
808                 (cast(ubyte[]) b)[] = 0xfe;
809                 assert(b.length == bs * blocksAtATime * 2, text(i, ": ", b.length));
810                 a.reallocate(b, blocksAtATime * bs) || assert(0);
811                 assert(b.length == bs * blocksAtATime, text(i, ": ", b.length));
812             }
813         }
814     }
815 
816     testAllocateAll!(1)(0, 1);
817     testAllocateAll!(1)(8, 1);
818     testAllocateAll!(4096)(128, 1);
819 
820     testAllocateAll!(1)(0, 2);
821     testAllocateAll!(1)(128, 2);
822     testAllocateAll!(4096)(128, 2);
823 
824     testAllocateAll!(1)(0, 4);
825     testAllocateAll!(1)(128, 4);
826     testAllocateAll!(4096)(128, 4);
827 
828     testAllocateAll!(1)(0, 3);
829     testAllocateAll!(1)(24, 3);
830     testAllocateAll!(3008)(100, 1);
831     testAllocateAll!(3008)(100, 3);
832 
833     testAllocateAll!(1)(0, 128);
834     testAllocateAll!(1)(128 * 1, 128);
835     testAllocateAll!(128 * 20)(13 * 128, 128);
836 }
837 
838 // Test totalAllocation
839 @safe unittest
840 {
841     BitmappedBlock!(8, 8, NullAllocator) h1;
842     assert(h1.totalAllocation(1) >= 8);
843     assert(h1.totalAllocation(64) >= 64);
844     assert(h1.totalAllocation(8 * 64) >= 8 * 64);
845     assert(h1.totalAllocation(8 * 63) >= 8 * 63);
846     assert(h1.totalAllocation(8 * 64 + 1) >= 8 * 65);
847 
848     BitmappedBlock!(64, 8, NullAllocator) h2;
849     assert(h2.totalAllocation(1) >= 64);
850     assert(h2.totalAllocation(64 * 64) >= 64 * 64);
851 
852     BitmappedBlock!(4096, 4096, NullAllocator) h3;
853     assert(h3.totalAllocation(1) >= 4096);
854     assert(h3.totalAllocation(64 * 4096) >= 64 * 4096);
855     assert(h3.totalAllocation(64 * 4096 + 1) >= 65 * 4096);
856 }
857 
858 // BitmappedBlockWithInternalPointers
859 /**
860 
861 A $(D BitmappedBlock) with additional structure for supporting $(D
862 resolveInternalPointer). To that end, $(D BitmappedBlockWithInternalPointers) adds a
863 bitmap (one bit per block) that marks object starts. The bitmap itself has
864 variable size and is allocated together with regular allocations.
865 
866 The time complexity of $(D resolveInternalPointer) is $(BIGOH k), where $(D k)
867 is the size of the object within which the internal pointer is looked up.
868 
869 */
870 struct BitmappedBlockWithInternalPointers(
871     size_t theBlockSize, uint theAlignment = platformAlignment,
872     ParentAllocator = NullAllocator)
873 {
874     import stdx.allocator.internal : Ternary;
875     @system unittest
876     {
877         import stdx.allocator.mallocator : AlignedMallocator;
878         auto m = cast(ubyte[])(AlignedMallocator.instance.alignedAllocate(1024 * 64,
879             theAlignment));
880         scope(exit) AlignedMallocator.instance.deallocate(m);
881         testAllocator!(() => BitmappedBlockWithInternalPointers(m));
882     }
883 
884     // state {
885     private BitmappedBlock!(theBlockSize, theAlignment, NullAllocator) _heap;
886     private BitVector _allocStart;
887     // }
888 
889     /**
890     Constructors accepting desired capacity or a preallocated buffer, similar
891     in semantics to those of $(D BitmappedBlock).
892     */
893     this(ubyte[] data)
894     {
895         _heap = BitmappedBlock!(theBlockSize, theAlignment, ParentAllocator)(data);
896     }
897 
898     /// Ditto
899     static if (!is(ParentAllocator == NullAllocator))
900     this(size_t capacity)
901     {
902         // Add room for the _allocStart vector
903         _heap = BitmappedBlock!(theBlockSize, theAlignment, ParentAllocator)
904             (capacity + capacity.divideRoundUp(64));
905     }
906 
907     // Makes sure there's enough room for _allocStart
908     private bool ensureRoomForAllocStart(size_t len)
909     {
910         if (_allocStart.length >= len) return true;
911         // Must ensure there's room
912         immutable oldLength = _allocStart.rep.length;
913         immutable bits = len.roundUpToMultipleOf(64);
914         void[] b = _allocStart.rep;
915         if (!_heap.reallocate(b, bits / 8)) return false;
916         assert(b.length * 8 == bits);
917         _allocStart = BitVector(cast(ulong[]) b);
918         assert(_allocStart.rep.length * 64 == bits);
919         _allocStart.rep[oldLength .. $] = ulong.max;
920         return true;
921     }
922 
923     /**
924     Allocator primitives.
925     */
926     alias alignment = theAlignment;
927 
928     /// Ditto
929     size_t goodAllocSize(size_t n)
930     {
931         return n.roundUpToMultipleOf(_heap.blockSize);
932     }
933 
934     /// Ditto
935     void[] allocate(size_t bytes)
936     {
937         auto r = _heap.allocate(bytes);
938         if (!r.ptr) return r;
939         immutable block = (r.ptr - _heap._payload.ptr) / _heap.blockSize;
940         immutable blocks =
941             (r.length + _heap.blockSize - 1) / _heap.blockSize;
942         if (!ensureRoomForAllocStart(block + blocks))
943         {
944             // Failed, free r and bailout
945             _heap.deallocate(r);
946             return null;
947         }
948         assert(block < _allocStart.length);
949         assert(block + blocks <= _allocStart.length);
950         // Mark the _allocStart bits
951         assert(blocks > 0);
952         _allocStart[block] = 1;
953         _allocStart[block + 1 .. block + blocks] = 0;
954         assert(block + blocks == _allocStart.length
955             || _allocStart[block + blocks] == 1);
956         return r;
957     }
958 
959     /// Ditto
960     void[] allocateAll()
961     {
962         auto r = _heap.allocateAll();
963         if (!r.ptr) return r;
964         // Carve space at the end for _allocStart
965         auto p = alignDownTo(r.ptr + r.length - 8, ulong.alignof);
966         r = r[0 .. p - r.ptr];
967         // Initialize _allocStart
968         _allocStart = BitVector(cast(ulong[]) p[0 .. 8]);
969         _allocStart[] = 0;
970         immutable block = (r.ptr - _heap._payload.ptr) / _heap.blockSize;
971         assert(block < _allocStart.length);
972         _allocStart[block] = 1;
973         return r;
974     }
975 
976     /// Ditto
977     bool expand(ref void[] b, size_t bytes)
978     {
979         if (!bytes) return true;
980         if (b is null) return false;
981         immutable oldBlocks =
982             (b.length + _heap.blockSize - 1) / _heap.blockSize;
983         assert(oldBlocks);
984         immutable newBlocks =
985             (b.length + bytes + _heap.blockSize - 1) / _heap.blockSize;
986         assert(newBlocks >= oldBlocks);
987         immutable block = (b.ptr - _heap._payload.ptr) / _heap.blockSize;
988         assert(_allocStart[block]);
989         if (!ensureRoomForAllocStart(block + newBlocks)
990                 || !_heap.expand(b, bytes))
991         {
992             return false;
993         }
994         // Zero only the expanded bits
995         _allocStart[block + oldBlocks .. block + newBlocks] = 0;
996         assert(_allocStart[block]);
997         return true;
998     }
999 
1000     /// Ditto
1001     bool deallocate(void[] b)
1002     {
1003         // No need to touch _allocStart here - except for the first bit, it's
1004         // meaningless in freed memory. The first bit is already 1.
1005         return _heap.deallocate(b);
1006         // TODO: one smart thing to do is reduce memory occupied by
1007         // _allocStart if we're freeing the rightmost block.
1008     }
1009 
1010     /// Ditto
1011     Ternary resolveInternalPointer(const void* p, ref void[] result)
1012     {
1013         if (p < _heap._payload.ptr
1014             || p >= _heap._payload.ptr + _heap._payload.length)
1015         {
1016             return Ternary.no;
1017         }
1018         // Find block start
1019         auto block = (p - _heap._payload.ptr) / _heap.blockSize;
1020         if (block >= _allocStart.length) return Ternary.no;
1021         // Within an allocation, must find the 1 just to the left of it
1022         auto i = _allocStart.find1Backward(block);
1023         if (i == i.max) return Ternary.no;
1024         auto j = _allocStart.find1(i + 1);
1025         result = _heap._payload.ptr[cast(size_t) (_heap.blockSize * i)
1026                                     .. cast(size_t) (_heap.blockSize * j)];
1027         return Ternary.yes;
1028     }
1029 
1030     /// Ditto
1031     Ternary empty()
1032     {
1033         return _heap.empty;
1034     }
1035 
1036     // Currently unused
1037     private void markAllAsUnused()
1038     {
1039         // Mark all deallocated memory with 1 so we minimize damage created by
1040         // false pointers. TODO: improve speed.
1041         foreach (i, ref e; _allocStart.rep)
1042         {
1043             // Set to 1 all bits in _allocStart[i] that were 0 in control, and
1044             // leave the others unchanged.
1045             // (0, 0) => 1; (0, 1) => 0; (1, 0) => 1; (1, 1) => 1
1046             e |= ~_heap._control.rep[i];
1047         }
1048         // Now zero all control bits
1049         _heap._control[] = 0;
1050         // EXCEPT for the _allocStart block itself
1051         markAsUsed(_allocStart.rep);
1052     }
1053 
1054     // Currently unused
1055     private bool markAsUsed(void[] b)
1056     {
1057         // Locate position
1058         immutable pos = b.ptr - _heap._payload.ptr;
1059         assert(pos % _heap.blockSize == 0);
1060         auto blockIdx = pos / _heap.blockSize;
1061         if (_heap._control[blockIdx]) return false;
1062         // Round up size to multiple of block size
1063         auto blocks = b.length.divideRoundUp(_heap.blockSize);
1064         _heap._control[blockIdx .. blockIdx + blocks] = 1;
1065         return true;
1066     }
1067 
1068     // Currently unused
1069     private void doneMarking()
1070     {
1071         // Nothing to do, what's free stays free.
1072     }
1073 }
1074 
1075 @system unittest
1076 {
1077     import stdx.allocator.internal : Ternary;
1078 
1079     auto h = BitmappedBlockWithInternalPointers!(4096)(new ubyte[4096 * 1024]);
1080     auto b = h.allocate(123);
1081     assert(b.length == 123);
1082 
1083     void[] p;
1084     Ternary r = h.resolveInternalPointer(b.ptr + 17, p);
1085     assert(p.ptr is b.ptr);
1086     assert(p.length >= b.length);
1087     b = h.allocate(4096);
1088 
1089     h.resolveInternalPointer(b.ptr, p);
1090     assert(p is b);
1091 
1092     h.resolveInternalPointer(b.ptr + 11, p);
1093     assert(p is b);
1094 
1095     void[] unchanged = p;
1096     h.resolveInternalPointer(b.ptr - 40_970, p);
1097     assert(p is unchanged);
1098 
1099     assert(h.expand(b, 1));
1100     assert(b.length == 4097);
1101     h.resolveInternalPointer(b.ptr + 4096, p);
1102     assert(p.ptr is b.ptr);
1103 }
1104 
1105 /**
1106 Returns the number of most significant ones before a zero can be found in $(D
1107 x). If $(D x) contains no zeros (i.e. is equal to $(D ulong.max)), returns 64.
1108 */
1109 private uint leadingOnes()(ulong x)
1110 {
1111     import mir.bitop: ctlz;
1112     x = ~x;
1113     if (x)
1114         return cast(uint) x.ctlz;
1115     return 64;
1116 }
1117 
1118 @system unittest
1119 {
1120     assert(leadingOnes(0) == 0);
1121     assert(leadingOnes(~0UL) == 64);
1122     assert(leadingOnes(0xF000_0000_0000_0000) == 4);
1123     assert(leadingOnes(0xE400_0000_0000_0000) == 3);
1124     assert(leadingOnes(0xC700_0200_0000_0000) == 2);
1125     assert(leadingOnes(0x8000_0030_0000_0000) == 1);
1126     assert(leadingOnes(0x2000_0000_0000_0000) == 0);
1127 }
1128 
1129 /**
1130 Finds a run of contiguous ones in $(D x) of length at least $(D n).
1131 */
1132 private uint findContigOnes()(ulong x, uint n)
1133 {
1134     while (n > 1)
1135     {
1136         immutable s = n >> 1;
1137         x &= x << s;
1138         n -= s;
1139     }
1140     return leadingOnes(~x);
1141 }
1142 
1143 @system unittest
1144 {
1145     assert(findContigOnes(0x0000_0000_0000_0300, 2) == 54);
1146 
1147     assert(findContigOnes(~0UL, 1) == 0);
1148     assert(findContigOnes(~0UL, 2) == 0);
1149     assert(findContigOnes(~0UL, 32) == 0);
1150     assert(findContigOnes(~0UL, 64) == 0);
1151     assert(findContigOnes(0UL, 1) == 64);
1152 
1153     assert(findContigOnes(0x4000_0000_0000_0000, 1) == 1);
1154     assert(findContigOnes(0x0000_0F00_0000_0000, 4) == 20);
1155 }
1156 
1157 /*
1158 Unconditionally sets the bits from lsb through msb in w to zero.
1159 */
1160 private void setBits()(ref ulong w, uint lsb, uint msb)
1161 {
1162     assert(lsb <= msb && msb < 64);
1163     const mask = (ulong.max << lsb) & (ulong.max >> (63 - msb));
1164     w |= mask;
1165 }
1166 
1167 @system unittest
1168 {
1169     ulong w;
1170     w = 0; setBits(w, 0, 63); assert(w == ulong.max);
1171     w = 0; setBits(w, 1, 63); assert(w == ulong.max - 1);
1172     w = 6; setBits(w, 0, 1); assert(w == 7);
1173     w = 6; setBits(w, 3, 3); assert(w == 14);
1174 }
1175 
1176 /* Are bits from lsb through msb in w zero? If so, make then 1
1177 and return the resulting w. Otherwise, just return 0.
1178 */
1179 private bool setBitsIfZero()(ref ulong w, uint lsb, uint msb)
1180 {
1181     assert(lsb <= msb && msb < 64);
1182     const mask = (ulong.max << lsb) & (ulong.max >> (63 - msb));
1183     if (w & mask) return false;
1184     w |= mask;
1185     return true;
1186 }
1187 
1188 // Assigns bits in w from lsb through msb to zero.
1189 private void resetBits()(ref ulong w, uint lsb, uint msb)
1190 {
1191     assert(lsb <= msb && msb < 64);
1192     const mask = (ulong.max << lsb) & (ulong.max >> (63 - msb));
1193     w &= ~mask;
1194 }
1195 
1196 /*
1197 Bit disposition is MSB=0 (leftmost, big endian).
1198 */
1199 struct BitVector
1200 {
1201     ulong[] _rep;
1202 
1203 @safe pure nothrow @nogc:
1204 
1205     auto rep() { return _rep; }
1206 
1207     this(ulong[] data) { _rep = data; }
1208 
1209     void opSliceAssign(bool b) { _rep[] = b ? ulong.max : 0; }
1210 
1211     void opSliceAssign(bool b, ulong x, ulong y)
1212     {
1213         assert(x <= y && y <= _rep.length * 64);
1214         if (x == y) return;
1215         --y;
1216         assert(x / 64 <= size_t.max);
1217         immutable i1 = cast(size_t) (x / 64);
1218         immutable uint b1 = 63 - x % 64;
1219         assert(y / 64 <= size_t.max);
1220         immutable i2 = cast(size_t) (y / 64);
1221         immutable uint b2 = 63 - y % 64;
1222         assert(i1 <= i2 && i2 < _rep.length);
1223         if (i1 == i2)
1224         {
1225             // Inside the same word
1226             assert(b1 >= b2);
1227             if (b) setBits(_rep[i1], b2, b1);
1228             else resetBits(_rep[i1], b2, b1);
1229         }
1230         else
1231         {
1232             // Spans multiple words
1233             assert(i1 < i2);
1234             if (b) setBits(_rep[i1], 0, b1);
1235             else resetBits(_rep[i1], 0, b1);
1236             _rep[i1 + 1 .. i2] = b;
1237             if (b) setBits(_rep[i2], b2, 63);
1238             else resetBits(_rep[i2], b2, 63);
1239         }
1240     }
1241 
1242     bool opIndex(ulong x)
1243     {
1244         assert(x < length);
1245         return (_rep[cast(size_t) (x / 64)]
1246             & (0x8000_0000_0000_0000UL >> (x % 64))) != 0;
1247     }
1248 
1249     void opIndexAssign(bool b, ulong x)
1250     {
1251         assert(x / 64 <= size_t.max);
1252         immutable i = cast(size_t) (x / 64);
1253         immutable j = 0x8000_0000_0000_0000UL >> (x % 64);
1254         if (b) _rep[i] |= j;
1255         else _rep[i] &= ~j;
1256     }
1257 
1258     ulong length() const
1259     {
1260         return _rep.length * 64;
1261     }
1262 
1263     /* Returns the index of the first 1 to the right of i (including i itself),
1264     or length if not found.
1265     */
1266     ulong find1(ulong i)
1267     {
1268         assert(i < length);
1269         assert(i / 64 <= size_t.max);
1270         auto w = cast(size_t) (i / 64);
1271         immutable b = i % 64; // 0 through 63, 0 when i == 0
1272         immutable mask = ulong.max >> b;
1273         if (auto current = _rep[w] & mask)
1274         {
1275             // Great, found
1276             return w * 64 + leadingOnes(~current);
1277         }
1278         // The current word doesn't have the solution, find the leftmost 1
1279         // going to the right.
1280         for (++w; w < _rep.length; ++w)
1281         {
1282             if (auto current = _rep[w])
1283             {
1284                 return w * 64 + leadingOnes(~current);
1285             }
1286         }
1287         return length;
1288     }
1289 
1290     /* Returns the index of the first 1 to the left of i (including i itself),
1291     or ulong.max if not found.
1292     */
1293     ulong find1Backward(ulong i)
1294     {
1295         assert(i < length);
1296         auto w = cast(size_t) (i / 64);
1297         immutable b = 63 - (i % 64); // 0 through 63, 63 when i == 0
1298         immutable mask = ~((1UL << b) - 1);
1299         assert(mask != 0);
1300         // First, let's see if the current word has a bit larger than ours.
1301         if (auto currentWord = _rep[w] & mask)
1302         {
1303             // Great, this word contains the result.
1304             return w * 64 + 63 - currentWord.trailingZeros;
1305         }
1306         // The current word doesn't have the solution, find the rightmost 1
1307         // going to the left.
1308         while (w >= 1)
1309         {
1310             --w;
1311             if (auto currentWord = _rep[w])
1312                 return w * 64 + (63 - currentWord.trailingZeros);
1313         }
1314         return ulong.max;
1315     }
1316 
1317     /// Are all bits zero?
1318     bool allAre0() const
1319     {
1320         foreach (w; _rep) if (w) return false;
1321         return true;
1322     }
1323 
1324     /// Are all bits one?
1325     bool allAre1() const
1326     {
1327         foreach (w; _rep) if (w != ulong.max) return false;
1328         return true;
1329     }
1330 
1331     ulong findZeros(immutable size_t howMany, ulong start)
1332     {
1333         assert(start < length);
1334         assert(howMany > 64);
1335         auto i = cast(size_t) (start / 64);
1336         while (_rep[i] & 1)
1337         {
1338             // No trailing zeros in this word, try the next one
1339             if (++i == _rep.length) return ulong.max;
1340             start = i * 64;
1341         }
1342         // Adjust start to have only trailing zeros after it
1343         auto prefixLength = 64;
1344         while (_rep[i] & (ulong.max >> (64 - prefixLength)))
1345         {
1346             assert(prefixLength > 0);
1347             --prefixLength;
1348             ++start;
1349         }
1350 
1351         assert(howMany > prefixLength);
1352         auto needed = howMany - prefixLength;
1353         for (++i; needed >= 64; needed -= 64, ++i)
1354         {
1355             if (i >= _rep.length) return ulong.max;
1356             if (_rep[i] != 0) return findZeros(howMany, i * 64);
1357         }
1358         // Leftover < 64 bits
1359         assert(needed < 64);
1360         if (!needed) return start;
1361         if (i >= _rep.length) return ulong.max;
1362         if (leadingOnes(~_rep[i]) >= needed) return start;
1363         return findZeros(howMany, i * 64);
1364     }
1365 }
1366 
1367 @system unittest
1368 {
1369     auto v = BitVector(new ulong[10]);
1370     assert(v.length == 640);
1371 
1372     v[] = 0;
1373     v[53] = 1;
1374     assert(v[52] == 0);
1375     assert(v[53] == 1);
1376     assert(v[54] == 0);
1377 
1378     v[] = 0;
1379     v[53 .. 55] = 1;
1380     assert(v[52] == 0);
1381     assert(v[53] == 1);
1382     assert(v[54] == 1);
1383     assert(v[55] == 0);
1384 
1385     v[] = 0;
1386     v[2 .. 65] = 1;
1387     assert(v.rep[0] == 0x3FFF_FFFF_FFFF_FFFF);
1388     assert(v.rep[1] == 0x8000_0000_0000_0000);
1389     assert(v.rep[2] == 0);
1390 
1391     v[] = 0;
1392     assert(v.find1Backward(0) == ulong.max);
1393     assert(v.find1Backward(43) == ulong.max);
1394     assert(v.find1Backward(83) == ulong.max);
1395 
1396     v[0] = 1;
1397     assert(v.find1Backward(0) == 0);
1398     assert(v.find1Backward(43) == 0);
1399     import std.conv : text;
1400     assert(v.find1Backward(83) == 0, text(v.find1Backward(83)));
1401 
1402     v[0] = 0;
1403     v[101] = 1;
1404     assert(v.find1Backward(0) == ulong.max);
1405     assert(v.find1Backward(43) == ulong.max);
1406     assert(v.find1Backward(83) == ulong.max);
1407     assert(v.find1Backward(100) == ulong.max);
1408     assert(v.find1Backward(101) == 101);
1409     assert(v.find1Backward(553) == 101);
1410 
1411     v[0 .. v.length] = 0;
1412     v[v.length .. v.length] = 0;
1413     v[0 .. 0] = 0;
1414 
1415     v[] = 0;
1416     assert(v.find1(0) == v.length);
1417     v[139] = 1;
1418     assert(v.find1(0) == 139);
1419     assert(v.find1(100) == 139);
1420     assert(v.find1(138) == 139);
1421     assert(v.find1(139) == 139);
1422     assert(v.find1(140) == v.length);
1423 
1424     v[] = 0;
1425     assert(v.findZeros(100, 0) == 0);
1426     foreach (i; 0 .. 500)
1427         assert(v.findZeros(100, i) == i, text(v.findZeros(100, i), " != ", i));
1428     assert(v.findZeros(540, 99) == 99);
1429     assert(v.findZeros(99, 540) == 540);
1430     assert(v.findZeros(540, 100) == 100);
1431     assert(v.findZeros(640, 0) == 0);
1432     assert(v.findZeros(641, 1) == ulong.max);
1433     assert(v.findZeros(641, 100) == ulong.max);
1434 }