9 #include <unique_ptr.h>
13 class Simple_tree_submap_ops
17 /* The mapping database.
19 * This implementation encodes mapping trees in very compact arrays of
20 * fixed sizes, prefixed by a tree header (Mapping_tree). Array
21 * sizes can vary from 4 mappings to 4<<15 mappings. For each size,
22 * we set up a slab allocator. To grow or shrink the size of an
23 * array, we have to allocate a larger or smaller tree from the
24 * corresponding allocator and then copy the array elements.
26 * The array elements (Mapping) contain a tree depth element. This
27 * depth and the relative position in the array is all information we
28 * need to derive tree structure information. Here is an example:
32 * number value comment
33 * --------------------------
35 * 1 1 child of element #0 with depth 0
36 * 2 2 child of element #1 with depth 1
37 * 3 2 child of element #1 with depth 1
38 * 4 3 child of element #3 with depth 2
39 * 5 2 child of element #1 with depth 1
40 * 6 3 child of element #5 with depth 2
41 * 7 1 child of element #0 with depth 0
43 * This array is a pre-order encoding of the following tree:
53 * The mapping database (Mapdb) is organized in a hierarchy of
54 * frame-number-keyed maps of Mapping_trees (Treemap). The top-level
55 * Treemap contains mapping trees for superpages. These mapping trees
56 * may contain references to Treemaps for subpages. (Original credits
57 * for this idea: Christan Szmajda.)
60 * -------------------------------
61 * | | | | | | | | | | | | | | | | array of ptr to 4M Mapping_tree's
62 * ---|---------------------------
68 * | | 4M Mapping *or* ptr to nested Treemap
70 * | ----------------| Treemap
71 * | | v array of ptr to 4K Mapping_tree's
72 * --------------- -------------------------------
73 * | | | | | | | | | | | | | | | |
74 * ---|---------------------------
86 * IDEAS for enhancing this implementation:
88 * We often have to find a tree header corresponding to a mapping.
89 * Currently, we do this by iterating backwards over the array
90 * containing the mappings until we find the Sigma0 mapping, from
91 * whose address we can compute the address of the tree header. If
92 * this becomes a problem, we could add one more byte to the mappings
93 * with a hint (negative array offset) where to find the sigma0
94 * mapping. (If the hint value overflows, just iterate using the hint
95 * value of the mapping we find with the first hint value.) Another
96 * idea (from Adam) would be to just look up the tree header by using
97 * the physical address from the page-table lookup, but we would need
98 * to change the interface of the mapping database for that (pass in
99 * the physical address at all times), or we would have to include the
100 * physical address (or just the address of the tree header) in the
101 * Mapdb-user-visible Mapping (which could be different from the
102 * internal tree representation). (XXX: Implementing one of these
103 * ideas is probably worthwhile doing!)
105 * Instead of copying whole trees around when they grow or shrink a
106 * lot, or copying parts of trees when inserting an element, we could
107 * give up the array representation and add a "next" pointer to the
108 * elements -- that is, keep the tree of mappings in a
109 * pre-order-encoded singly-linked list (credits to: Christan Szmajda
110 * and Adam Wiggins). 24 bits would probably be enough to encode that
111 * pointer. Disadvantages: Mapping entries would be larger, and the
112 * cache-friendly space-locality of tree entries would be lost.
116 INTERFACE [!big_endian]:
119 // FIXME: do we need this depending on endianness ?
125 Size_id_max = 9 // can be up to 15 (4 bits)
128 unsigned _count: 16; ///< Number of live entries in this tree.
129 unsigned _size_id: 4; ///< Tree size -- see number_of_entries().
130 unsigned _empty_count: 11; ///< Number of dead entries in this tree.
131 // XXX currently never read, except in
134 unsigned _unused: 1; // (make this 32 bits to avoid a compiler bug)
136 Mapping _mappings[0];
141 INTERFACE [big_endian]:
144 // FIXME: do we need this depending on endianness ?
150 Size_id_max = 9 // can be up to 15 (4 bits)
153 unsigned _unused: 1; // (make this 32 bits to avoid a compiler bug)
154 unsigned _size_id: 4; ///< Tree size -- see number_of_entries().
155 unsigned _empty_count: 11; ///< Number of dead entries in this tree.
156 // XXX currently never read, except in
159 unsigned _count: 16; ///< Number of live entries in this tree.
160 Mapping _mappings[0];
168 /** Array elements for holding frame-specific data. */
173 cxx::unique_ptr<Mapping_tree> tree;
176 }; // struct Physframe
179 //---------------------------------------------------------------------------
187 #include "kmem_slab.h"
188 #include "ram_quota.h"
190 #include "std_macros.h"
// Simple_tree_submap_ops: trivial submap-operation policy for mapping
// trees that carry no nested Treemaps.  NOTE(review): return types and
// function bodies are elided in this excerpt -- signatures only; from
// the signatures these appear to be no-op/default implementations, but
// confirm against the full source.
197 Simple_tree_submap_ops::mem_size(Treemap const *) const
202 Simple_tree_submap_ops::grant(Treemap *, Space *, Page_number) const
207 Simple_tree_submap_ops::owner(Treemap const *) const
212 Simple_tree_submap_ops::is_partial(Treemap const *, Page_number, Page_number) const
217 Simple_tree_submap_ops::del(Treemap *) const
222 Simple_tree_submap_ops::flush(Treemap *, Page_number, Page_number) const
226 // Mapping-tree allocators
229 enum Mapping_tree_size
232 Size_id_max = 9 // can be up to 15 (4 bits)
// shrink(): choose a smaller size class for this tree.  Starting one
// class below the current one, keep going down while the candidate
// capacity (Size_factor << sid) still exceeds four times the live
// entry count -- i.e. stop as soon as the tree would be at least a
// quarter full.  (Loop body and return are elided in this excerpt.)
236 Mapping_tree::Size_id
237 Mapping_tree::shrink()
239 unsigned sid = _size_id - 1;
240 while (sid > 0 && ((static_cast<unsigned>(_count) << 2) < ((unsigned)Size_factor << sid)))
// bigger(): the next-larger size class; callers must check against
// Size_id_max before allocating from it.
247 Mapping_tree::Size_id
248 Mapping_tree::bigger()
249 { return Mapping_tree::Size_id(_size_id + 1); }
// One slab allocator per tree size class.  Elem_size is the full
// footprint of a tree of class SIZE_ID: the header (Mapping_tree,
// zero-length array excluded) plus the mapping array payload.
251 template<int SIZE_ID>
252 struct Mapping_tree_allocator
257 Elem_size = (Size_factor << SIZE_ID) * sizeof (Mapping)
258 + sizeof(Mapping_tree)
// The constructor registers this slab in the global lookup table so
// allocator_for_treesize() can find it by size id.
261 Mapping_tree_allocator(Kmem_slab **array)
262 : a(Elem_size, Mapping::Alignment, "Mapping_tree")
263 { array[SIZE_ID] = &a; }
// Recursive template: instantiates one Mapping_tree_allocator for
// every size class from SIZE_ID_MAX down to 0.
266 template<int SIZE_ID_MAX>
267 struct Mapping_tree_allocators;
// Base case: just the class-0 allocator.
270 struct Mapping_tree_allocators<0>
272 Mapping_tree_allocator<0> a;
273 Mapping_tree_allocators(Kmem_slab **array) : a(array) {}
// Recursive case: this class's allocator plus all smaller ones.
276 template<int SIZE_ID_MAX>
277 struct Mapping_tree_allocators
280 Mapping_tree_allocator<SIZE_ID_MAX> a;
281 Mapping_tree_allocators<SIZE_ID_MAX - 1> n;
283 Mapping_tree_allocators(Kmem_slab **array) : a(array), n(array) {}
// Global table of per-size-class slab allocators; filled in by the
// Mapping_tree_allocators static constructor chain below it.
286 static Kmem_slab *_allocators[Size_id_max + 1];
287 static Mapping_tree_allocators<Size_id_max> _mapping_tree_allocators(_allocators);
// Map a size id to its slab allocator.  No bounds check: callers must
// pass 0..Size_id_max.
291 allocator_for_treesize(int size)
293 return _allocators[size];
297 // class Mapping_tree
// quota(): the RAM quota account that pays for mappings owned by
// this space.
301 inline NEEDS["space.h"]
303 Mapping_tree::quota(Space *space)
305 return space->ram_quota();
// Placement-style operator new: the caller chooses the size class and
// the storage comes from the matching slab.  May return 0 (throw()).
310 Mapping_tree::operator new (size_t, Mapping_tree::Size_id size_id) throw()
311 { return allocator_for_treesize(size_id)->alloc(); }
// operator delete has no size-class argument, so it reads _size_id out
// of the (already-destructed) header to pick the slab -- hence the XXX.
315 Mapping_tree::operator delete (void* block)
320 // Try to guess right allocator object -- XXX unportable!
321 Mapping_tree* t = static_cast<Mapping_tree*>(block);
323 t->check_integrity();
325 allocator_for_treesize(t->_size_id)->free(block);
// Construct a fresh tree of the given size class containing exactly one
// root mapping (owner/page).  Entry 1 gets an end tag terminating the
// live entries, and the very last slot gets a sentinel end tag.
328 PUBLIC //inline NEEDS[Mapping_depth, Mapping_tree::last]
329 Mapping_tree::Mapping_tree(Size_id size_id, Page_number page,
332 _count = 1; // 1 valid mapping
333 _size_id = size_id; // size is equal to Size_factor << 0
335 _empty_count = 0; // no gaps in tree representation
338 _mappings[0].set_depth(Mapping::Depth_root);
339 _mappings[0].set_page(page);
340 _mappings[0].set_space(owner);
342 _mappings[1].set_depth(Mapping::Depth_end);
344 // We also always set the end tag on last entry so that we can
345 // check whether it has been overwritten.
346 last()->set_depth(Mapping::Depth_end);
// Destructor: return the per-entry quota charged for every live,
// non-submap mapping.  A tree that was copied away and then reset()
// starts with an end tag at entry 0, so the loop does nothing --
// that is the "special case for copied mapping trees".
350 Mapping_tree::~Mapping_tree()
352 // special case for copied mapping trees
353 for (Mapping *m = _mappings; m < end() && !m->is_end_tag(); ++m)
355 if (!m->submap() && !m->unused())
356 quota(m->space())->free(sizeof(Mapping));
// Copy constructor (re-sizing copy): create a tree of the new size
// class and fill it with a compacted copy of from_tree's entries.
360 PUBLIC //inline NEEDS[Mapping_depth, Mapping_tree::last]
361 Mapping_tree::Mapping_tree(Size_id size_id, Mapping_tree* from_tree)
364 last()->set_depth(Mapping::Depth_end);
366 copy_compact_tree(this, from_tree);
369 // public routines with inline implementations
// number_of_entries(): total capacity of the array for this size class.
370 PUBLIC inline NEEDS[Mapping_tree_size]
372 Mapping_tree::number_of_entries() const
374 return Size_factor << (unsigned long)_size_id;
// mappings(): pointer to the first array element.
379 Mapping_tree::mappings()
381 return & _mappings[0];
// is_empty(): body elided in this excerpt.
386 Mapping_tree::is_empty() const
// end(): one past the last array slot (capacity, not live count).
391 PUBLIC inline NEEDS[Mapping_tree::mappings, Mapping_tree::number_of_entries]
395 return mappings() + number_of_entries();
// last(): declaration follows; body elided in this excerpt.
398 PUBLIC inline NEEDS[Mapping_tree::end]
405 // A utility function to find the tree header belonging to a mapping.
407 /** Our Mapping_tree.
408 @return the Mapping_tree we are in.
// Walk backwards from m to the root entry (depth == Depth_root), then
// step back over the header.  Works because the root is the first
// array element and sizeof(Mapping_tree) excludes the zero-length
// _mappings array.  (Backward-stepping statements elided in excerpt.)
412 Mapping_tree::head_of(Mapping *m)
414 while (m->depth() > Mapping::Depth_root)
416 // jump in bigger steps if this is not a free mapping
417 if (m->depth() <= Mapping::Depth_max)
426 return reinterpret_cast<Mapping_tree *>
427 (reinterpret_cast<char *>(m) - sizeof(Mapping_tree));
428 // We'd rather like to use offsetof as follows, but it's unsupported
429 // for non-POD classes. So we instead rely on GCC's zero-length
430 // array magic (sizeof(Mapping_tree) is the size of the header
431 // without the array).
432 // return reinterpret_cast<Mapping_tree *>
433 // (reinterpret_cast<char *>(m) - offsetof(Mapping_tree, _mappings));
436 /** Next mapping in the mapping tree.
437 @param t head of mapping tree, if available
438 @return the next mapping in the mapping tree. If the mapping has
439 children, it is the first child. Otherwise, if the mapping has a
440 sibling, it's the next sibling. Otherwise, if the mapping is the
441 last sibling or only child, it's the mapping's parent.
// Linear scan forward in the pre-order array, skipping dead entries,
// until the end tag or array end.  (Loop body / return elided.)
445 Mapping_tree::next(Mapping *m)
447 for (m++; m < end() && ! m->is_end_tag(); m++)
454 /** Next child mapping of a given parent mapping. This
455 function traverses the mapping tree like next(); however, it
456 stops (and returns 0) if the next mapping is outside the subtree
457 starting with parent.
458 @param parent Parent mapping
459 @return the next child mapping of a given parent mapping
461 PUBLIC inline NEEDS[Mapping_tree::next]
463 Mapping_tree::next_child(Mapping *parent, Mapping *current_child)
465 // Find the next valid entry in the tree structure.
466 Mapping *m = next(current_child);
468 // If we didn't find an entry, or if the entry cannot be a child of
469 // "parent", return 0
// depth <= parent's depth means we left parent's subtree.
470 if (m == 0 || m->depth() <= parent->depth())
476 // This function copies the elements of mapping tree src to mapping
477 // tree dst, ignoring empty elements (that is, compressing the
478 // source tree). In-place compression is supported.
481 Mapping_tree::copy_compact_tree(Mapping_tree *dst, Mapping_tree *src)
483 unsigned src_count = src->_count; // Store in local variable before
484 // it can get overwritten
486 // Special case: cannot in-place compact a full tree
487 if (src == dst && src->number_of_entries() == src_count)
490 // Now we can assume the resulting tree will not be full.
491 assert (src_count < dst->number_of_entries());
// Header bookkeeping: after compaction there are no dead entries.
495 dst->_empty_count = 0;
498 Mapping *d = dst->mappings();
// Copy live entries front-to-front; safe in place because d never
// overtakes s.  (Loop step/body elided in this excerpt.)
500 for (Mapping *s = src->mappings();
501 s && !s->is_end_tag();
508 assert (dst->_count == src_count); // Same number of entries
509 assert (d < dst->end());
510 // Room for one more entry (the Mapping::Depth_end entry)
// Terminate the live entries and refresh the sentinel in the last slot.
512 d->set_depth(Mapping::Depth_end);
513 dst->last()->set_depth(Mapping::Depth_end);
514 } // copy_compact_tree()
// Debug walk over the whole array validating the header counters
// against the entries.  The default owner (Space*)-1 means "don't
// check ownership of the root entry".
516 // Don't inline this function -- it eats a lot of stack space!
517 PUBLIC // inline NEEDS[Mapping::data, Mapping::unused, Mapping::is_end_tag,
518 // Mapping_tree::end, Mapping_tree::number_of_entries]
520 Mapping_tree::check_integrity(Space *owner = (Space*)-1)
525 assert (// Either each entry is used
526 number_of_entries() == static_cast<unsigned>(_count) + _empty_count
527 // Or the last used entry is end tag
528 || mappings()[_count + _empty_count].is_end_tag());
530 Mapping* m = mappings();
532 if (!(m->is_end_tag() // When the tree was copied to a new one
533 || (! m->unused() // The first entry is never unused.
535 && (owner == (Space *)-1 || m->space() == owner))))
537 printf("mapdb corrupted: owner=%p\n"
538 " m=%p (end: %s depth: %d space: %p page: %lx)\n",
539 owner, m, m->is_end_tag()?"yes":"no", m->depth(), m->space(),
// Count live/dead entries and compare with the header fields.
// (Counting loop body elided in this excerpt.)
548 while (m < end() && ! m->is_end_tag())
558 assert_kdb (_count == used);
559 assert_kdb (_empty_count == dead);
565 * Use this function to reset the tree to empty.
567 * In the case where a tree was copied to a new one you have to use
568 * this function to prevent the node iteration in the destructor.
572 Mapping_tree::reset()
// An end tag at entry 0 makes the destructor's loop terminate
// immediately, so no quota is freed for the moved-away entries.
576 _mappings[0].set_depth(Mapping::Depth_end);
// find_submap(): a submap, if present, is always the parent's first
// child, so one next() step suffices.
579 PUBLIC inline NEEDS[Mapping_tree::next, <cassert>]
581 Mapping_tree::find_submap(Mapping* parent)
583 assert (! parent->submap());
585 // We need just one step to find a possible submap, because they are
586 // always a parent's first child.
587 Mapping* m = next(parent);
589 if (m && m->submap())
// allocate(): charge one Mapping's worth of quota to payer and insert
// a new child entry beneath parent (at parent->depth()+1, or as a
// Depth_submap entry when insert_submap is set).  Returns the freshly
// placed entry, or 0 on quota/space exhaustion.  (Several statements
// are elided in this excerpt; comments below describe the visible
// skeleton only.)
595 PUBLIC inline NEEDS["ram_quota.h"]
597 Mapping_tree::allocate(Ram_quota *payer, Mapping *parent,
598 bool insert_submap = false)
600 Auto_quota<Ram_quota> q(payer, sizeof(Mapping));
601 if (EXPECT_FALSE(!q))
604 // After locating the right place for the new entry, it will be
605 // stored there (if this place is empty) or the following entries
606 // moved by one entry.
608 // We cannot continue if the last array entry is not free. This
609 // only happens if an earlier call to free() with this mapping tree
610 // couldn't allocate a bigger array. In this case, signal an
611 // out-of-memory condition.
612 if (EXPECT_FALSE (! last()->unused()))
615 // If the parent mapping already has the maximum depth, we cannot
617 if (EXPECT_FALSE (parent->depth() == Mapping::Depth_max))
620 //allocation is done, so...
623 Mapping *insert = 0, *free = 0;
624 // - Find an insertion point for the new entry. Acceptable insertion
625 // points are either before a sibling (same depth) or at the end
626 // of the subtree; for submap insertions, it's always before
627 // the first sibling. "insert" keeps track of the last
628 // acceptable insertion point.
629 // - Find a free entry in the array encoding the subtree ("free").
630 // There might be none; in this case, we stop at the end of the
633 for (Mapping* m = parent + 1; m < end(); m++)
635 // End of subtree? If we reach this point, this is our insert spot.
637 || m->depth() <= (insert_submap
638 ? parent->depth() + 1
647 else if (free // Only look for insert spots after free
648 && m->depth() <= parent->depth() + 1)
656 assert (free == 0 || (free->unused() && free < insert));
658 // We now update "free" to point to a free spot that is acceptable
663 // "free" will be the latest free spot before the "insert" spot.
664 // If there is anything between "free" and "insert", move it
665 // upward to make space just before insert.
666 while (free + 1 != insert)
673 // Tree-header maintenance
674 _empty_count -= 1; // Allocated dead entry
679 // There was no free spot in the subtree. Move everything
680 // downward until we have free space. This is guaranteed to
681 // succeed, because we ensured earlier that the last entry of
682 // the array is free.
684 free = insert; // This will be the free spot
687 while (! insert->unused())
690 // Tree maintenance: handle end tag, empty count
691 if (insert->is_end_tag())
693 // Need to move end tag.
694 if (insert + 1 < end())
695 insert++; // Arrange for copying end tag as well
699 _empty_count -= 1; // Allocated dead entry
703 while (insert > free)
705 *insert = *(insert - 1);
710 _count += 1; // Adding an alive entry
712 // found a place to insert new child (free).
713 free->set_depth(insert_submap ? (unsigned)Mapping::Depth_submap
714 : parent->depth() + 1);
// Return the quota charged for a live mapping entry to account q.
// (The statement that actually marks the entry dead is elided in this
// excerpt.)
719 PUBLIC inline NEEDS["ram_quota.h"]
721 Mapping_tree::free_mapping(Ram_quota *q, Mapping *m)
723 assert (!m->unused() && !m->is_end_tag());
724 q->free(sizeof(Mapping));
// flush(): delete parent's whole subtree (and parent itself when
// me_too), delegating nested-Treemap entries to submap_ops.  Partial
// submaps in the offs_begin..offs_end range are flushed but kept.
// Leaves the tree representation with holes, accounted for in
// _empty_count, and moves the end tag when the deletions reach the
// tail of the array.  (Several statements elided in this excerpt.)
728 PUBLIC template< typename SUBMAP_OPS >
730 Mapping_tree::flush(Mapping *parent, bool me_too,
731 Page_number offs_begin, Page_number offs_end,
732 SUBMAP_OPS const &submap_ops = SUBMAP_OPS())
734 assert (! parent->unused());
736 // This is easy to do: We just have to iterate over the array
737 // encoding the tree.
738 Mapping *start_of_deletions = parent;
739 unsigned p_depth = parent->depth();
740 unsigned deleted = 0;
742 unsigned empty_elems_passed = 0;
747 free_mapping(quota(parent->space()), parent);
752 start_of_deletions++;
754 unsigned m_depth = p_depth;
756 for (Mapping* m = parent + 1;
757 m < end() && ! m->is_end_tag();
760 if (unsigned (m->depth()) <= p_depth)
762 // Found another data element -- stop deleting. Since we
763 // created holes in the tree representation, account for it.
765 _empty_count += deleted;
773 empty_elems_passed++;
779 if (Treemap* submap = m->submap())
781 space = submap_ops.owner(submap);
783 // Check for immediate child. Note: It's a bad idea to
784 // call m->parent() because that would iterate backwards
785 // over already-deleted entries.
786 && m_depth == p_depth
787 && submap_ops.is_partial(submap, offs_begin, offs_end))
789 submap_ops.flush(submap, offs_begin, offs_end);
791 start_of_deletions++;
795 submap_ops.del(submap);
797 else // Found a real mapping
800 m_depth = m->depth();
803 // Delete the element.
804 free_mapping(quota(space), m);
809 // We deleted stuff at the end of the array -- move end tag
810 if (start_of_deletions < end())
812 start_of_deletions->set_depth(Mapping::Depth_end);
815 // also, reduce number of free entries
816 _empty_count -= empty_elems_passed;
// grant(): transfer ownership of mapping m (and its submap's memory,
// if any) from m->space() to new_space.  Quota is first charged to the
// new space, then ownership is switched, then the old space's quota is
// released -- so a failed alloc leaves everything unchanged.
821 PUBLIC template< typename SUBMAP_OPS >
823 Mapping_tree::grant(Mapping* m, Space *new_space, Page_number va,
824 SUBMAP_OPS const &submap_ops = SUBMAP_OPS())
826 unsigned long _quota = sizeof(Mapping);
827 Treemap* submap = find_submap(m);
829 _quota += submap_ops.mem_size(submap);
831 if (!quota(new_space)->alloc(_quota))
834 Space *old_space = m->space();
836 m->set_space(new_space);
840 submap_ops.grant(submap, new_space, va);
842 quota(old_space)->free(_quota);
// lookup(): linear search for the live (non-submap) entry matching
// (space, page).  (Declaration and return statements elided.)
848 Mapping_tree::lookup(Space *space, Page_number page)
853 for (m = mappings(); m; m = next(m))
855 assert (!m->submap());
856 if (m->space() == space && m->page() == page)
// Base_mappable::lookup(): take the frame lock, then search the tree.
// A failed lock (Lock::Invalid) aborts the lookup.  (Unlock/return
// paths elided in this excerpt.)
866 Base_mappable::lookup(Space *space, Page_number va)
868 // get and lock the tree.
869 if (EXPECT_FALSE(lock.lock() == Lock::Invalid))
871 Mapping_tree *t = tree.get();
873 if (Mapping *m = t->lookup(space, va))
// Base_mappable::insert(): when no tree exists yet, build a minimal
// one with (space, page) as the root (parent must be 0 in that case);
// otherwise allocate a child entry under parent and fill it in.
882 Base_mappable::insert(Mapping* parent, Space *space, Page_number page)
884 Mapping_tree* t = tree.get();
887 assert (parent == 0);
888 Auto_quota<Ram_quota> q(Mapping_tree::quota(space), sizeof(Mapping));
889 if (EXPECT_FALSE(!q))
892 Mapping_tree::Size_id min_size = Mapping_tree::Size_id_min;
893 cxx::unique_ptr<Mapping_tree> new_tree(new (min_size) Mapping_tree (min_size, page, space));
895 if (EXPECT_FALSE(!new_tree))
898 tree = cxx::move(new_tree);
900 return tree->mappings();
903 Mapping *free = t->allocate(Mapping_tree::quota(space), parent, false);
905 if (EXPECT_FALSE(!free))
908 free->set_space(space);
909 free->set_page(page);
911 t->check_integrity();
// pack(): ensure the tree's last array slot is free before the tree is
// unlocked, by shrinking, compressing in place, or growing as the
// policy comment below describes.  (Several statements elided in this
// excerpt.)
918 Base_mappable::pack()
920 // Before we unlock the tree, we need to make sure that there is
921 // room for at least one new mapping. In particular, this means
922 // that the last entry of the array encoding the tree must be free.
924 // (1) When we use up less than a quarter of all entries of the
925 // array encoding the tree, copy to a smaller tree. Otherwise, (2)
926 // if the last entry is free, do nothing. Otherwise, (3) if less
927 // than 3/4 of the entries are used, compress the tree. Otherwise,
928 // (4) copy to a larger tree.
930 Mapping_tree *t = tree.get();
931 bool maybe_out_of_memory = false;
933 do // (this is not actually a loop, just a block we can "break" out of)
935 // (1) Do we need to allocate a smaller tree?
936 if (t->_size_id > Mapping_tree::Size_id_min // must not be smallest size
937 && (static_cast<unsigned>(t->_count) << 2) < t->number_of_entries())
939 Mapping_tree::Size_id sid = t->Mapping_tree::shrink();
940 cxx::unique_ptr<Mapping_tree> new_t(new (sid) Mapping_tree(sid, t));
944 // invalidate node 0 because we must not free the quota for it
948 // Register new tree.
949 tree = cxx::move(new_t);
955 // (2) Is the last entry free?
956 if (t->last()->unused())
957 break; // OK, last entry is free.
959 // Last entry is not free -- either compress current array
960 // (i.e., move free entries to end of array), or allocate bigger
963 // (3) Should we compress the tree?
964 // We also try to compress if we cannot allocate a bigger
965 // tree because there is no bigger tree size.
966 if (t->_count < (t->number_of_entries() >> 2)
967 + (t->number_of_entries() >> 1)
968 || t->_size_id == Size_id_max) // cannot enlarge?
970 if (t->_size_id == Size_id_max)
971 maybe_out_of_memory = true;
973 Mapping_tree::copy_compact_tree(t, t); // in-place compression
978 // (4) OK, allocate a bigger array.
980 Mapping_tree::Size_id sid = t->bigger();
981 cxx::unique_ptr<Mapping_tree> new_t(new (sid) Mapping_tree(sid, t));
985 // invalidate node 0 because we must not free the quota for it
989 // Register new tree.
990 tree = cxx::move(new_t);
994 // out of memory -- just do tree compression and hope that helps.
995 maybe_out_of_memory = true;
997 Mapping_tree::copy_compact_tree(t, t); // in-place compression
1002 // The last entry of the tree should now be free -- except if we're
1004 assert (t->last()->unused() || maybe_out_of_memory);