#include <unique_ptr.h>

class Simple_tree_submap_ops
/* The mapping database.
 *
 * This implementation encodes mapping trees in very compact arrays of
 * fixed sizes, prefixed by a tree header (Mapping_tree).  Array
 * sizes can vary from 4 mappings to 4<<15 mappings.  For each size,
 * we set up a slab allocator.  To grow or shrink the size of an
 * array, we have to allocate a larger or smaller tree from the
 * corresponding allocator and then copy the array elements.
 *
 * The array elements (Mapping) contain a tree depth element.  This
 * depth and the relative position in the array are all the information
 * we need to derive the tree structure.  Here is an example:
 *
 * element  depth
 * number   value   comment
 * --------------------------
 * 0        0       root mapping
 * 1        1       child of element #0 with depth 0
 * 2        2       child of element #1 with depth 1
 * 3        2       child of element #1 with depth 1
 * 4        3       child of element #3 with depth 2
 * 5        2       child of element #1 with depth 1
 * 6        3       child of element #5 with depth 2
 * 7        1       child of element #0 with depth 0
 *
 * This array is a pre-order encoding of the following tree:
 *
 *                0
 *              /   \
 *             1     7
 *           / | \
 *          2  3  5
 *             |  |
 *             4  6
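 *
 * For illustration only (not part of this file), a minimal sketch of
 * how an entry's parent can be recovered from depth and array position
 * alone -- scan backwards for the first entry that is one level
 * shallower (unused and submap entries are ignored for brevity):
 *
 *   Mapping *parent_of(Mapping *m)  // hypothetical helper
 *   {
 *     unsigned d = m->depth();
 *     if (d == 0)
 *       return 0;                   // the root has no parent
 *     do
 *       --m;                        // walk towards the array start
 *     while (m->depth() >= d);      // skip siblings and their subtrees
 *     return m;                     // first shallower entry: the parent
 *   }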
 *
 * The mapping database (Mapdb) is organized in a hierarchy of
 * frame-number-keyed maps of Mapping_trees (Treemap).  The top-level
 * Treemap contains mapping trees for superpages.  These mapping trees
 * may contain references to Treemaps for subpages.  (Original credit
 * for this idea: Christian Szmajda.)
 *
 * Treemap (top level)
 * -------------------------------
 * | | | | | | | | | | | | | | | |   array of ptr to 4M Mapping_tree's
 * ---|---------------------------
 *    |
 *    v  Mapping_tree
 *    ---------------
 *    |             |  tree header
 *    ---------------
 *    |             |  4M Mapping *or* ptr to nested Treemap
 *    ---------------                  |
 *                                     |   Treemap
 *                  -------------------------------
 *                  | | | | | | | | | | | | | | | |   array of ptr to 4K Mapping_tree's
 *                  ---|---------------------------
 *
 * IDEAS for enhancing this implementation:
 *
 * We often have to find the tree header corresponding to a mapping.
 * Currently, we do this by iterating backwards over the array
 * containing the mappings until we find the Sigma0 mapping, from
 * whose address we can compute the address of the tree header.  If
 * this becomes a problem, we could add one more byte to the mappings
 * with a hint (negative array offset) of where to find the Sigma0
 * mapping.  (If the hint value overflows, just iterate using the hint
 * value of the mapping we find with the first hint value.)  Another
 * idea (from Adam) would be to look up the tree header by using
 * the physical address from the page-table lookup, but for that we
 * would need to change the interface of the mapping database (pass in
 * the physical address at all times), or we would have to include the
 * physical address (or just the address of the tree header) in the
 * Mapdb-user-visible Mapping (which could be different from the
 * internal tree representation).  (XXX: Implementing one of these
 * ideas is probably worthwhile!)
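 *
 * A rough sketch of the hint-byte idea, with a hypothetical _s0_hint
 * field (a saturated hint only gets us closer; we then re-read the
 * hint from the mapping we land on):
 *
 *   Mapping *sigma0_mapping_of(Mapping *m)  // hypothetical helper
 *   {
 *     while (m->depth() > Mapping::Depth_root)
 *       m -= m->_s0_hint;  // negative array offset towards Sigma0
 *     return m;
 *   }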
 *
 * Instead of copying whole trees around when they grow or shrink a
 * lot, or copying parts of trees when inserting an element, we could
 * give up the array representation and add a "next" pointer to the
 * elements -- that is, keep the tree of mappings in a
 * pre-order-encoded singly-linked list (credits: Christian Szmajda
 * and Adam Wiggins).  24 bits would probably be enough to encode that
 * pointer.  Disadvantages: Mapping entries would be larger, and the
 * cache-friendly spatial locality of tree entries would be lost.
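 *
 * A sketch of what such a list element might look like, assuming a
 * 24-bit index suffices (hypothetical layout):
 *
 *   struct List_mapping
 *   {
 *     // space, page, and flags packed as in today's Mapping ...
 *     unsigned _depth :  8;  // tree depth, as before
 *     unsigned _next  : 24;  // array index of the pre-order successor
 *   };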
INTERFACE [!big_endian]:

// FIXME: do we need this to depend on endianness?
typedef Mapping::Page Page;
typedef Mapping::Pfn Pfn;
typedef Mapping::Pcnt Pcnt;

  Size_id_max = 9 // can be up to 15 (4 bits)

  unsigned _count: 16;        ///< Number of live entries in this tree.
  unsigned _size_id: 4;       ///< Tree size -- see number_of_entries().
  unsigned _empty_count: 11;  ///< Number of dead entries in this tree.
                              // XXX currently never read, except in

  unsigned _unused: 1;        // (make this 32 bits to avoid a compiler bug)

  Mapping _mappings[0];
INTERFACE [big_endian]:

// FIXME: do we need this to depend on endianness?

typedef Mapping::Page Page;
typedef Mapping::Pfn Pfn;
typedef Mapping::Pcnt Pcnt;

  Size_id_max = 9 // can be up to 15 (4 bits)

  unsigned _unused: 1;        // (make this 32 bits to avoid a compiler bug)
  unsigned _size_id: 4;       ///< Tree size -- see number_of_entries().
  unsigned _empty_count: 11;  ///< Number of dead entries in this tree.
                              // XXX currently never read, except in

  unsigned _count: 16;        ///< Number of live entries in this tree.
  Mapping _mappings[0];
/** Array elements for holding frame-specific data. */

  typedef Mapping_tree::Page Page;

  cxx::unique_ptr<Mapping_tree> tree;

}; // struct Physframe
//---------------------------------------------------------------------------

#include "kmem_slab.h"
#include "ram_quota.h"

#include "std_macros.h"
Simple_tree_submap_ops::mem_size(Treemap const *) const

Simple_tree_submap_ops::grant(Treemap *, Space *, Page_number) const

Simple_tree_submap_ops::owner(Treemap const *) const

Simple_tree_submap_ops::is_partial(Treemap const *, Page_number, Page_number) const

Simple_tree_submap_ops::del(Treemap *) const

Simple_tree_submap_ops::flush(Treemap *, Page_number, Page_number) const
// Mapping-tree allocators

enum Mapping_tree_size

  Size_id_max = 9 // can be up to 15 (4 bits)

Mapping_tree::Size_id
Mapping_tree::shrink()

  unsigned sid = _size_id - 1;
  while (sid > 0
         && (static_cast<unsigned>(_count) << 2)
              < (static_cast<unsigned>(Size_factor) << sid))

Mapping_tree::Size_id
Mapping_tree::bigger()
{ return Mapping_tree::Size_id(_size_id + 1); }
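// Worked example (assuming Size_factor == 4, per the header comment):
// a tree with _size_id == 4 holds 4 << 4 == 64 entries.  With
// _count == 5, shrink() steps down while 5 << 2 < (4 << sid) holds:
// sid == 3 (32 entries) still qualifies, sid == 2 (16 entries) does
// not, so shrink() returns Size_id(2) -- the smallest size that would
// be at least a quarter full.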
template<int SIZE_ID>
struct Mapping_tree_allocator

  Elem_size = (Size_factor << SIZE_ID) * sizeof(Mapping)
              + sizeof(Mapping_tree)

  Mapping_tree_allocator(Kmem_slab **array)
  : a(Elem_size, Mapping::Alignment, "Mapping_tree")
  { array[SIZE_ID] = &a; }

template<int SIZE_ID_MAX>
struct Mapping_tree_allocators;

struct Mapping_tree_allocators<0>

  Mapping_tree_allocator<0> a;
  Mapping_tree_allocators(Kmem_slab **array) : a(array) {}

template<int SIZE_ID_MAX>
struct Mapping_tree_allocators

  Mapping_tree_allocator<SIZE_ID_MAX> a;
  Mapping_tree_allocators<SIZE_ID_MAX - 1> n;

  Mapping_tree_allocators(Kmem_slab **array) : a(array), n(array) {}

static Kmem_slab *_allocators[Size_id_max + 1];
static Mapping_tree_allocators<Size_id_max> _mapping_tree_allocators(_allocators);
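// The recursive template above instantiates, at static initialization
// time, one slab allocator per tree size: with Size_id_max == 9, it
// fills _allocators[0..9] with slabs for trees of Size_factor << 0
// through Size_factor << 9 mappings each.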
allocator_for_treesize(int size)

  return _allocators[size];
// class Mapping_tree

inline NEEDS["space.h"]
Mapping_tree::quota(Space *space)

  return space->ram_quota();

Mapping_tree::operator new (size_t, Mapping_tree::Size_id size_id) throw()
{ return allocator_for_treesize(size_id)->alloc(); }
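// Usage sketch: trees are only created through this placement form,
// as in Base_mappable::insert() below:
//
//   cxx::unique_ptr<Mapping_tree> t(new (size_id) Mapping_tree(size_id, page, space));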
Mapping_tree::operator delete (void *block)

  // Try to guess the right allocator object -- XXX unportable!
  Mapping_tree *t = static_cast<Mapping_tree *>(block);

  t->check_integrity();

  allocator_for_treesize(t->_size_id)->free(block);
PUBLIC //inline NEEDS[Mapping_depth, Mapping_tree::last]
Mapping_tree::Mapping_tree(Size_id size_id, Page page,

  _count = 1;           // 1 valid mapping
  _size_id = size_id;   // number of entries is Size_factor << size_id

  _empty_count = 0;     // no gaps in tree representation

  _mappings[0].set_depth(Mapping::Depth_root);
  _mappings[0].set_page(page);
  _mappings[0].set_space(owner);

  _mappings[1].set_depth(Mapping::Depth_end);

  // We also always set the end tag on the last entry so that we can
  // check whether it has been overwritten.
  last()->set_depth(Mapping::Depth_end);
Mapping_tree::~Mapping_tree()

  // special case for copied mapping trees
  for (Mapping *m = _mappings; m < end() && !m->is_end_tag(); ++m)

    if (!m->submap() && !m->unused())
      quota(m->space())->free(sizeof(Mapping));
PUBLIC //inline NEEDS[Mapping_depth, Mapping_tree::last]
Mapping_tree::Mapping_tree(Size_id size_id, Mapping_tree *from_tree)

  last()->set_depth(Mapping::Depth_end);

  copy_compact_tree(this, from_tree);
// public routines with inline implementations
PUBLIC inline NEEDS[Mapping_tree_size]
Mapping_tree::number_of_entries() const

  return Size_factor << static_cast<unsigned long>(_size_id);

Mapping_tree::mappings()

  return &_mappings[0];

Mapping_tree::is_empty() const

PUBLIC inline NEEDS[Mapping_tree::mappings, Mapping_tree::number_of_entries]

  return mappings() + number_of_entries();
PUBLIC inline NEEDS[Mapping_tree::end]

// A utility function to find the tree header belonging to a mapping.

/** Our Mapping_tree.
    @return the Mapping_tree we are in.

Mapping_tree::head_of(Mapping *m)

  while (m->depth() > Mapping::Depth_root)

      // jump in bigger steps if this is not a free mapping
      if (m->depth() <= Mapping::Depth_max)

  return reinterpret_cast<Mapping_tree *>
    (reinterpret_cast<char *>(m) - sizeof(Mapping_tree));
  // We would rather use offsetof as follows, but it's unsupported
  // for non-POD classes.  So we instead rely on GCC's zero-length
  // array magic (sizeof(Mapping_tree) is the size of the header
  // without the array).
  // return reinterpret_cast<Mapping_tree *>
  //   (reinterpret_cast<char *>(m) - offsetof(Mapping_tree, _mappings));
/** Next mapping in the mapping tree.
    @param t head of mapping tree, if available
    @return the next mapping in the mapping tree.  If the mapping has
            children, it is the first child.  Otherwise, if the mapping
            has a sibling, it's the next sibling.  Otherwise, if the
            mapping is the last sibling or an only child, it's the
            mapping's parent.

Mapping_tree::next(Mapping *m)

  for (m++; m < end() && !m->is_end_tag(); m++)
/** Next child mapping of a given parent mapping.  This
    function traverses the mapping tree like next(); however, it
    stops (and returns 0) if the next mapping is outside the subtree
    starting with parent.
    @param parent Parent mapping
    @return the next child mapping of the given parent mapping

PUBLIC inline NEEDS[Mapping_tree::next]
Mapping_tree::next_child(Mapping *parent, Mapping *current_child)

  // Find the next valid entry in the tree structure.
  Mapping *m = next(current_child);

  // If we didn't find an entry, or if the entry cannot be a child of
  // "parent", return 0.
  if (m == 0 || m->depth() <= parent->depth())
// This function copies the elements of mapping tree src to mapping
// tree dst, ignoring empty elements (that is, compressing the
// source tree).  In-place compression is supported.

Mapping_tree::copy_compact_tree(Mapping_tree *dst, Mapping_tree *src)

  unsigned src_count = src->_count; // Store in a local variable before
                                    // it can get overwritten

  // Special case: cannot in-place compact a full tree
  if (src == dst && src->number_of_entries() == src_count)

  // Now we can assume the resulting tree will not be full.
  assert (src_count < dst->number_of_entries());

  dst->_empty_count = 0;

  Mapping *d = dst->mappings();

  for (Mapping *s = src->mappings();
       s && !s->is_end_tag();

  assert (dst->_count == src_count); // Same number of entries
  assert (d < dst->end());
  // Room for one more entry (the Mapping::Depth_end entry)

  d->set_depth(Mapping::Depth_end);
  dst->last()->set_depth(Mapping::Depth_end);
} // copy_compact_tree()
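// Usage sketch: in-place compression simply passes the same tree
// twice, as Base_mappable::pack() does below:
//
//   Mapping_tree::copy_compact_tree(t, t);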
// Don't inline this function -- it eats a lot of stack space!
PUBLIC // inline NEEDS[Mapping::data, Mapping::unused, Mapping::is_end_tag,
       //              Mapping_tree::end, Mapping_tree::number_of_entries]
Mapping_tree::check_integrity(Space *owner = (Space *)-1)

  bool enter_ke = false;

  if (// Either each entry is used
      !(number_of_entries() == static_cast<unsigned>(_count) + _empty_count
        // or the last used entry is the end tag
        || mappings()[_count + _empty_count].is_end_tag()))

      printf("mapdb consistency error: "
             "%d == %d + %d || mappings()[%d + %d].is_end_tag()=%d\n",
             number_of_entries(), static_cast<unsigned>(_count), _empty_count,
             _count, _empty_count, mappings()[_count + _empty_count].is_end_tag());

  Mapping *m = mappings();

  if (!(m->is_end_tag()   // When the tree was copied to a new one
        || (!m->unused()  // The first entry is never unused.

            && (owner == (Space *)-1 || m->space() == owner))))

      printf("mapdb corrupted: owner=%p\n"
             "  m=%p (end: %s depth: %d space: %p page: %lx)\n",
             owner, m, m->is_end_tag() ? "yes" : "no", m->depth(), m->space(),
             cxx::int_value<Page>(m->page()));

  unsigned used = 0, dead = 0;

  while (m < end() && !m->is_end_tag())

  if (enter_ke |= _count != used)
    printf("mapdb: _count=%d != used=%d\n", _count, used);
  if (enter_ke |= _empty_count != dead)
    printf("mapdb: _empty_count=%d != dead=%d\n", _empty_count, dead);

      printf("mapdb: from %p on CPU%d\n",
             __builtin_return_address(0),
             cxx::int_value<Cpu_number>(current_cpu()));
 * Use this function to reset the tree to empty.
 *
 * When a tree has been copied to a new one, use this function on the
 * old tree to keep its destructor from iterating over the nodes.

Mapping_tree::reset()

  _mappings[0].set_depth(Mapping::Depth_end);
PUBLIC inline NEEDS[Mapping_tree::next, <cassert>]
Mapping_tree::find_submap(Mapping *parent)

  assert (!parent->submap());

  // We need just one step to find a possible submap, because submaps
  // are always a parent's first child.
  Mapping *m = next(parent);

  if (m && m->submap())
621 PUBLIC inline NEEDS["ram_quota.h"]
623 Mapping_tree::allocate(Ram_quota *payer, Mapping *parent,
624 bool insert_submap = false)
626 Auto_quota<Ram_quota> q(payer, sizeof(Mapping));
627 if (EXPECT_FALSE(!q))
630 // After locating the right place for the new entry, it will be
631 // stored there (if this place is empty) or the following entries
632 // moved by one entry.
634 // We cannot continue if the last array entry is not free. This
635 // only happens if an earlier call to free() with this mapping tree
636 // couldn't allocate a bigger array. In this case, signal an
637 // out-of-memory condition.
638 if (EXPECT_FALSE (! last()->unused()))
641 // If the parent mapping already has the maximum depth, we cannot
643 if (EXPECT_FALSE (parent->depth() == Mapping::Depth_max))
646 //allocation is done, so...
649 Mapping *insert = parent + 1, *free = 0;
650 // - Find an insertion point for the new entry. Acceptable insertion
651 // points are either before a sibling (same depth) or at the end
652 // of the subtree; for submap insertions, it's always before
653 // the first sibling. "insert" keeps track of the last
654 // acceptable insertion point.
655 // - Find a free entry in the array encoding the subtree ("free").
656 // There might be none; in this case, we stop at the end of the
660 for (; insert < end(); ++insert)
662 // End of subtree? If we reach this point, this is our insert spot.
663 if (insert->is_end_tag() || insert->depth() <= parent->depth())
666 if (insert->unused())
668 else if (free // Only look for insert spots after free
669 && insert->depth() <= parent->depth() + 1)
674 assert (free == 0 || (free->unused() && free < insert));
676 // We now update "free" to point to a free spot that is acceptable
681 // "free" will be the latest free spot before the "insert" spot.
682 // If there is anything between "free" and "insert", move it
683 // upward to make space just before insert.
684 while (free + 1 != insert)
691 // Tree-header maintenance
692 _empty_count -= 1; // Allocated dead entry
697 // There was no free spot in the subtree. Move everything
698 // downward until we have free space. This is guaranteed to
699 // succeed, because we ensured earlier that the last entry of
700 // the array is free.
702 free = insert; // This will be the free spot
705 while (! insert->unused())
708 // Tree maintenance: handle end tag, empty count
709 if (insert->is_end_tag())
711 // Need to move end tag.
712 if (insert + 1 < end())
713 insert++; // Arrange for copying end tag as well
717 _empty_count -= 1; // Allocated dead entry
721 while (insert > free)
723 *insert = *(insert - 1);
728 _count += 1; // Adding an alive entry
730 // found a place to insert new child (free).
731 free->set_depth(insert_submap ? (unsigned)Mapping::Depth_submap
732 : parent->depth() + 1);
737 PUBLIC inline NEEDS["ram_quota.h"]
739 Mapping_tree::free_mapping(Ram_quota *q, Mapping *m)
741 assert (!m->unused() && !m->is_end_tag());
742 q->free(sizeof(Mapping));
PUBLIC template< typename SUBMAP_OPS >
Mapping_tree::flush(Mapping *parent, bool me_too,
                    Pcnt offs_begin, Pcnt offs_end,
                    SUBMAP_OPS const &submap_ops = SUBMAP_OPS())

  assert (!parent->unused());

  // This is easy to do: We just have to iterate over the array
  // encoding the tree.
  Mapping *start_of_deletions = parent;
  unsigned p_depth = parent->depth();
  unsigned deleted = 0;

  unsigned empty_elems_passed = 0;

      free_mapping(quota(parent->space()), parent);

      start_of_deletions++;

  unsigned m_depth = p_depth;

  for (Mapping *m = parent + 1;
       m < end() && !m->is_end_tag();

      if (static_cast<unsigned>(m->depth()) <= p_depth)

          // Found another data element -- stop deleting.  Since we
          // created holes in the tree representation, account for them.

          _empty_count += deleted;

          empty_elems_passed++;

      if (Treemap *submap = m->submap())

          space = submap_ops.owner(submap);

          // Check for an immediate child.  Note: It's a bad idea to
          // call m->parent() because that would iterate backwards
          // over already-deleted entries.
              && m_depth == p_depth
              && submap_ops.is_partial(submap, offs_begin, offs_end))

              submap_ops.flush(submap, offs_begin, offs_end);

              start_of_deletions++;

            submap_ops.del(submap);

      else // Found a real mapping

          m_depth = m->depth();

      // Delete the element.
      free_mapping(quota(space), m);

  // We deleted stuff at the end of the array -- move the end tag.
  if (start_of_deletions < end())

      start_of_deletions->set_depth(Mapping::Depth_end);

  // Also, reduce the number of free entries.
  _empty_count -= empty_elems_passed;
PUBLIC template< typename SUBMAP_OPS >
Mapping_tree::grant(Mapping *m, Space *new_space, Page page,
                    SUBMAP_OPS const &submap_ops = SUBMAP_OPS())

  unsigned long _quota = sizeof(Mapping);
  Treemap *submap = find_submap(m);

  _quota += submap_ops.mem_size(submap);

  if (!quota(new_space)->alloc(_quota))

  Space *old_space = m->space();

  m->set_space(new_space);

  submap_ops.grant(submap, new_space, page);

  quota(old_space)->free(_quota);
Mapping_tree::lookup(Space *space, Page page)

  for (m = mappings(); m; m = next(m))

      assert (!m->submap());
      if (m->space() == space && m->page() == page)
Base_mappable::lookup(Space *space, Page page)

  // Get and lock the tree.
  if (EXPECT_FALSE(lock.lock() == Lock::Invalid))

  Mapping_tree *t = tree.get();

  if (Mapping *m = t->lookup(space, page))

Base_mappable::insert(Mapping *parent, Space *space, Page page)

  Mapping_tree *t = tree.get();

      assert (parent == 0);
      Auto_quota<Ram_quota> q(Mapping_tree::quota(space), sizeof(Mapping));
      if (EXPECT_FALSE(!q))

      Mapping_tree::Size_id min_size = Mapping_tree::Size_id_min;
      cxx::unique_ptr<Mapping_tree> new_tree(new (min_size) Mapping_tree(min_size, page, space));

      if (EXPECT_FALSE(!new_tree))

      tree = cxx::move(new_tree);

      return tree->mappings();

  Mapping *free = t->allocate(Mapping_tree::quota(space), parent, false);

  if (EXPECT_FALSE(!free))

  free->set_space(space);
  free->set_page(page);

  t->check_integrity();
Base_mappable::pack()

  // Before we unlock the tree, we need to make sure that there is
  // room for at least one new mapping.  In particular, this means
  // that the last entry of the array encoding the tree must be free.

  // (1) When we use up less than a quarter of all entries of the
  // array encoding the tree, copy to a smaller tree.  Otherwise, (2)
  // if the last entry is free, do nothing.  Otherwise, (3) if less
  // than 3/4 of the entries are used, compress the tree.  Otherwise,
  // (4) copy to a larger tree.
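  // Worked example for a 16-entry tree: case (1) shrinks when
  // _count < 4 (less than a quarter used), case (3) compresses when
  // _count < (16 >> 2) + (16 >> 1) == 12, and only with 12 or more
  // live entries do we copy to a 32-entry tree in case (4).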
  Mapping_tree *t = tree.get();
  bool maybe_out_of_memory = false;

  do // (this is not actually a loop, just a block we can "break" out of)

      // (1) Do we need to allocate a smaller tree?
      if (t->_size_id > Mapping_tree::Size_id_min // must not be smallest size
          && (static_cast<unsigned>(t->_count) << 2) < t->number_of_entries())

          Mapping_tree::Size_id sid = t->Mapping_tree::shrink();
          cxx::unique_ptr<Mapping_tree> new_t(new (sid) Mapping_tree(sid, t));

          // Invalidate node 0 because we must not free the quota for it.

          // Register the new tree.
          tree = cxx::move(new_t);

      // (2) Is the last entry free?
      if (t->last()->unused())
        break; // OK, the last entry is free.

      // The last entry is not free -- either compress the current array
      // (i.e., move free entries to the end of the array), or allocate
      // a bigger one.

      // (3) Should we compress the tree?
      // We also try to compress if we cannot allocate a bigger
      // tree because there is no bigger tree size.
      if (t->_count < (t->number_of_entries() >> 2)
                      + (t->number_of_entries() >> 1)
          || t->_size_id == Size_id_max) // cannot enlarge?

          if (t->_size_id == Size_id_max)
            maybe_out_of_memory = true;

          Mapping_tree::copy_compact_tree(t, t); // in-place compression

      // (4) OK, allocate a bigger array.

      Mapping_tree::Size_id sid = t->bigger();
      cxx::unique_ptr<Mapping_tree> new_t(new (sid) Mapping_tree(sid, t));

          // Invalidate node 0 because we must not free the quota for it.

          // Register the new tree.
          tree = cxx::move(new_t);

      // Out of memory -- just do tree compression and hope that helps.
      maybe_out_of_memory = true;

      Mapping_tree::copy_compact_tree(t, t); // in-place compression

  // The last entry of the tree should now be free -- except if we're
  assert (t->last()->unused() || maybe_out_of_memory);
  (void) maybe_out_of_memory;