3 Reference Cycle Garbage Collection
4 ==================================
6 Neil Schemenauer <nas@arctrix.com>
8 Based on a post on the python-dev list. Ideas from Guido van Rossum,
9 Eric Tiedemann, and various others.
11 http://www.arctrix.com/nas/python/gc/
12 http://www.python.org/pipermail/python-dev/2000-March/003869.html
13 http://www.python.org/pipermail/python-dev/2000-March/004010.html
14 http://www.python.org/pipermail/python-dev/2000-March/004022.html
16 For a highlevel view of the collection process, read the collect
22 #include "frameobject.h" /* for PyFrame_ClearFreeList */
24 /* Get an object's GC head */
25 #define AS_GC(o) ((PyGC_Head *)(o)-1)
27 /* Get the object given the GC head */
28 #define FROM_GC(g) ((PyObject *)(((PyGC_Head *)g)+1))
30 /*** Global GC state ***/
32 struct gc_generation {
34 int threshold; /* collection threshold */
35 int count; /* count of allocations or collections of younger
39 #define NUM_GENERATIONS 3
40 #define GEN_HEAD(n) (&generations[n].head)
42 /* linked lists of container objects */
43 static struct gc_generation generations[NUM_GENERATIONS] = {
44 /* PyGC_Head, threshold, count */
45 {{{GEN_HEAD(0), GEN_HEAD(0), 0}}, 700, 0},
46 {{{GEN_HEAD(1), GEN_HEAD(1), 0}}, 10, 0},
47 {{{GEN_HEAD(2), GEN_HEAD(2), 0}}, 10, 0},
50 PyGC_Head *_PyGC_generation0 = GEN_HEAD(0);
52 static int enabled = 1; /* automatic collection enabled? */
54 /* true if we are currently running the collector */
55 static int collecting = 0;
57 /* list of uncollectable objects */
58 static PyObject *garbage = NULL;
60 /* Python string to use if unhandled exception occurs */
61 static PyObject *gc_str = NULL;
63 /* Python string used to look for __del__ attribute. */
64 static PyObject *delstr = NULL;
66 /* set for debugging information */
67 #define DEBUG_STATS (1<<0) /* print collection statistics */
68 #define DEBUG_COLLECTABLE (1<<1) /* print collectable objects */
69 #define DEBUG_UNCOLLECTABLE (1<<2) /* print uncollectable objects */
70 #define DEBUG_INSTANCES (1<<3) /* print instances */
71 #define DEBUG_OBJECTS (1<<4) /* print other objects */
72 #define DEBUG_SAVEALL (1<<5) /* save all garbage in gc.garbage */
73 #define DEBUG_LEAK DEBUG_COLLECTABLE | \
74 DEBUG_UNCOLLECTABLE | \
79 static PyObject *tmod = NULL;
81 /*--------------------------------------------------------------------------
84 Between collections, every gc'ed object has one of two gc_refs values:
87 The initial state; objects returned by PyObject_GC_Malloc are in this
88 state. The object doesn't live in any generation list, and its
89 tp_traverse slot must not be called.
92 The object lives in some generation list, and its tp_traverse is safe to
93 call. An object transitions to GC_REACHABLE when PyObject_GC_Track
96 During a collection, gc_refs can temporarily take on other states:
99 At the start of a collection, update_refs() copies the true refcount
100 to gc_refs, for each object in the generation being collected.
101 subtract_refs() then adjusts gc_refs so that it equals the number of
102 times an object is referenced directly from outside the generation
104 gc_refs remains >= 0 throughout these steps.
106 GC_TENTATIVELY_UNREACHABLE
107 move_unreachable() then moves objects not reachable (whether directly or
108 indirectly) from outside the generation into an "unreachable" set.
109 Objects that are found to be reachable have gc_refs set to GC_REACHABLE
110 again. Objects that are found to be unreachable have gc_refs set to
111 GC_TENTATIVELY_UNREACHABLE. It's "tentatively" because the pass doing
112 this can't be sure until it ends, and GC_TENTATIVELY_UNREACHABLE may
113 transition back to GC_REACHABLE.
115 Only objects with GC_TENTATIVELY_UNREACHABLE still set are candidates
116 for collection. If it's decided not to collect such an object (e.g.,
117 it has a __del__ method), its gc_refs is restored to GC_REACHABLE again.
118 ----------------------------------------------------------------------------
120 #define GC_UNTRACKED _PyGC_REFS_UNTRACKED
121 #define GC_REACHABLE _PyGC_REFS_REACHABLE
122 #define GC_TENTATIVELY_UNREACHABLE _PyGC_REFS_TENTATIVELY_UNREACHABLE
124 #define IS_TRACKED(o) ((AS_GC(o))->gc.gc_refs != GC_UNTRACKED)
125 #define IS_REACHABLE(o) ((AS_GC(o))->gc.gc_refs == GC_REACHABLE)
126 #define IS_TENTATIVELY_UNREACHABLE(o) ( \
127 (AS_GC(o))->gc.gc_refs == GC_TENTATIVELY_UNREACHABLE)
129 /*** list functions ***/
132 gc_list_init(PyGC_Head *list)
134 list->gc.gc_prev = list;
135 list->gc.gc_next = list;
139 gc_list_is_empty(PyGC_Head *list)
141 return (list->gc.gc_next == list);
145 /* This became unused after gc_list_move() was introduced. */
146 /* Append `node` to `list`. */
148 gc_list_append(PyGC_Head *node, PyGC_Head *list)
150 node->gc.gc_next = list;
151 node->gc.gc_prev = list->gc.gc_prev;
152 node->gc.gc_prev->gc.gc_next = node;
153 list->gc.gc_prev = node;
157 /* Remove `node` from the gc list it's currently in. */
159 gc_list_remove(PyGC_Head *node)
161 node->gc.gc_prev->gc.gc_next = node->gc.gc_next;
162 node->gc.gc_next->gc.gc_prev = node->gc.gc_prev;
163 node->gc.gc_next = NULL; /* object is not currently tracked */
166 /* Move `node` from the gc list it's currently in (which is not explicitly
167 * named here) to the end of `list`. This is semantically the same as
168 * gc_list_remove(node) followed by gc_list_append(node, list).
171 gc_list_move(PyGC_Head *node, PyGC_Head *list)
174 PyGC_Head *current_prev = node->gc.gc_prev;
175 PyGC_Head *current_next = node->gc.gc_next;
176 /* Unlink from current list. */
177 current_prev->gc.gc_next = current_next;
178 current_next->gc.gc_prev = current_prev;
179 /* Relink at end of new list. */
180 new_prev = node->gc.gc_prev = list->gc.gc_prev;
181 new_prev->gc.gc_next = list->gc.gc_prev = node;
182 node->gc.gc_next = list;
185 /* append list `from` onto list `to`; `from` becomes an empty list */
187 gc_list_merge(PyGC_Head *from, PyGC_Head *to)
191 if (!gc_list_is_empty(from)) {
192 tail = to->gc.gc_prev;
193 tail->gc.gc_next = from->gc.gc_next;
194 tail->gc.gc_next->gc.gc_prev = tail;
195 to->gc.gc_prev = from->gc.gc_prev;
196 to->gc.gc_prev->gc.gc_next = to;
202 gc_list_size(PyGC_Head *list)
206 for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) {
212 /* Append objects in a GC list to a Python list.
213 * Return 0 if all OK, < 0 if error (out of memory for list).
216 append_objects(PyObject *py_list, PyGC_Head *gc_list)
219 for (gc = gc_list->gc.gc_next; gc != gc_list; gc = gc->gc.gc_next) {
220 PyObject *op = FROM_GC(gc);
222 if (PyList_Append(py_list, op)) {
223 return -1; /* exception */
230 /*** end of list stuff ***/
233 /* Set all gc_refs = ob_refcnt. After this, gc_refs is > 0 for all objects
234 * in containers, and is GC_REACHABLE for all tracked gc objects not in
238 update_refs(PyGC_Head *containers)
240 PyGC_Head *gc = containers->gc.gc_next;
241 for (; gc != containers; gc = gc->gc.gc_next) {
242 assert(gc->gc.gc_refs == GC_REACHABLE);
243 gc->gc.gc_refs = Py_REFCNT(FROM_GC(gc));
244 /* Python's cyclic gc should never see an incoming refcount
245 * of 0: if something decref'ed to 0, it should have been
246 * deallocated immediately at that time.
247 * Possible cause (if the assert triggers): a tp_dealloc
248 * routine left a gc-aware object tracked during its teardown
249 * phase, and did something-- or allowed something to happen --
250 * that called back into Python. gc can trigger then, and may
251 * see the still-tracked dying object. Before this assert
252 * was added, such mistakes went on to allow gc to try to
253 * delete the object again. In a debug build, that caused
254 * a mysterious segfault, when _Py_ForgetReference tried
255 * to remove the object from the doubly-linked list of all
256 * objects a second time. In a release build, an actual
257 * double deallocation occurred, which leads to corruption
258 * of the allocator's internal bookkeeping pointers. That's
259 * so serious that maybe this should be a release-build
260 * check instead of an assert?
262 assert(gc->gc.gc_refs != 0);
266 /* A traversal callback for subtract_refs. */
268 visit_decref(PyObject *op, void *data)
271 if (PyObject_IS_GC(op)) {
272 PyGC_Head *gc = AS_GC(op);
273 /* We're only interested in gc_refs for objects in the
274 * generation being collected, which can be recognized
275 * because only they have positive gc_refs.
277 assert(gc->gc.gc_refs != 0); /* else refcount was too small */
278 if (gc->gc.gc_refs > 0)
284 /* Subtract internal references from gc_refs. After this, gc_refs is >= 0
285 * for all objects in containers, and is GC_REACHABLE for all tracked gc
286 * objects not in containers. The ones with gc_refs > 0 are directly
287 * reachable from outside containers, and so can't be collected.
290 subtract_refs(PyGC_Head *containers)
292 traverseproc traverse;
293 PyGC_Head *gc = containers->gc.gc_next;
294 for (; gc != containers; gc=gc->gc.gc_next) {
295 traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
296 (void) traverse(FROM_GC(gc),
297 (visitproc)visit_decref,
302 /* A traversal callback for move_unreachable. */
304 visit_reachable(PyObject *op, PyGC_Head *reachable)
306 if (PyObject_IS_GC(op)) {
307 PyGC_Head *gc = AS_GC(op);
308 const Py_ssize_t gc_refs = gc->gc.gc_refs;
311 /* This is in move_unreachable's 'young' list, but
312 * the traversal hasn't yet gotten to it. All
313 * we need to do is tell move_unreachable that it's
318 else if (gc_refs == GC_TENTATIVELY_UNREACHABLE) {
319 /* This had gc_refs = 0 when move_unreachable got
320 * to it, but turns out it's reachable after all.
321 * Move it back to move_unreachable's 'young' list,
322 * and move_unreachable will eventually get to it
325 gc_list_move(gc, reachable);
328 /* Else there's nothing to do.
329 * If gc_refs > 0, it must be in move_unreachable's 'young'
330 * list, and move_unreachable will eventually get to it.
331 * If gc_refs == GC_REACHABLE, it's either in some other
332 * generation so we don't care about it, or move_unreachable
333 * already dealt with it.
334 * If gc_refs == GC_UNTRACKED, it must be ignored.
338 || gc_refs == GC_REACHABLE
339 || gc_refs == GC_UNTRACKED);
345 /* Move the unreachable objects from young to unreachable. After this,
346 * all objects in young have gc_refs = GC_REACHABLE, and all objects in
347 * unreachable have gc_refs = GC_TENTATIVELY_UNREACHABLE. All tracked
348 * gc objects not in young or unreachable still have gc_refs = GC_REACHABLE.
349 * All objects in young after this are directly or indirectly reachable
350 * from outside the original young; and all objects in unreachable are
354 move_unreachable(PyGC_Head *young, PyGC_Head *unreachable)
356 PyGC_Head *gc = young->gc.gc_next;
358 /* Invariants: all objects "to the left" of us in young have gc_refs
359 * = GC_REACHABLE, and are indeed reachable (directly or indirectly)
360 * from outside the young list as it was at entry. All other objects
361 * from the original young "to the left" of us are in unreachable now,
362 * and have gc_refs = GC_TENTATIVELY_UNREACHABLE. All objects to the
363 * left of us in 'young' now have been scanned, and no objects here
364 * or to the right have been scanned yet.
367 while (gc != young) {
370 if (gc->gc.gc_refs) {
371 /* gc is definitely reachable from outside the
372 * original 'young'. Mark it as such, and traverse
373 * its pointers to find any other objects that may
374 * be directly reachable from it. Note that the
375 * call to tp_traverse may append objects to young,
376 * so we have to wait until it returns to determine
377 * the next object to visit.
379 PyObject *op = FROM_GC(gc);
380 traverseproc traverse = Py_TYPE(op)->tp_traverse;
381 assert(gc->gc.gc_refs > 0);
382 gc->gc.gc_refs = GC_REACHABLE;
384 (visitproc)visit_reachable,
386 next = gc->gc.gc_next;
389 /* This *may* be unreachable. To make progress,
390 * assume it is. gc isn't directly reachable from
391 * any object we've already traversed, but may be
392 * reachable from an object we haven't gotten to yet.
393 * visit_reachable will eventually move gc back into
394 * young if that's so, and we'll see it again.
396 next = gc->gc.gc_next;
397 gc_list_move(gc, unreachable);
398 gc->gc.gc_refs = GC_TENTATIVELY_UNREACHABLE;
404 /* Return true if object has a finalization method.
405 * CAUTION: An instance of an old-style class has to be checked for a
406 *__del__ method, and earlier versions of this used to call PyObject_HasAttr,
407 * which in turn could call the class's __getattr__ hook (if any). That
408 * could invoke arbitrary Python code, mutating the object graph in arbitrary
409 * ways, and that was the source of some excruciatingly subtle bugs.
412 has_finalizer(PyObject *op)
414 if (PyInstance_Check(op)) {
415 assert(delstr != NULL);
416 return _PyInstance_Lookup(op, delstr) != NULL;
418 else if (PyType_HasFeature(op->ob_type, Py_TPFLAGS_HEAPTYPE))
419 return op->ob_type->tp_del != NULL;
420 else if (PyGen_CheckExact(op))
421 return PyGen_NeedsFinalizing((PyGenObject *)op);
426 /* Move the objects in unreachable with __del__ methods into `finalizers`.
427 * Objects moved into `finalizers` have gc_refs set to GC_REACHABLE; the
428 * objects remaining in unreachable are left at GC_TENTATIVELY_UNREACHABLE.
431 move_finalizers(PyGC_Head *unreachable, PyGC_Head *finalizers)
436 /* March over unreachable. Move objects with finalizers into
439 for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
440 PyObject *op = FROM_GC(gc);
442 assert(IS_TENTATIVELY_UNREACHABLE(op));
443 next = gc->gc.gc_next;
445 if (has_finalizer(op)) {
446 gc_list_move(gc, finalizers);
447 gc->gc.gc_refs = GC_REACHABLE;
452 /* A traversal callback for move_finalizer_reachable. */
454 visit_move(PyObject *op, PyGC_Head *tolist)
456 if (PyObject_IS_GC(op)) {
457 if (IS_TENTATIVELY_UNREACHABLE(op)) {
458 PyGC_Head *gc = AS_GC(op);
459 gc_list_move(gc, tolist);
460 gc->gc.gc_refs = GC_REACHABLE;
466 /* Move objects that are reachable from finalizers, from the unreachable set
467 * into finalizers set.
470 move_finalizer_reachable(PyGC_Head *finalizers)
472 traverseproc traverse;
473 PyGC_Head *gc = finalizers->gc.gc_next;
474 for (; gc != finalizers; gc = gc->gc.gc_next) {
475 /* Note that the finalizers list may grow during this. */
476 traverse = Py_TYPE(FROM_GC(gc))->tp_traverse;
477 (void) traverse(FROM_GC(gc),
478 (visitproc)visit_move,
483 /* Clear all weakrefs to unreachable objects, and if such a weakref has a
484 * callback, invoke it if necessary. Note that it's possible for such
485 * weakrefs to be outside the unreachable set -- indeed, those are precisely
486 * the weakrefs whose callbacks must be invoked. See gc_weakref.txt for
487 * overview & some details. Some weakrefs with callbacks may be reclaimed
488 * directly by this routine; the number reclaimed is the return value. Other
489 * weakrefs with callbacks may be moved into the `old` generation. Objects
490 * moved into `old` have gc_refs set to GC_REACHABLE; the objects remaining in
491 * unreachable are left at GC_TENTATIVELY_UNREACHABLE. When this returns,
492 * no object in `unreachable` is weakly referenced anymore.
495 handle_weakrefs(PyGC_Head *unreachable, PyGC_Head *old)
498 PyObject *op; /* generally FROM_GC(gc) */
499 PyWeakReference *wr; /* generally a cast of op */
500 PyGC_Head wrcb_to_call; /* weakrefs with callbacks to call */
504 gc_list_init(&wrcb_to_call);
506 /* Clear all weakrefs to the objects in unreachable. If such a weakref
507 * also has a callback, move it into `wrcb_to_call` if the callback
508 * needs to be invoked. Note that we cannot invoke any callbacks until
509 * all weakrefs to unreachable objects are cleared, lest the callback
510 * resurrect an unreachable object via a still-active weakref. We
511 * make another pass over wrcb_to_call, invoking callbacks, after this
514 for (gc = unreachable->gc.gc_next; gc != unreachable; gc = next) {
515 PyWeakReference **wrlist;
518 assert(IS_TENTATIVELY_UNREACHABLE(op));
519 next = gc->gc.gc_next;
521 if (! PyType_SUPPORTS_WEAKREFS(Py_TYPE(op)))
524 /* It supports weakrefs. Does it have any? */
525 wrlist = (PyWeakReference **)
526 PyObject_GET_WEAKREFS_LISTPTR(op);
528 /* `op` may have some weakrefs. March over the list, clear
529 * all the weakrefs, and move the weakrefs with callbacks
530 * that must be called into wrcb_to_call.
532 for (wr = *wrlist; wr != NULL; wr = *wrlist) {
533 PyGC_Head *wrasgc; /* AS_GC(wr) */
535 /* _PyWeakref_ClearRef clears the weakref but leaves
536 * the callback pointer intact. Obscure: it also
539 assert(wr->wr_object == op);
540 _PyWeakref_ClearRef(wr);
541 assert(wr->wr_object == Py_None);
542 if (wr->wr_callback == NULL)
543 continue; /* no callback */
545 /* Headache time. `op` is going away, and is weakly referenced by
546 * `wr`, which has a callback. Should the callback be invoked? If wr
549 * 1. There's no need to call it. The object and the weakref are
550 * both going away, so it's legitimate to pretend the weakref is
551 * going away first. The user has to ensure a weakref outlives its
552 * referent if they want a guarantee that the wr callback will get
555 * 2. It may be catastrophic to call it. If the callback is also in
556 * cyclic trash (CT), then although the CT is unreachable from
557 * outside the current generation, CT may be reachable from the
558 * callback. Then the callback could resurrect insane objects.
560 * Since the callback is never needed and may be unsafe in this case,
561 * wr is simply left in the unreachable set. Note that because we
562 * already called _PyWeakref_ClearRef(wr), its callback will never
565 * OTOH, if wr isn't part of CT, we should invoke the callback: the
566 * weakref outlived the trash. Note that since wr isn't CT in this
567 * case, its callback can't be CT either -- wr acted as an external
568 * root to this generation, and therefore its callback did too. So
569 * nothing in CT is reachable from the callback either, so it's hard
570 * to imagine how calling it later could create a problem for us. wr
571 * is moved to wrcb_to_call in this case.
573 if (IS_TENTATIVELY_UNREACHABLE(wr))
575 assert(IS_REACHABLE(wr));
577 /* Create a new reference so that wr can't go away
578 * before we can process it again.
582 /* Move wr to wrcb_to_call, for the next pass. */
584 assert(wrasgc != next); /* wrasgc is reachable, but
585 next isn't, so they can't
587 gc_list_move(wrasgc, &wrcb_to_call);
591 /* Invoke the callbacks we decided to honor. It's safe to invoke them
592 * because they can't reference unreachable objects.
594 while (! gc_list_is_empty(&wrcb_to_call)) {
598 gc = wrcb_to_call.gc.gc_next;
600 assert(IS_REACHABLE(op));
601 assert(PyWeakref_Check(op));
602 wr = (PyWeakReference *)op;
603 callback = wr->wr_callback;
604 assert(callback != NULL);
606 /* copy-paste of weakrefobject.c's handle_callback() */
607 temp = PyObject_CallFunctionObjArgs(callback, wr, NULL);
609 PyErr_WriteUnraisable(callback);
613 /* Give up the reference we created in the first pass. When
614 * op's refcount hits 0 (which it may or may not do right now),
615 * op's tp_dealloc will decref op->wr_callback too. Note
616 * that the refcount probably will hit 0 now, and because this
617 * weakref was reachable to begin with, gc didn't already
618 * add it to its count of freed objects. Example: a reachable
619 * weak value dict maps some key to this reachable weakref.
620 * The callback removes this key->weakref mapping from the
621 * dict, leaving no other references to the weakref (excepting
625 if (wrcb_to_call.gc.gc_next == gc) {
626 /* object is still alive -- move it */
627 gc_list_move(gc, old);
637 debug_instance(char *msg, PyInstanceObject *inst)
640 /* simple version of instance_repr */
641 PyObject *classname = inst->in_class->cl_name;
642 if (classname != NULL && PyString_Check(classname))
643 cname = PyString_AsString(classname);
646 PySys_WriteStderr("gc: %.100s <%.100s instance at %p>\n",
651 debug_cycle(char *msg, PyObject *op)
653 if ((debug & DEBUG_INSTANCES) && PyInstance_Check(op)) {
654 debug_instance(msg, (PyInstanceObject *)op);
656 else if (debug & DEBUG_OBJECTS) {
657 PySys_WriteStderr("gc: %.100s <%.100s %p>\n",
658 msg, Py_TYPE(op)->tp_name, op);
662 /* Handle uncollectable garbage (cycles with finalizers, and stuff reachable
663 * only from such cycles).
664 * If DEBUG_SAVEALL, all objects in finalizers are appended to the module
665 * garbage list (a Python list), else only the objects in finalizers with
666 * __del__ methods are appended to garbage. All objects in finalizers are
667 * merged into the old list regardless.
668 * Returns 0 if all OK, <0 on error (out of memory to grow the garbage list).
669 * The finalizers list is made empty on a successful return.
672 handle_finalizers(PyGC_Head *finalizers, PyGC_Head *old)
674 PyGC_Head *gc = finalizers->gc.gc_next;
676 if (garbage == NULL) {
677 garbage = PyList_New(0);
679 Py_FatalError("gc couldn't create gc.garbage list");
681 for (; gc != finalizers; gc = gc->gc.gc_next) {
682 PyObject *op = FROM_GC(gc);
684 if ((debug & DEBUG_SAVEALL) || has_finalizer(op)) {
685 if (PyList_Append(garbage, op) < 0)
690 gc_list_merge(finalizers, old);
694 /* Break reference cycles by clearing the containers involved. This is
695 * tricky business as the lists can be changing and we don't know which
696 * objects may be freed. It is possible I screwed something up here.
699 delete_garbage(PyGC_Head *collectable, PyGC_Head *old)
703 while (!gc_list_is_empty(collectable)) {
704 PyGC_Head *gc = collectable->gc.gc_next;
705 PyObject *op = FROM_GC(gc);
707 assert(IS_TENTATIVELY_UNREACHABLE(op));
708 if (debug & DEBUG_SAVEALL) {
709 PyList_Append(garbage, op);
712 if ((clear = Py_TYPE(op)->tp_clear) != NULL) {
718 if (collectable->gc.gc_next == gc) {
719 /* object is still alive, move it, it may die later */
720 gc_list_move(gc, old);
721 gc->gc.gc_refs = GC_REACHABLE;
726 /* Clear all free lists
727 * All free lists are cleared during the collection of the highest generation.
728 * Allocated items in the free list may keep a pymalloc arena occupied.
729 * Clearing the free lists may give back memory to the OS earlier.
732 clear_freelists(void)
734 (void)PyMethod_ClearFreeList();
735 (void)PyFrame_ClearFreeList();
736 (void)PyCFunction_ClearFreeList();
737 (void)PyTuple_ClearFreeList();
738 (void)PyUnicode_ClearFreeList();
739 (void)PyInt_ClearFreeList();
740 (void)PyFloat_ClearFreeList();
748 PyObject *f = PyObject_CallMethod(tmod, "time", NULL);
753 if (PyFloat_Check(f))
754 result = PyFloat_AsDouble(f);
761 /* This is the main function. Read this to understand how the
762 * collection process works. */
764 collect(int generation)
767 Py_ssize_t m = 0; /* # objects collected */
768 Py_ssize_t n = 0; /* # unreachable objects that couldn't be collected */
769 PyGC_Head *young; /* the generation we are examining */
770 PyGC_Head *old; /* next older generation */
771 PyGC_Head unreachable; /* non-problematic unreachable trash */
772 PyGC_Head finalizers; /* objects with, & reachable from, __del__ */
776 if (delstr == NULL) {
777 delstr = PyString_InternFromString("__del__");
779 Py_FatalError("gc couldn't allocate \"__del__\"");
782 if (debug & DEBUG_STATS) {
784 PySys_WriteStderr("gc: collecting generation %d...\n",
786 PySys_WriteStderr("gc: objects in each generation:");
787 for (i = 0; i < NUM_GENERATIONS; i++)
788 PySys_WriteStderr(" %" PY_FORMAT_SIZE_T "d",
789 gc_list_size(GEN_HEAD(i)));
790 PySys_WriteStderr("\n");
793 /* update collection and allocation counters */
794 if (generation+1 < NUM_GENERATIONS)
795 generations[generation+1].count += 1;
796 for (i = 0; i <= generation; i++)
797 generations[i].count = 0;
799 /* merge younger generations with one we are currently collecting */
800 for (i = 0; i < generation; i++) {
801 gc_list_merge(GEN_HEAD(i), GEN_HEAD(generation));
804 /* handy references */
805 young = GEN_HEAD(generation);
806 if (generation < NUM_GENERATIONS-1)
807 old = GEN_HEAD(generation+1);
811 /* Using ob_refcnt and gc_refs, calculate which objects in the
812 * container set are reachable from outside the set (i.e., have a
813 * refcount greater than 0 when all the references within the
814 * set are taken into account).
817 subtract_refs(young);
819 /* Leave everything reachable from outside young in young, and move
820 * everything else (in young) to unreachable.
821 * NOTE: This used to move the reachable objects into a reachable
822 * set instead. But most things usually turn out to be reachable,
823 * so it's more efficient to move the unreachable things.
825 gc_list_init(&unreachable);
826 move_unreachable(young, &unreachable);
828 /* Move reachable objects to next generation. */
830 gc_list_merge(young, old);
832 /* All objects in unreachable are trash, but objects reachable from
833 * finalizers can't safely be deleted. Python programmers should take
834 * care not to create such things. For Python, finalizers means
835 * instance objects with __del__ methods. Weakrefs with callbacks
836 * can also call arbitrary Python code but they will be dealt with by
839 gc_list_init(&finalizers);
840 move_finalizers(&unreachable, &finalizers);
841 /* finalizers contains the unreachable objects with a finalizer;
842 * unreachable objects reachable *from* those are also uncollectable,
843 * and we move those into the finalizers list too.
845 move_finalizer_reachable(&finalizers);
847 /* Collect statistics on collectable objects found and print
848 * debugging information.
850 for (gc = unreachable.gc.gc_next; gc != &unreachable;
851 gc = gc->gc.gc_next) {
853 if (debug & DEBUG_COLLECTABLE) {
854 debug_cycle("collectable", FROM_GC(gc));
858 /* Clear weakrefs and invoke callbacks as necessary. */
859 m += handle_weakrefs(&unreachable, old);
861 /* Call tp_clear on objects in the unreachable set. This will cause
862 * the reference cycles to be broken. It may also cause some objects
863 * in finalizers to be freed.
865 delete_garbage(&unreachable, old);
867 /* Collect statistics on uncollectable objects found and print
868 * debugging information. */
869 for (gc = finalizers.gc.gc_next;
871 gc = gc->gc.gc_next) {
873 if (debug & DEBUG_UNCOLLECTABLE)
874 debug_cycle("uncollectable", FROM_GC(gc));
876 if (debug & DEBUG_STATS) {
877 double t2 = get_time();
878 if (m == 0 && n == 0)
879 PySys_WriteStderr("gc: done");
883 "%" PY_FORMAT_SIZE_T "d unreachable, "
884 "%" PY_FORMAT_SIZE_T "d uncollectable",
887 PySys_WriteStderr(", %.4fs elapsed", t2-t1);
889 PySys_WriteStderr(".\n");
892 /* Append instances in the uncollectable set to a Python
893 * reachable list of garbage. The programmer has to deal with
894 * this if they insist on creating this type of structure.
896 (void)handle_finalizers(&finalizers, old);
898 /* Clear free list only during the collection of the higest
900 if (generation == NUM_GENERATIONS-1) {
904 if (PyErr_Occurred()) {
906 gc_str = PyString_FromString("garbage collection");
907 PyErr_WriteUnraisable(gc_str);
908 Py_FatalError("unexpected exception during garbage collection");
914 collect_generations(void)
919 /* Find the oldest generation (higest numbered) where the count
920 * exceeds the threshold. Objects in the that generation and
921 * generations younger than it will be collected. */
922 for (i = NUM_GENERATIONS-1; i >= 0; i--) {
923 if (generations[i].count > generations[i].threshold) {
931 PyDoc_STRVAR(gc_enable__doc__,
934 "Enable automatic garbage collection.\n");
937 gc_enable(PyObject *self, PyObject *noargs)
944 PyDoc_STRVAR(gc_disable__doc__,
945 "disable() -> None\n"
947 "Disable automatic garbage collection.\n");
950 gc_disable(PyObject *self, PyObject *noargs)
957 PyDoc_STRVAR(gc_isenabled__doc__,
958 "isenabled() -> status\n"
960 "Returns true if automatic garbage collection is enabled.\n");
963 gc_isenabled(PyObject *self, PyObject *noargs)
965 return PyBool_FromLong((long)enabled);
968 PyDoc_STRVAR(gc_collect__doc__,
969 "collect([generation]) -> n\n"
971 "With no arguments, run a full collection. The optional argument\n"
972 "may be an integer specifying which generation to collect. A ValueError\n"
973 "is raised if the generation number is invalid.\n\n"
974 "The number of unreachable objects is returned.\n");
977 gc_collect(PyObject *self, PyObject *args, PyObject *kws)
979 static char *keywords[] = {"generation", NULL};
980 int genarg = NUM_GENERATIONS - 1;
983 if (!PyArg_ParseTupleAndKeywords(args, kws, "|i", keywords, &genarg))
986 else if (genarg < 0 || genarg >= NUM_GENERATIONS) {
987 PyErr_SetString(PyExc_ValueError, "invalid generation");
992 n = 0; /* already collecting, don't do anything */
999 return PyInt_FromSsize_t(n);
1002 PyDoc_STRVAR(gc_set_debug__doc__,
1003 "set_debug(flags) -> None\n"
1005 "Set the garbage collection debugging flags. Debugging information is\n"
1006 "written to sys.stderr.\n"
1008 "flags is an integer and can have the following bits turned on:\n"
1010 " DEBUG_STATS - Print statistics during collection.\n"
1011 " DEBUG_COLLECTABLE - Print collectable objects found.\n"
1012 " DEBUG_UNCOLLECTABLE - Print unreachable but uncollectable objects found.\n"
1013 " DEBUG_INSTANCES - Print instance objects.\n"
1014 " DEBUG_OBJECTS - Print objects other than instances.\n"
1015 " DEBUG_SAVEALL - Save objects to gc.garbage rather than freeing them.\n"
1016 " DEBUG_LEAK - Debug leaking programs (everything but STATS).\n");
1019 gc_set_debug(PyObject *self, PyObject *args)
1021 if (!PyArg_ParseTuple(args, "i:set_debug", &debug))
1028 PyDoc_STRVAR(gc_get_debug__doc__,
1029 "get_debug() -> flags\n"
1031 "Get the garbage collection debugging flags.\n");
1034 gc_get_debug(PyObject *self, PyObject *noargs)
1036 return Py_BuildValue("i", debug);
1039 PyDoc_STRVAR(gc_set_thresh__doc__,
1040 "set_threshold(threshold0, [threshold1, threshold2]) -> None\n"
1042 "Sets the collection thresholds. Setting threshold0 to zero disables\n"
1046 gc_set_thresh(PyObject *self, PyObject *args)
1049 if (!PyArg_ParseTuple(args, "i|ii:set_threshold",
1050 &generations[0].threshold,
1051 &generations[1].threshold,
1052 &generations[2].threshold))
1054 for (i = 2; i < NUM_GENERATIONS; i++) {
1055 /* generations higher than 2 get the same threshold */
1056 generations[i].threshold = generations[2].threshold;
1063 PyDoc_STRVAR(gc_get_thresh__doc__,
1064 "get_threshold() -> (threshold0, threshold1, threshold2)\n"
1066 "Return the current collection thresholds\n");
1069 gc_get_thresh(PyObject *self, PyObject *noargs)
1071 return Py_BuildValue("(iii)",
1072 generations[0].threshold,
1073 generations[1].threshold,
1074 generations[2].threshold);
1077 PyDoc_STRVAR(gc_get_count__doc__,
1078 "get_count() -> (count0, count1, count2)\n"
1080 "Return the current collection counts\n");
1083 gc_get_count(PyObject *self, PyObject *noargs)
1085 return Py_BuildValue("(iii)",
1086 generations[0].count,
1087 generations[1].count,
1088 generations[2].count);
1092 referrersvisit(PyObject* obj, PyObject *objs)
1095 for (i = 0; i < PyTuple_GET_SIZE(objs); i++)
1096 if (PyTuple_GET_ITEM(objs, i) == obj)
1102 gc_referrers_for(PyObject *objs, PyGC_Head *list, PyObject *resultlist)
1106 traverseproc traverse;
1107 for (gc = list->gc.gc_next; gc != list; gc = gc->gc.gc_next) {
1109 traverse = Py_TYPE(obj)->tp_traverse;
1110 if (obj == objs || obj == resultlist)
1112 if (traverse(obj, (visitproc)referrersvisit, objs)) {
1113 if (PyList_Append(resultlist, obj) < 0)
1114 return 0; /* error */
1117 return 1; /* no error */
1120 PyDoc_STRVAR(gc_get_referrers__doc__,
1121 "get_referrers(*objs) -> list\n\
1122 Return the list of objects that directly refer to any of objs.");
1125 gc_get_referrers(PyObject *self, PyObject *args)
1128 PyObject *result = PyList_New(0);
1129 if (!result) return NULL;
1131 for (i = 0; i < NUM_GENERATIONS; i++) {
1132 if (!(gc_referrers_for(args, GEN_HEAD(i), result))) {
1140 /* Append obj to list; return true if error (out of memory), false if OK. */
1142 referentsvisit(PyObject *obj, PyObject *list)
1144 return PyList_Append(list, obj) < 0;
1147 PyDoc_STRVAR(gc_get_referents__doc__,
1148 "get_referents(*objs) -> list\n\
1149 Return the list of objects that are directly referred to by objs.");
1152 gc_get_referents(PyObject *self, PyObject *args)
1155 PyObject *result = PyList_New(0);
1160 for (i = 0; i < PyTuple_GET_SIZE(args); i++) {
1161 traverseproc traverse;
1162 PyObject *obj = PyTuple_GET_ITEM(args, i);
1164 if (! PyObject_IS_GC(obj))
1166 traverse = Py_TYPE(obj)->tp_traverse;
1169 if (traverse(obj, (visitproc)referentsvisit, result)) {
1177 PyDoc_STRVAR(gc_get_objects__doc__,
1178 "get_objects() -> [...]\n"
1180 "Return a list of objects tracked by the collector (excluding the list\n"
1184 gc_get_objects(PyObject *self, PyObject *noargs)
1189 result = PyList_New(0);
1192 for (i = 0; i < NUM_GENERATIONS; i++) {
1193 if (append_objects(result, GEN_HEAD(i))) {
1202 PyDoc_STRVAR(gc__doc__,
1203 "This module provides access to the garbage collector for reference cycles.\n"
1205 "enable() -- Enable automatic garbage collection.\n"
1206 "disable() -- Disable automatic garbage collection.\n"
1207 "isenabled() -- Returns true if automatic collection is enabled.\n"
1208 "collect() -- Do a full collection right now.\n"
1209 "get_count() -- Return the current collection counts.\n"
1210 "set_debug() -- Set debugging flags.\n"
1211 "get_debug() -- Get debugging flags.\n"
1212 "set_threshold() -- Set the collection thresholds.\n"
1213 "get_threshold() -- Return the current the collection thresholds.\n"
1214 "get_objects() -- Return a list of all objects tracked by the collector.\n"
1215 "get_referrers() -- Return the list of objects that refer to an object.\n"
1216 "get_referents() -- Return the list of objects that an object refers to.\n");
1218 static PyMethodDef GcMethods[] = {
1219 {"enable", gc_enable, METH_NOARGS, gc_enable__doc__},
1220 {"disable", gc_disable, METH_NOARGS, gc_disable__doc__},
1221 {"isenabled", gc_isenabled, METH_NOARGS, gc_isenabled__doc__},
1222 {"set_debug", gc_set_debug, METH_VARARGS, gc_set_debug__doc__},
1223 {"get_debug", gc_get_debug, METH_NOARGS, gc_get_debug__doc__},
1224 {"get_count", gc_get_count, METH_NOARGS, gc_get_count__doc__},
1225 {"set_threshold", gc_set_thresh, METH_VARARGS, gc_set_thresh__doc__},
1226 {"get_threshold", gc_get_thresh, METH_NOARGS, gc_get_thresh__doc__},
1227 {"collect", (PyCFunction)gc_collect,
1228 METH_VARARGS | METH_KEYWORDS, gc_collect__doc__},
1229 {"get_objects", gc_get_objects,METH_NOARGS, gc_get_objects__doc__},
1230 {"get_referrers", gc_get_referrers, METH_VARARGS,
1231 gc_get_referrers__doc__},
1232 {"get_referents", gc_get_referents, METH_VARARGS,
1233 gc_get_referents__doc__},
1234 {NULL, NULL} /* Sentinel */
1242 m = Py_InitModule4("gc",
1246 PYTHON_API_VERSION);
1250 if (garbage == NULL) {
1251 garbage = PyList_New(0);
1252 if (garbage == NULL)
1256 if (PyModule_AddObject(m, "garbage", garbage) < 0)
1259 /* Importing can't be done in collect() because collect()
1260 * can be called via PyGC_Collect() in Py_Finalize().
1261 * This wouldn't be a problem, except that <initialized> is
1262 * reset to 0 before calling collect which trips up
1263 * the import and triggers an assertion.
1266 tmod = PyImport_ImportModuleNoBlock("time");
1271 #define ADD_INT(NAME) if (PyModule_AddIntConstant(m, #NAME, NAME) < 0) return
1272 ADD_INT(DEBUG_STATS);
1273 ADD_INT(DEBUG_COLLECTABLE);
1274 ADD_INT(DEBUG_UNCOLLECTABLE);
1275 ADD_INT(DEBUG_INSTANCES);
1276 ADD_INT(DEBUG_OBJECTS);
1277 ADD_INT(DEBUG_SAVEALL);
1278 ADD_INT(DEBUG_LEAK);
1282 /* API to invoke gc.collect() from C */
1289 n = 0; /* already collecting, don't do anything */
1292 n = collect(NUM_GENERATIONS - 1);
1301 _PyGC_Dump(PyGC_Head *g)
1303 _PyObject_Dump(FROM_GC(g));
1306 /* extension modules might be compiled with GC support so these
1307 functions must always be available */
1309 #undef PyObject_GC_Track
1310 #undef PyObject_GC_UnTrack
1311 #undef PyObject_GC_Del
1312 #undef _PyObject_GC_Malloc
1315 PyObject_GC_Track(void *op)
1317 _PyObject_GC_TRACK(op);
1320 /* for binary compatibility with 2.2 */
1322 _PyObject_GC_Track(PyObject *op)
1324 PyObject_GC_Track(op);
1328 PyObject_GC_UnTrack(void *op)
1330 /* Obscure: the Py_TRASHCAN mechanism requires that we be able to
1331 * call PyObject_GC_UnTrack twice on an object.
1334 _PyObject_GC_UNTRACK(op);
1337 /* for binary compatibility with 2.2 */
1339 _PyObject_GC_UnTrack(PyObject *op)
1341 PyObject_GC_UnTrack(op);
1345 _PyObject_GC_Malloc(size_t basicsize)
1349 if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head))
1350 return PyErr_NoMemory();
1351 g = (PyGC_Head *)PyObject_MALLOC(
1352 sizeof(PyGC_Head) + basicsize);
1354 return PyErr_NoMemory();
1355 g->gc.gc_refs = GC_UNTRACKED;
1356 generations[0].count++; /* number of allocated GC objects */
1357 if (generations[0].count > generations[0].threshold &&
1359 generations[0].threshold &&
1361 !PyErr_Occurred()) {
1363 collect_generations();
1371 _PyObject_GC_New(PyTypeObject *tp)
1373 PyObject *op = _PyObject_GC_Malloc(_PyObject_SIZE(tp));
1375 op = PyObject_INIT(op, tp);
1380 _PyObject_GC_NewVar(PyTypeObject *tp, Py_ssize_t nitems)
1382 const size_t size = _PyObject_VAR_SIZE(tp, nitems);
1383 PyVarObject *op = (PyVarObject *) _PyObject_GC_Malloc(size);
1385 op = PyObject_INIT_VAR(op, tp, nitems);
1390 _PyObject_GC_Resize(PyVarObject *op, Py_ssize_t nitems)
1392 const size_t basicsize = _PyObject_VAR_SIZE(Py_TYPE(op), nitems);
1393 PyGC_Head *g = AS_GC(op);
1394 if (basicsize > PY_SSIZE_T_MAX - sizeof(PyGC_Head))
1395 return (PyVarObject *)PyErr_NoMemory();
1396 g = (PyGC_Head *)PyObject_REALLOC(g, sizeof(PyGC_Head) + basicsize);
1398 return (PyVarObject *)PyErr_NoMemory();
1399 op = (PyVarObject *) FROM_GC(g);
1400 Py_SIZE(op) = nitems;
1405 PyObject_GC_Del(void *op)
1407 PyGC_Head *g = AS_GC(op);
1410 if (generations[0].count > 0) {
1411 generations[0].count--;
1416 /* for binary compatibility with 2.2 */
1417 #undef _PyObject_GC_Del
1419 _PyObject_GC_Del(PyObject *op)
1421 PyObject_GC_Del(op);