Understand CPython's memory model: reference counting, garbage collection, tracemalloc for leak detection, __slots__ memory savings, weak references for caches, and array/memoryview for zero-copy data.
Background
CPython manages memory with two layers: reference counting (objects are freed immediately when their count hits 0) and a cyclic garbage collector (handles reference cycles such as obj.ref = obj, which refcounting alone can never free). Understanding both lets you write code that doesn't leak memory, uses memory efficiently, and avoids GC pauses.
Time
35 minutes
Prerequisites
Lab 01 (Metaprogramming)
Tools
Docker: zchencow/innozverse-python:latest
Lab Instructions
Step 1: Reference Counting & sys.getrefcount
💡 sys.getrefcount(x) returns count + 1 because the function call itself creates a temporary reference to x as its argument. Every variable assignment, list element, dict value, and function argument increments the refcount. When it hits zero, __del__ is called and memory is freed immediately — no GC needed for non-cyclic objects.
docker run --rm zchencow/innozverse-python:latest python3 -c "
import sys, gc

# --- Reference counting basics ---
# sys.getrefcount always returns +1 (the argument itself is a reference)
x = 'hello'
print(f'refs to x: {sys.getrefcount(x) - 1}')
lst = [x, x, x]  # 3 more references
print(f'refs to x (in list): {sys.getrefcount(x) - 1}')
lst.clear()
print(f'refs to x (after clear): {sys.getrefcount(x) - 1}')

# --- Small integers and interned strings are cached ---
a = 42; b = 42
print(f'42 is 42: {a is b}')  # True — CPython caches -5..256
# Build the large ints at runtime: inside one compiled unit CPython would
# deduplicate equal literal constants, making 'is' True and hiding the point.
c = int('10000'); d = int('10000')
print(f'10000 is 10000: {c is d}')  # False — distinct objects outside the cache

# String interning: sys.intern guarantees one shared object per value
s1 = sys.intern('hello_world')
s2 = sys.intern('hello_world')
print(f'interned strings: {s1 is s2}')

# --- Object lifecycle with __del__ ---
# __del__ runs as soon as the refcount hits zero — no GC pass needed
# for acyclic objects.
class Tracked:
    count = 0  # class-level tally of live instances
    def __init__(self, name):
        self.name = name
        Tracked.count += 1
        print(f' Created: {name} (total={Tracked.count})')
    def __del__(self):
        Tracked.count -= 1
        print(f' Deleted: {self.name} (total={Tracked.count})')

print()
print('=== Object lifecycle ===')
t1 = Tracked('A')
t2 = Tracked('B')
print(f'Before del: {Tracked.count}')
del t1
print(f'After del t1: {Tracked.count}')
t2 = None  # drop last reference
gc.collect()
print(f'After t2=None: {Tracked.count}')
"
refs to x: 1
refs to x (in list): 4
refs to x (after clear): 1
42 is 42: True
10000 is 10000: False
interned strings: True
=== Object lifecycle ===
Created: A (total=1)
Created: B (total=2)
Before del: 2
Deleted: A (total=1)
After del t1: 1
Deleted: B (total=0)
After t2=None: 0
docker run --rm zchencow/innozverse-python:latest python3 -c "
import tracemalloc, gc

# --- Find leaks by diffing snapshots ---
tracemalloc.start(10)  # keep 10-frame deep stack traces

# Snapshot before the allocations we want to attribute
snapshot1 = tracemalloc.take_snapshot()

# Simulate a leak: global list accumulates data and is never released
_cache = []
for i in range(500):
    _cache.append({'id': i, 'name': f'Product-{i}', 'data': 'x' * 100})

snapshot2 = tracemalloc.take_snapshot()

# Compare snapshots, grouped by source line, largest growth first
print('=== Top memory allocations (diff) ===')
stats = snapshot2.compare_to(snapshot1, 'lineno')
for stat in stats[:5]:
    size_kb = stat.size_diff / 1024
    print(f' {size_kb:+8.1f} KB {stat}')

# Current vs peak traced memory since start()
current, peak = tracemalloc.get_traced_memory()
print(f'Current: {current/1024:.1f} KB | Peak: {peak/1024:.1f} KB')

# Clear the leak and verify the memory is released
_cache.clear()
gc.collect()
snapshot3 = tracemalloc.take_snapshot()
stats2 = snapshot3.compare_to(snapshot2, 'lineno')
freed = sum(s.size_diff for s in stats2) / 1024
print(f'After clear: freed ~{abs(freed):.1f} KB')
tracemalloc.stop()

# --- Shallow object sizes (sys.getsizeof does not follow references) ---
import sys, array
print()
print('=== Object size survey ===')
objects = [
    ('int', 42),
    ('float', 3.14),
    ('bool', True),
    ('str-10', 'x'*10),
    ('str-1000', 'x'*1000),
    ('bytes-1000', b'x'*1000),
    ('list-empty', []),
    ('list-1000', list(range(1000))),
    ('tuple-1000', tuple(range(1000))),
    ('dict-empty', {}),
    ('dict-100', {i: i for i in range(100)}),
    ('set-100', set(range(100))),
    ('array.l-1000', array.array('l', range(1000))),  # unboxed C longs — far smaller than list-1000
]
for name, obj in objects:
    size = sys.getsizeof(obj)
    print(f' {name:18s}: {size:>8,} bytes')
"