Memory leaks at cattrs converter #90

Reskov · 2024-05-17T09:19:35Z

I suppose here is a memory leak. cattrs converter function use linecache by default to store a byte code of the generated converted function, so each time we called _get_cattrs_converter our custom dict function is cached.

POC

Click to expand

from appstoreserverlibrary.models.JWSTransactionDecodedPayload import JWSTransactionDecodedPayload
from appstoreserverlibrary.models.LibraryUtility import _get_cattrs_converter

import gc


def leaky_function():
    c = _get_cattrs_converter(JWSTransactionDecodedPayload)  # cattrs.Converter()
    c.structure(
        {"originalTransactionId": "123"},
        JWSTransactionDecodedPayload,
    )


def count_objects_by_type():
    gc.collect()

    type_count = {}
    for obj in gc.get_objects():
        obj_type = type(obj)
        type_count[obj_type] = type_count.get(obj_type, 0) + 1
    return type_count


def diff_snapshots(before, after):
    diff = {}
    for key in after:
        before_count = before.get(key, 0)
        after_count = after[key]
        if after_count != before_count:
            diff[key] = (before_count, after_count, after_count - before_count)
    for key in before:
        if key not in after:
            diff[key] = (before[key], 0, -before[key])
    return diff


def print_diff(diff):
    sorted_diff = sorted(diff.items(), key=lambda item: abs(item[1][2]), reverse=True)

    print("Type                 | Before | After | Diff")
    print("---------------------|--------|-------|------")
    for key, (before_count, after_count, diff_count) in sorted_diff:
        print(
            f"{key.__name__:<20} | {before_count:6} | {after_count:6} | {diff_count:+6}")


def test_memory_leak():
    print("Taking snapshot before calling leaky_function...")
    before = count_objects_by_type()

    # Call the suspected leaky function multiple times
    for _ in range(1000):
        leaky_function()

    print("Taking snapshot after calling leaky_function...")
    after = count_objects_by_type()

    diff = diff_snapshots(before, after)
    print_diff(diff)


if __name__ == "__main__":
    test_memory_leak()

Taking snapshot before calling leaky_function...
Taking snapshot after calling leaky_function...
Type                 | Before | After | Diff
---------------------|--------|-------|------
list                 |    460 |   1460 |  +1000
tuple                |   2052 |   3052 |  +1000
dict                 |   1493 |   1497 |     +4

Leaking code

app-store-server-library-python/appstoreserverlibrary/models/LibraryUtility.py

Lines 67 to 68 in 26f2682

    
           c.register_structure_hook_factory(has, lambda cl: make_dict_structure_fn(cl, c, **cattrs_overrides)) 
        
           c.register_unstructure_hook_factory(has, lambda cl: make_dict_unstructure_fn(cl, c, **cattrs_overrides))

Possible fix

Looks like we can cache entirely _get_cattrs_converter

@lru_cache(maxsize=None)
def _get_cattrs_converter(destination_class: Type[T]) -> cattrs.Converter:

or disable line caching at the cattrs

def _get_cattrs_converter
...
make_dict_structure_fn(cl, c, _cattrs_use_linecache=False,

The text was updated successfully, but these errors were encountered:

Reskov mentioned this issue May 20, 2024

cache cattrs converter to prevent memory leak #92

Merged

alexanderjordanbaker closed this as completed in #92 May 22, 2024

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Memory leaks at cattrs converter #90

Memory leaks at cattrs converter #90

Reskov commented May 17, 2024 •

edited

Memory leaks at cattrs converter #90

Memory leaks at cattrs converter #90

Comments

Reskov commented May 17, 2024 • edited

POC

Leaking code

Possible fix

Reskov commented May 17, 2024 •

edited