Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 122 additions & 0 deletions src/launchpad/size/treemap/known_libraries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
"""Catalog of well-known iOS libraries for treemap grouping.

Statically-linked third-party libraries show up in the main binary as Swift module
nodes (e.g. ``Alamofire``) or Objective-C class nodes (e.g. ``FIRApp``). This catalog
lets the treemap builder recognize those nodes and group them under a single
``Libraries`` parent so reviewers can see how much size known SDKs contribute.

Keep entries conservative: exact Swift module names, and distinctive Objective-C class
prefixes (3+ chars) that are unlikely to collide with first-party code.
"""

from __future__ import annotations

from dataclasses import dataclass, field


@dataclass(frozen=True)
class KnownLibrary:
    """Immutable catalog entry for one third-party iOS library.

    Attributes:
        name: Canonical name of the library.
        swift_modules: Swift module names that identify the library by exact match.
        objc_prefixes: Objective-C class-name prefixes that identify the library.
    """

    name: str
    # Defaults to an empty set for libraries that ship no Swift module.
    swift_modules: frozenset[str] = field(default_factory=frozenset)
    # Defaults to an empty tuple for libraries recognized by Swift module name only.
    objc_prefixes: tuple[str, ...] = field(default=())


# Curated, non-exhaustive catalog of popular iOS libraries. Extend as needed.
#
# Every Objective-C prefix listed here must be at least 3 characters long and
# distinctive enough that it is unlikely to collide with first-party code or with
# another SDK, because prefixes are matched with a plain ``startswith`` check.
KNOWN_LIBRARIES: tuple[KnownLibrary, ...] = (
    KnownLibrary("Sentry", frozenset({"Sentry", "SentrySwift", "SentrySwiftUI"}), ("Sentry",)),
    KnownLibrary(
        "Firebase",
        frozenset(
            {
                "FirebaseCore",
                "FirebaseCoreInternal",
                "FirebaseAnalytics",
                "FirebaseCrashlytics",
                "FirebaseMessaging",
                "FirebaseFirestore",
                "FirebaseFirestoreSwift",
                "FirebaseAuth",
                "FirebaseDatabase",
                "FirebaseStorage",
                "FirebaseRemoteConfig",
                "FirebaseInstallations",
                "FirebasePerformance",
                "FirebaseDynamicLinks",
                "FirebaseInAppMessaging",
                "FirebaseAppCheck",
            }
        ),
        ("FIR", "FBLPromise", "GUL"),
    ),
    KnownLibrary("Alamofire", frozenset({"Alamofire"})),
    KnownLibrary("Lottie", frozenset({"Lottie"})),
    KnownLibrary("Kingfisher", frozenset({"Kingfisher"})),
    KnownLibrary("SnapKit", frozenset({"SnapKit"})),
    KnownLibrary("Nuke", frozenset({"Nuke", "NukeUI"})),
    KnownLibrary("SDWebImage", frozenset({"SDWebImage", "SDWebImageSwiftUI"}), ("SDWeb", "SDImage", "SDAnimated")),
    KnownLibrary("RxSwift", frozenset({"RxSwift", "RxCocoa", "RxRelay", "RxBlocking"})),
    KnownLibrary("Realm", frozenset({"Realm", "RealmSwift"}), ("RLM",)),
    KnownLibrary("GoogleMaps", frozenset({"GoogleMaps", "GoogleMapsBase"}), ("GMS",)),
    KnownLibrary("GoogleSignIn", frozenset({"GoogleSignIn"}), ("GID",)),
    KnownLibrary("Facebook", frozenset({"FacebookCore", "FacebookLogin", "FacebookShare"}), ("FBSDK",)),
    KnownLibrary("AFNetworking", frozenset(), ("AFHTTP", "AFURL", "AFNetwork", "AFSecurity")),
    KnownLibrary("Stripe", frozenset({"Stripe", "StripeCore", "StripePayments", "StripeUICore"}), ("STP",)),
    KnownLibrary("Branch", frozenset({"BranchSDK"}), ("BNC", "Branch")),
    KnownLibrary("Mixpanel", frozenset({"Mixpanel"}), ("Mixpanel",)),
    KnownLibrary("Amplitude", frozenset({"Amplitude", "AmplitudeSwift"}), ("AMP",)),
    KnownLibrary("Segment", frozenset({"Segment"}), ("SEG",)),
    KnownLibrary("Adjust", frozenset({"Adjust", "AdjustSdk"}), ("ADJ",)),
    KnownLibrary("AppsFlyer", frozenset({"AppsFlyerLib"}), ("AppsFlyer",)),
    KnownLibrary("Bugsnag", frozenset({"Bugsnag"}), ("BSG", "Bugsnag")),
    # Datadog is matched by Swift module name only. Its "DD" class prefix is shorter
    # than the 3-character minimum and would also claim unrelated Objective-C types
    # such as CocoaLumberjack's ``DDLog``.
    KnownLibrary(
        "Datadog",
        frozenset({"Datadog", "DatadogCore", "DatadogLogs", "DatadogRUM", "DatadogTrace"}),
    ),
    KnownLibrary("Charts", frozenset({"Charts", "DGCharts"})),
    KnownLibrary("SwiftyJSON", frozenset({"SwiftyJSON"})),
    KnownLibrary("Moya", frozenset({"Moya"})),
    KnownLibrary("PromiseKit", frozenset({"PromiseKit"})),
    KnownLibrary("EmergeTools", frozenset({"EmergeSnapshots", "SnapshotPreferences", "SnapshotPreviewsCore"})),
)


def _build_swift_module_index() -> dict[str, str]:
    """Map each catalogued Swift module name to its library's canonical name.

    Should a module name ever be listed under more than one library, the entry that
    appears later in ``KNOWN_LIBRARIES`` wins.
    """
    return {
        module_name: entry.name
        for entry in KNOWN_LIBRARIES
        for module_name in entry.swift_modules
    }


def _build_objc_prefix_index() -> tuple[tuple[str, str], ...]:
    """Return ``(prefix, library name)`` pairs ordered from longest prefix to shortest.

    Longest-first ordering lets a caller that scans the pairs in order hit the most
    specific prefix before any shorter one. Prefixes of equal length stay in catalog
    order because the sort is stable.
    """
    flattened = (
        (objc_prefix, entry.name)
        for entry in KNOWN_LIBRARIES
        for objc_prefix in entry.objc_prefixes
    )
    return tuple(sorted(flattened, key=lambda item: len(item[0]), reverse=True))


# Lookup tables derived from KNOWN_LIBRARIES once, when this module is imported.
_SWIFT_MODULE_INDEX = _build_swift_module_index()
_OBJC_PREFIX_INDEX = _build_objc_prefix_index()


def resolve_known_library(node_name: str) -> str | None:
    """Return the canonical name of the known library that ``node_name`` belongs to.

    An exact Swift module name is tried first. Failing that, the name is compared
    against the Objective-C class prefixes in index order, and the first prefix it
    starts with decides the library.

    Args:
        node_name: Name of a treemap node, e.g. a Swift module or an ObjC class.

    Returns:
        The library's canonical name, or ``None`` when ``node_name`` is empty or
        matches nothing in the catalog.
    """
    if node_name:
        exact_match = _SWIFT_MODULE_INDEX.get(node_name)
        if exact_match is not None:
            return exact_match

        return next(
            (owner for objc_prefix, owner in _OBJC_PREFIX_INDEX if node_name.startswith(objc_prefix)),
            None,
        )

    return None

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Prefix matching mislabels modules

Medium Severity

resolve_known_library runs Objective-C prefix checks on every symbol child name, including Swift module nodes, so first-party modules that only share a prefix (for example Branch or SentryUI) can be grouped under third-party Libraries. The Datadog DD prefix is two characters and matches unrelated ObjC types such as CocoaLumberjack’s DDLog, despite the catalog comment requiring 3+ character prefixes.

Additional Locations (1)
Fix in Cursor Fix in Web

Reviewed by Cursor Bugbot for commit 7282aeb. Configure here.

66 changes: 64 additions & 2 deletions src/launchpad/size/treemap/macho_element_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from launchpad.size.models.treemap import TreemapElement, TreemapType
from launchpad.size.symbols.partitioner import SymbolInfo
from launchpad.size.symbols.types import SwiftSymbolTypeGroup
from launchpad.size.treemap.known_libraries import resolve_known_library
from launchpad.size.treemap.treemap_element_builder import TreemapElementBuilder
from launchpad.utils.logging import get_logger

Expand Down Expand Up @@ -131,9 +132,13 @@ def debit_section(seg_name: str | None, sec_name: str | None, sz: int) -> int:

# Add symbols if this slice has symbol_info (only primary slice will have this)
if arch_slice.symbol_info:
# Swift module and ObjC class nodes are collected separately so recognized
# third-party libraries can be grouped under a single "Libraries" node.
symbol_children: List[TreemapElement] = []

self._add_swift_symbols(
arch_slice.symbol_info,
binary_children,
symbol_children,
section_subtractions,
debit_section,
canonical_key,
Expand All @@ -142,13 +147,16 @@ def debit_section(seg_name: str | None, sec_name: str | None, sz: int) -> int:

self._add_objc_symbols(
arch_slice.symbol_info,
binary_children,
symbol_children,
section_subtractions,
debit_section,
canonical_key,
zerofill_sections_set,
)

binary_children.extend(self._group_known_libraries(symbol_children))

# "Other Symbols" (C/C++/compiler-generated) is not a library and is not grouped.
self._add_other_symbols(
arch_slice.symbol_info,
binary_children,
Expand All @@ -169,6 +177,60 @@ def debit_section(seg_name: str | None, sec_name: str | None, sz: int) -> int:

return binary_children

def _group_known_libraries(self, symbol_children: List[TreemapElement]) -> List[TreemapElement]:
    """Move nodes belonging to known third-party libraries under one "Libraries" node.

    Each node's name is passed to ``resolve_known_library``. Nodes that resolve to a
    library are bucketed per library; the rest are returned in their original
    relative order. When at least one node was recognized, a "Libraries" node holding
    one entry per library is appended after the unrecognized nodes. Sizes are plain
    sums of the grouped nodes, so the total is unchanged.

    Args:
        symbol_children: Swift module and ObjC class nodes to partition.

    Returns:
        The unrecognized nodes, followed by the "Libraries" node if any node matched.
    """
    ungrouped: List[TreemapElement] = []
    # Dict insertion order records libraries in the order they were first seen.
    matched: Dict[str, List[TreemapElement]] = {}

    for element in symbol_children:
        owner = resolve_known_library(element.name)
        if owner is not None:
            if owner not in matched:
                matched[owner] = []
            matched[owner].append(element)
            continue
        ungrouped.append(element)

    if not matched:
        return ungrouped

    per_library: List[TreemapElement] = []
    for owner, members in matched.items():
        if len(members) == 1 and members[0].name == owner:
            # A lone node already named after its library needs no wrapper node.
            per_library.append(members[0])
        else:
            per_library.append(
                TreemapElement(
                    name=owner,
                    size=sum(member.size for member in members),
                    type=TreemapType.MODULES,
                    path=None,
                    is_dir=False,
                    children=sorted(members, key=lambda member: member.size, reverse=True),
                )
            )

    # Largest library first.
    per_library.sort(key=lambda entry: entry.size, reverse=True)

    libraries_node = TreemapElement(
        name="Libraries",
        size=sum(entry.size for entry in per_library),
        type=TreemapType.MODULES,
        path=None,
        is_dir=False,
        children=per_library,
    )
    return [*ungrouped, libraries_node]

def _build_arch_slice_metadata(self, arch_slice: ArchitectureSlice) -> List[TreemapElement]:
"""Build metadata components for an architecture slice."""
metadata_children: List[TreemapElement] = []
Expand Down
32 changes: 32 additions & 0 deletions tests/unit/size/treemap/test_known_libraries.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
"""Unit tests for the known-libraries catalog."""

from launchpad.size.treemap.known_libraries import resolve_known_library


def test_resolves_exact_swift_module() -> None:
    """Exact Swift module names resolve to the library that lists them."""
    expected_by_module = {
        "Alamofire": "Alamofire",
        "Lottie": "Lottie",
        "RxCocoa": "RxSwift",
    }
    for module_name, library_name in expected_by_module.items():
        assert resolve_known_library(module_name) == library_name


def test_groups_multiple_modules_to_one_library() -> None:
    """Different modules of the same SDK resolve to one shared library name."""
    for module_name in ("FirebaseCore", "FirebaseAnalytics"):
        assert resolve_known_library(module_name) == "Firebase"


def test_resolves_objc_class_prefix() -> None:
    """Objective-C class names resolve through their library's class prefix."""
    expected_by_class = {
        "FIRApp": "Firebase",
        "RLMRealm": "Realm",
        "GMSMapView": "GoogleMaps",
    }
    for class_name, library_name in expected_by_class.items():
        assert resolve_known_library(class_name) == library_name


def test_swift_module_takes_priority_over_prefix() -> None:
    """A name that is both an exact module and an ObjC prefix still resolves."""
    # "Sentry" is listed as an exact Swift module and as an ObjC prefix. Both routes
    # lead to the same library; the exact-module lookup is the one expected to win.
    resolved = resolve_known_library("Sentry")
    assert resolved == "Sentry"


def test_unknown_and_empty_names_return_none() -> None:
    """Names outside the catalog, and the empty string, resolve to ``None``."""
    for unrecognized in ("MyApp", "AppViewModel", ""):
        assert resolve_known_library(unrecognized) is None
108 changes: 107 additions & 1 deletion tests/unit/size/treemap/test_macho_element_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,54 @@
SegmentInfo,
)
from launchpad.size.models.common import FileInfo
from launchpad.size.models.treemap import TreemapType
from launchpad.size.models.treemap import TreemapElement, TreemapType
from launchpad.size.symbols.macho_symbol_sizes import SymbolSize
from launchpad.size.symbols.partitioner import SymbolInfo
from launchpad.size.symbols.types import SwiftSymbolTypeGroup
from launchpad.size.treemap.macho_element_builder import MachOElementBuilder


def _swift_group(module: str, type_name: str, size: int) -> SwiftSymbolTypeGroup:
    """Create a Swift type group holding one symbol placed in ``__TEXT.__text``.

    The symbol carries the requested ``size`` and a mangled name derived from
    ``module`` and ``type_name``.
    """
    symbols = [
        SymbolSize(
            mangled_name=f"_{module}_{type_name}",
            section_name="__text",
            segment_name="__TEXT",
            address=0,
            size=size,
        )
    ]
    return SwiftSymbolTypeGroup(
        module=module,
        type_name=type_name,
        components=[module, type_name],
        symbol_count=len(symbols),
        symbols=symbols,
    )


def _create_arch_slice_with_swift_modules(
    arch_name: str, size: int, groups: list[SwiftSymbolTypeGroup]
) -> ArchitectureSlice:
    """Build a slice via ``_create_arch_slice`` and give it Swift-only symbol info.

    ``symbol_sizes`` is the concatenation of every group's symbols, in group order;
    all non-Swift symbol collections are left empty.
    """
    arch_slice = _create_arch_slice(arch_name, size)

    flattened_symbols = []
    for group in groups:
        flattened_symbols.extend(group.symbols)

    arch_slice.symbol_info = SymbolInfo(
        symbol_sizes=flattened_symbols,
        swift_type_groups=groups,
        objc_type_groups=[],
        cpp_type_groups=[],
        other_symbols=[],
        compiler_generated_symbols=[],
    )
    return arch_slice


def _find_child(element: TreemapElement, name: str) -> TreemapElement | None:
for child in element.children:
if child.name == name:
return child
return None


def _create_arch_slice(arch_name: str, size: int) -> ArchitectureSlice:
"""Helper to create an ArchitectureSlice with minimal required fields."""
return ArchitectureSlice(
Expand Down Expand Up @@ -174,3 +218,65 @@ def test_binary_not_in_map_returns_none(self):
element = builder.build_element(file_info, "UnknownBinary")

assert element is None


class TestMachOElementBuilderKnownLibraryGrouping:
    """Checks that recognized library modules end up beneath a "Libraries" node."""

    def _build(self, groups: list[SwiftSymbolTypeGroup]) -> TreemapElement:
        """Build the treemap element of a single-slice "MyApp" binary holding ``groups``."""
        binary_name = "MyApp"
        binary_size = 100000

        slice_with_symbols = _create_arch_slice_with_swift_modules("ARM64", size=binary_size, groups=groups)
        analysis = _create_binary_analysis(binary_name, [slice_with_symbols])
        builder = MachOElementBuilder(
            filesystem_block_size=4096,
            binary_analysis_map={binary_name: analysis},
        )

        root = builder.build_element(_create_file_info(binary_name, binary_size), binary_name)
        assert root is not None
        return root

    def test_known_library_grouped_app_module_stays_flat(self):
        root = self._build(
            [
                _swift_group("Alamofire", "Session", 500),
                _swift_group("MyApp", "ViewController", 300),
            ]
        )

        # The first-party module is still attached directly to the binary.
        app_module = _find_child(root, "MyApp")
        assert app_module is not None
        assert app_module.size == 300

        # The recognized library is no longer a direct child of the binary...
        assert _find_child(root, "Alamofire") is None

        # ...because it now lives under the "Libraries" node.
        libraries_node = _find_child(root, "Libraries")
        assert libraries_node is not None
        assert libraries_node.type == TreemapType.MODULES

        library_entry = _find_child(libraries_node, "Alamofire")
        assert library_entry is not None
        assert library_entry.size == 500
        assert libraries_node.size == 500

    def test_multiple_modules_collapse_into_one_library(self):
        root = self._build(
            [
                _swift_group("FirebaseCore", "App", 400),
                _swift_group("FirebaseAnalytics", "Logger", 600),
            ]
        )

        libraries_node = _find_child(root, "Libraries")
        assert libraries_node is not None

        # Both modules are gathered under one "Firebase" entry whose size is their sum.
        firebase_entry = _find_child(libraries_node, "Firebase")
        assert firebase_entry is not None
        assert firebase_entry.size == 1000
        assert {child.name for child in firebase_entry.children} == {"FirebaseCore", "FirebaseAnalytics"}

    def test_no_libraries_node_when_nothing_recognized(self):
        root = self._build([_swift_group("MyApp", "ViewController", 300)])

        assert _find_child(root, "Libraries") is None
        assert _find_child(root, "MyApp") is not None
Loading