"""Catalog of well-known iOS libraries for treemap grouping.

Statically-linked third-party libraries show up in the main binary as Swift module
nodes (e.g. ``Alamofire``) or Objective-C class nodes (e.g. ``FIRApp``). This catalog
lets the treemap builder recognize those nodes and group them under a single
``Libraries`` parent so reviewers can see how much size known SDKs contribute.

Keep entries conservative: exact Swift module names, and distinctive Objective-C class
prefixes (3+ chars) that are unlikely to collide with first-party code. Both rules are
checked when the module is imported, so a bad entry fails fast instead of silently
mis-grouping nodes.
"""

from dataclasses import dataclass
from typing import Optional

# Shortest Objective-C prefix the catalog accepts. Prefixes are matched with
# ``str.startswith``, so anything shorter claims far too many unrelated class names
# (and an empty prefix would claim every node).
_MIN_OBJC_PREFIX_LENGTH = 3


@dataclass(frozen=True)
class KnownLibrary:
    """A known iOS library and the identifiers used to recognize it.

    Raises:
        ValueError: if any entry of ``objc_prefixes`` is shorter than
            ``_MIN_OBJC_PREFIX_LENGTH`` characters.
    """

    name: str
    # Exact Swift module names that map to this library.
    swift_modules: frozenset[str] = frozenset()
    # Distinctive Objective-C class-name prefixes that map to this library.
    objc_prefixes: tuple[str, ...] = ()

    def __post_init__(self) -> None:
        # Validation only; a frozen dataclass must not assign to its fields here.
        too_short = [prefix for prefix in self.objc_prefixes if len(prefix) < _MIN_OBJC_PREFIX_LENGTH]
        if too_short:
            raise ValueError(
                f"Objective-C prefixes for {self.name!r} must be at least "
                f"{_MIN_OBJC_PREFIX_LENGTH} characters long: {too_short!r}"
            )


# Curated, non-exhaustive catalog of popular iOS libraries. Extend as needed.
KNOWN_LIBRARIES: tuple[KnownLibrary, ...] = (
    KnownLibrary("Sentry", frozenset({"Sentry", "SentrySwift", "SentrySwiftUI"}), ("Sentry",)),
    KnownLibrary(
        "Firebase",
        frozenset(
            {
                "FirebaseCore",
                "FirebaseCoreInternal",
                "FirebaseAnalytics",
                "FirebaseCrashlytics",
                "FirebaseMessaging",
                "FirebaseFirestore",
                "FirebaseFirestoreSwift",
                "FirebaseAuth",
                "FirebaseDatabase",
                "FirebaseStorage",
                "FirebaseRemoteConfig",
                "FirebaseInstallations",
                "FirebasePerformance",
                "FirebaseDynamicLinks",
                "FirebaseInAppMessaging",
                "FirebaseAppCheck",
            }
        ),
        ("FIR", "FBLPromise", "GUL"),
    ),
    KnownLibrary("Alamofire", frozenset({"Alamofire"})),
    KnownLibrary("Lottie", frozenset({"Lottie"})),
    KnownLibrary("Kingfisher", frozenset({"Kingfisher"})),
    KnownLibrary("SnapKit", frozenset({"SnapKit"})),
    KnownLibrary("Nuke", frozenset({"Nuke", "NukeUI"})),
    KnownLibrary("SDWebImage", frozenset({"SDWebImage", "SDWebImageSwiftUI"}), ("SDWeb", "SDImage", "SDAnimated")),
    KnownLibrary("RxSwift", frozenset({"RxSwift", "RxCocoa", "RxRelay", "RxBlocking"})),
    KnownLibrary("Realm", frozenset({"Realm", "RealmSwift"}), ("RLM",)),
    KnownLibrary("GoogleMaps", frozenset({"GoogleMaps", "GoogleMapsBase"}), ("GMS",)),
    KnownLibrary("GoogleSignIn", frozenset({"GoogleSignIn"}), ("GID",)),
    KnownLibrary("Facebook", frozenset({"FacebookCore", "FacebookLogin", "FacebookShare"}), ("FBSDK",)),
    KnownLibrary("AFNetworking", frozenset(), ("AFHTTP", "AFURL", "AFNetwork", "AFSecurity")),
    KnownLibrary("Stripe", frozenset({"Stripe", "StripeCore", "StripePayments", "StripeUICore"}), ("STP",)),
    KnownLibrary("Branch", frozenset({"BranchSDK"}), ("BNC", "Branch")),
    KnownLibrary("Mixpanel", frozenset({"Mixpanel"}), ("Mixpanel",)),
    KnownLibrary("Amplitude", frozenset({"Amplitude", "AmplitudeSwift"}), ("AMP",)),
    KnownLibrary("Segment", frozenset({"Segment"}), ("SEG",)),
    KnownLibrary("Adjust", frozenset({"Adjust", "AdjustSdk"}), ("ADJ",)),
    KnownLibrary("AppsFlyer", frozenset({"AppsFlyerLib"}), ("AppsFlyer",)),
    KnownLibrary("Bugsnag", frozenset({"Bugsnag"}), ("BSG", "Bugsnag")),
    # No Objective-C prefix for Datadog: "DD" is below the minimum prefix length and is
    # also the class prefix used by CocoaLumberjack (DDLog, DDFileLogger, ...), so it
    # would attribute an unrelated library's classes to Datadog.
    KnownLibrary("Datadog", frozenset({"Datadog", "DatadogCore", "DatadogLogs", "DatadogRUM", "DatadogTrace"})),
    KnownLibrary("Charts", frozenset({"Charts", "DGCharts"})),
    KnownLibrary("SwiftyJSON", frozenset({"SwiftyJSON"})),
    KnownLibrary("Moya", frozenset({"Moya"})),
    KnownLibrary("PromiseKit", frozenset({"PromiseKit"})),
    KnownLibrary("EmergeTools", frozenset({"EmergeSnapshots", "SnapshotPreferences", "SnapshotPreviewsCore"})),
)


def _build_swift_module_index() -> dict[str, str]:
    """Map every catalogued Swift module name to its library's canonical name.

    Raises:
        ValueError: if two different libraries claim the same Swift module; without
            this check the later catalog entry would silently win.
    """
    index: dict[str, str] = {}
    for library in KNOWN_LIBRARIES:
        for module in library.swift_modules:
            owner = index.setdefault(module, library.name)
            if owner != library.name:
                raise ValueError(
                    f"Swift module {module!r} is claimed by both {owner!r} and {library.name!r}"
                )
    return index


def _build_objc_prefix_index() -> tuple[tuple[str, str], ...]:
    """Return ``(prefix, library name)`` pairs ordered from longest to shortest prefix.

    Longest-first ordering makes the most specific prefix win when several match;
    the sort is stable, so equal-length prefixes keep catalog order.
    """
    pairs = [(prefix, library.name) for library in KNOWN_LIBRARIES for prefix in library.objc_prefixes]
    return tuple(sorted(pairs, key=lambda pair: len(pair[0]), reverse=True))


_SWIFT_MODULE_INDEX = _build_swift_module_index()
_OBJC_PREFIX_INDEX = _build_objc_prefix_index()


def resolve_known_library(node_name: str) -> Optional[str]:
    """Resolve a treemap node name to a known library's canonical name.

    Matches an exact Swift module name first, then a distinctive Objective-C class
    prefix (longest prefix first). Matching is case-sensitive.

    Args:
        node_name: Name of a treemap node, e.g. a Swift module or an ObjC class name.

    Returns:
        The canonical library name, or ``None`` when the name is empty or doesn't
        correspond to a known library.
    """
    if not node_name:
        return None

    library = _SWIFT_MODULE_INDEX.get(node_name)
    if library is not None:
        return library

    for prefix, library_name in _OBJC_PREFIX_INDEX:
        if node_name.startswith(prefix):
            return library_name

    return None
def _group_known_libraries(self, symbol_children: List[TreemapElement]) -> List[TreemapElement]:
    """Collect nodes that belong to known third-party libraries under one "Libraries" node.

    Each node name is passed to ``resolve_known_library``. Nodes that resolve to a
    library are bucketed per library; nodes that do not resolve are returned first,
    in their incoming order. When at least one node resolved, a single "Libraries"
    element is appended after them. Its size is the sum of its children's sizes, so
    the total size of the returned elements equals the total size of the input.

    Args:
        symbol_children: Swift module and ObjC class nodes built for the binary.

    Returns:
        The unrecognized nodes, followed by one "Libraries" node when any node was
        recognized.
    """
    ungrouped: List[TreemapElement] = []
    # Plain dicts keep insertion order, so libraries are visited in first-seen order.
    by_library: Dict[str, List[TreemapElement]] = {}

    for element in symbol_children:
        owner = resolve_known_library(element.name)
        if owner is not None:
            if owner not in by_library:
                by_library[owner] = []
            by_library[owner].append(element)
        else:
            ungrouped.append(element)

    if len(by_library) == 0:
        return ungrouped

    entries: List[TreemapElement] = []
    for owner, members in by_library.items():
        # A lone node already named after its library needs no wrapper; wrapping it
        # would only produce a redundant "Library -> Library" level.
        already_named = len(members) == 1 and members[0].name == owner
        if already_named:
            entry = members[0]
        else:
            entry = TreemapElement(
                name=owner,
                size=sum(member.size for member in members),
                type=TreemapType.MODULES,
                path=None,
                is_dir=False,
                children=sorted(members, key=lambda member: member.size, reverse=True),
            )
        entries.append(entry)

    # Largest library first; the sort is stable, so ties keep first-seen order.
    entries = sorted(entries, key=lambda entry: entry.size, reverse=True)

    libraries_node = TreemapElement(
        name="Libraries",
        size=sum(entry.size for entry in entries),
        type=TreemapType.MODULES,
        path=None,
        is_dir=False,
        children=entries,
    )
    return ungrouped + [libraries_node]
def test_resolves_exact_swift_module() -> None:
    """Exact Swift module names resolve to the library that lists them."""
    expected = {"Alamofire": "Alamofire", "Lottie": "Lottie", "RxCocoa": "RxSwift"}
    for module_name, library_name in expected.items():
        assert resolve_known_library(module_name) == library_name


def test_groups_multiple_modules_to_one_library() -> None:
    """Different modules of the same SDK resolve to one canonical library name."""
    for module_name in ("FirebaseCore", "FirebaseAnalytics"):
        assert resolve_known_library(module_name) == "Firebase"


def test_resolves_objc_class_prefix() -> None:
    """Objective-C class names are recognized by their distinctive prefix."""
    expected = {"FIRApp": "Firebase", "RLMRealm": "Realm", "GMSMapView": "GoogleMaps"}
    for class_name, library_name in expected.items():
        assert resolve_known_library(class_name) == library_name


def test_swift_module_takes_priority_over_prefix() -> None:
    """A name that is both an exact module and an ObjC prefix still resolves."""
    # "Sentry" is listed as a Swift module and as an ObjC prefix; both paths give the
    # same library, and the exact-module lookup is the one tried first.
    resolved = resolve_known_library("Sentry")
    assert resolved == "Sentry"


def test_unknown_and_empty_names_return_none() -> None:
    """Names outside the catalog, and the empty string, resolve to ``None``."""
    for name in ("MyApp", "AppViewModel", ""):
        assert resolve_known_library(name) is None


def _swift_group(module: str, type_name: str, size: int) -> SwiftSymbolTypeGroup:
    """Return a Swift type group holding exactly one ``__TEXT.__text`` symbol of ``size`` bytes."""
    only_symbol = SymbolSize(
        mangled_name=f"_{module}_{type_name}",
        section_name="__text",
        segment_name="__TEXT",
        address=0,
        size=size,
    )
    return SwiftSymbolTypeGroup(
        module=module,
        type_name=type_name,
        components=[module, type_name],
        symbol_count=1,
        symbols=[only_symbol],
    )


def _create_arch_slice_with_swift_modules(
    arch_name: str, size: int, groups: list[SwiftSymbolTypeGroup]
) -> ArchitectureSlice:
    """Build a slice via ``_create_arch_slice`` and attach ``groups`` as its Swift symbol info."""
    # Flatten every group's symbols, keeping group order then symbol order.
    all_symbols = []
    for group in groups:
        for symbol in group.symbols:
            all_symbols.append(symbol)

    result = _create_arch_slice(arch_name, size)
    result.symbol_info = SymbolInfo(
        symbol_sizes=all_symbols,
        swift_type_groups=groups,
        objc_type_groups=[],
        cpp_type_groups=[],
        other_symbols=[],
        compiler_generated_symbols=[],
    )
    return result


def _find_child(element: TreemapElement, name: str) -> TreemapElement | None:
    """Return the first direct child of ``element`` called ``name``, or ``None``."""
    return next((candidate for candidate in element.children if candidate.name == name), None)


class TestMachOElementBuilderKnownLibraryGrouping:
    """Tests for grouping recognized library modules under a "Libraries" node."""

    def _build(self, groups: list[SwiftSymbolTypeGroup]) -> TreemapElement:
        """Build the treemap element for a 100000-byte "MyApp" binary containing ``groups``."""
        total_size = 100000
        analysis = _create_binary_analysis(
            "MyApp",
            [_create_arch_slice_with_swift_modules("ARM64", size=total_size, groups=groups)],
        )
        builder = MachOElementBuilder(
            filesystem_block_size=4096,
            binary_analysis_map={"MyApp": analysis},
        )
        built = builder.build_element(_create_file_info("MyApp", total_size), "MyApp")
        assert built is not None
        return built

    def test_known_library_grouped_app_module_stays_flat(self):
        groups = [
            _swift_group("Alamofire", "Session", 500),
            _swift_group("MyApp", "ViewController", 300),
        ]
        root = self._build(groups)

        # The first-party module is still a direct child of the binary.
        app_node = _find_child(root, "MyApp")
        assert app_node is not None
        assert app_node.size == 300

        # The recognized library is no longer at the top level ...
        assert _find_child(root, "Alamofire") is None

        # ... it sits under the "Libraries" node instead.
        libraries_node = _find_child(root, "Libraries")
        assert libraries_node is not None
        assert libraries_node.type == TreemapType.MODULES

        alamofire_node = _find_child(libraries_node, "Alamofire")
        assert alamofire_node is not None
        assert alamofire_node.size == 500
        assert libraries_node.size == 500

    def test_multiple_modules_collapse_into_one_library(self):
        groups = [
            _swift_group("FirebaseCore", "App", 400),
            _swift_group("FirebaseAnalytics", "Logger", 600),
        ]
        root = self._build(groups)

        libraries_node = _find_child(root, "Libraries")
        assert libraries_node is not None

        firebase_node = _find_child(libraries_node, "Firebase")
        assert firebase_node is not None
        assert firebase_node.size == 1000
        grouped_names = {member.name for member in firebase_node.children}
        assert grouped_names == {"FirebaseCore", "FirebaseAnalytics"}

    def test_no_libraries_node_when_nothing_recognized(self):
        root = self._build([_swift_group("MyApp", "ViewController", 300)])

        assert _find_child(root, "Libraries") is None
        assert _find_child(root, "MyApp") is not None