Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 37 additions & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -380,6 +380,43 @@ require_once __DIR__ . '/../src/ZVec.php';
- `__destruct()` calls `close()` automatically if not already closed
- After `destroy()`, any method call causes **segfault** (handle invalidated)

### Memory Leak Regression Tests

The project includes regression tests to detect FFI memory leaks:

**Test files:**
- `tests/test_memory_collection_lifecycle.phpt` — 50x create/open/close/destroy cycle
- `tests/test_memory_deserialize_buffer.phpt` — 100x serialize/deserialize cycle
- `tests/test_memory_query_output_fields.phpt` — 100x query with/without output fields
- `tests/test_memory_init_error_path.phpt` — 20x init error/recovery cycles
- `tests/test_memory_delete_cstrings.phpt` — 30x insert/delete/fetch cycles

**Methodology:**
- PHP heap monitoring: `memory_get_usage()` delta between start and end
- Native memory monitoring: VmRSS from `/proc/self/status` (Linux only)
- Threshold: 500KB maximum growth per test scenario
- Each test uses `try-finally` with `uniqid()` temp directory cleanup

**Adding new memory tests:**
1. Create `tests/test_memory_<scenario>.phpt`
2. Record `memory_get_usage()` before the loop
3. Run the scenario N times (typically 20-100 iterations)
4. Record `memory_get_usage()` after the loop
5. Assert delta is within 500KB threshold
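6. Use `try-finally` for cleanup, and never call `exit()` inside the `try` block — PHP does not run `finally` blocks when `exit()` is called, so the cleanup would be skipped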

**VmRSS monitoring (optional, Linux only):**
```php
function getVmRSS(): int {
    // The read is silenced on purpose: when /proc/self/status cannot be read
    // (e.g. not on Linux) the function falls through to 0 instead of warning.
    $procStatus = @file_get_contents('/proc/self/status');
    $fields = [];
    $found = $procStatus !== false
        && preg_match('/^VmRSS:\s+(\d+)\s+kB$/m', $procStatus, $fields);

    // The captured group is the VmRSS figure in kB.
    return $found ? (int)$fields[1] : 0;
}
```

### Debug & Logging

```php
Expand Down
12 changes: 12 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,18 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

### Added

- **TEST-007: Memory leak regression tests for FFI memory safety** (#105)
- Added 5 `.phpt` test files for memory leak regression testing:
- `test_memory_collection_lifecycle.phpt` — 50x create/open/close/destroy cycle with memory growth tracking
- `test_memory_deserialize_buffer.phpt` — 100x serialize/deserialize cycle with buffer leak detection
- `test_memory_query_output_fields.phpt` — 100x query with/without output fields for C string leak detection
- `test_memory_init_error_path.phpt` — 20x init error/recovery cycles for C allocation leak detection
- `test_memory_delete_cstrings.phpt` — 30x insert/delete/fetch cycles for C string array leak detection
- Added `examples/07_memory_management.php` — FFI memory cleanup patterns (C string free, try-finally guards, VmRSS monitoring)
- Each test uses `try-finally` with `uniqid()` temp directory under `test_dbs/` and `exec("rm -rf ...")` cleanup
- Tests verify both PHP heap (`memory_get_usage()`) and native memory (VmRSS) stability
- 500KB threshold for memory growth detection across all test scenarios

- **SMELL-013: Migrated all classes to `CrazyGoat\ZVec\` namespace with PSR-4 autoloading** (#94)
- All library classes now live under `CrazyGoat\ZVec\` namespace
- Global class names preserved via `class_alias()` for backward compatibility
Expand Down
156 changes: 156 additions & 0 deletions examples/07_memory_management.php
Original file line number Diff line number Diff line change
@@ -0,0 +1,156 @@
<?php

declare(strict_types=1);

/**
* Example 7: Memory Management — FFI memory cleanup patterns
*
* Demonstrates:
* - VmRSS monitoring for native memory leak detection
* - C string lifecycle (allocation via FFI, freeing via FFI::free)
* - try-finally guards for guaranteed cleanup
* - Collection lifecycle memory patterns
* - Serialize/deserialize buffer management
*/

require_once __DIR__ . '/../src/ZVec.php';

// Banner for the example's console output.
echo "=== Example 7: Memory Management ===\n\n";

// Initialise the library before any collection is created, passing the
// LOG_CONSOLE log type and LOG_WARN log level constants.
ZVec::init(logType: ZVec::LOG_CONSOLE, logLevel: ZVec::LOG_WARN);

/**
 * Read this process's resident set size (VmRSS) from /proc/self/status.
 *
 * VmRSS is a process-wide figure, so it also reflects memory allocated by
 * native (C/C++) code, which memory_get_usage() does not report.
 *
 * @return int VmRSS in kB, or 0 when the status file cannot be read
 *             (e.g. not on Linux) or contains no VmRSS line.
 */
function getVmRSS(): int {
    $status = @file_get_contents('/proc/self/status');
    if ($status === false) {
        // Not on Linux (or /proc is unavailable).
        return 0;
    }

    $match = [];
    return preg_match('/^VmRSS:\s+(\d+)\s+kB$/m', $status, $match)
        ? (int)$match[1]
        : 0;
}

// --- 1. Collection lifecycle memory test ---
// Creates, fills, optimizes, closes, re-opens and destroys a collection 20
// times at the same path and reports how much VmRSS grew over the whole run.
echo "[1] Collection lifecycle memory test\n";
$path1 = __DIR__ . '/../test_dbs/example_07_a_' . uniqid();

$schema = new ZVecSchema('mem_demo');
$schema->setMaxDocCountPerSegment(1000)
    ->addInt64('id', nullable: false)
    ->addVectorFp32('vec', dimension: 4, metricType: ZVecSchema::METRIC_IP);

try {
    $vmBefore = getVmRSS();

    for ($i = 0; $i < 20; $i++) {
        $c = ZVec::create($path1, $schema);
        $doc = new ZVecDoc("doc{$i}");
        $doc->setInt64('id', $i)->setVectorFp32('vec', [(float)$i, 0.0, 0.0, 0.0]);
        $c->insert($doc);
        $c->optimize();
        $c->close();
        // Re-open the closed collection so destroy() runs on a fresh handle.
        $c = ZVec::open($path1);
        $c->destroy();
    }

    $vmAfter = getVmRSS();
    $deltaKb = $vmAfter - $vmBefore;
    echo " VmRSS delta: {$deltaKb} KB (20x create/destroy)\n";

    // getVmRSS() returns 0 when /proc is unavailable, so the delta is 0 there.
    if ($deltaKb > 500) {
        echo " WARNING: Possible native memory leak (+{$deltaKb} KB)\n";
    } else {
        echo " OK: No significant native memory growth\n";
    }
} finally {
    // Guaranteed cleanup: if any call above throws mid-cycle, the on-disk
    // collection would otherwise be left behind in test_dbs/.
    exec("rm -rf " . escapeshellarg($path1));
}

// --- 2. Serialize/deserialize buffer test ---
// Round-trips a document through serialize()/deserialize() 50 times and
// reports the growth of both VmRSS and the PHP heap.
echo "\n[2] Serialize/deserialize buffer test\n";

$vmBefore = getVmRSS();
$memBefore = memory_get_usage();

for ($i = 0; $i < 50; $i++) {
    $doc = new ZVecDoc('test_pk');
    $doc->setInt64('id', $i)
        ->setString('name', "item_{$i}")
        ->setVectorFp32('vec', [(float)$i, 1.0, 0.0, 0.0]);

    $data = $doc->serialize();
    // The previous iteration's $restored loses its last reference when the
    // variable is reassigned here.
    $restored = ZVecDoc::deserialize($data);
}

$vmAfter = getVmRSS();
$memAfter = memory_get_usage();
$vmDeltaKb = $vmAfter - $vmBefore;
$phpDelta = $memAfter - $memBefore;

echo " VmRSS delta: {$vmDeltaKb} KB, PHP heap delta: {$phpDelta} bytes (50x serialize/deserialize)\n";

// 200 KB is this example's warning threshold for native buffer growth.
echo $vmDeltaKb > 200
    ? " WARNING: Possible native buffer leak (+{$vmDeltaKb} KB)\n"
    : " OK: No native buffer memory growth\n";

// --- 3. C string cleanup pattern (try-finally) ---
// Runs 30 queries with outputFields against a small collection and reports
// VmRSS growth. The collection handle and its directory are released in a
// finally block so that a failing insert/query cannot leak them.
echo "\n[3] C string cleanup pattern (try-finally)\n";
$path3 = __DIR__ . '/../test_dbs/example_07_b_' . uniqid();

$schema3 = new ZVecSchema('cstr_demo');
$schema3->setMaxDocCountPerSegment(1000)
    ->addInt64('id', nullable: false)
    ->addString('name', nullable: true)
    ->addVectorFp32('vec', dimension: 4, metricType: ZVecSchema::METRIC_IP);

$c = null;
try {
    $c = ZVec::create($path3, $schema3);

    // Insert docs with string fields (each string is a C string allocation)
    for ($i = 0; $i < 20; $i++) {
        $doc = new ZVecDoc("doc{$i}");
        $doc->setInt64('id', $i)
            ->setString('name', "product_name_{$i}_with_padding_to_make_it_longer")
            ->setVectorFp32('vec', [(float)$i, 0.0, 0.0, 0.0]);
        $c->insert($doc);
    }

    $vmBefore = getVmRSS();

    // Query with outputFields — each outputField name is a C string
    // allocated via toCStringArray() and freed via freeCStringArray()
    for ($i = 0; $i < 30; $i++) {
        $results = $c->query(
            'vec',
            [1.0, 0.0, 0.0, 0.0],
            topk: 5,
            outputFields: ['id', 'name']
        );
    }

    $vmAfter = getVmRSS();
    $deltaKb = $vmAfter - $vmBefore;
    echo " VmRSS delta: {$deltaKb} KB (30x query with outputFields)\n";

    if ($deltaKb > 200) {
        echo " WARNING: Possible C string leak (+{$deltaKb} KB)\n";
    } else {
        echo " OK: C strings properly freed\n";
    }
} finally {
    // Close first, then delete the directory; the inner finally makes sure the
    // directory is removed even if close() itself throws.
    try {
        $c?->close();
    } finally {
        exec("rm -rf " . escapeshellarg($path3));
    }
}

// --- 4. Summary ---
// Print the recap in one multi-argument echo; the output is the same lines in
// the same order.
echo "\n=== Memory Management Summary ===\n",
    "Key patterns for preventing FFI memory leaks:\n",
    " 1. Always use try-finally for C string arrays (toCStringArray/freeCStringArray)\n",
    " 2. Free serialized buffers immediately after use (FFI::free)\n",
    " 3. Collection destructors auto-call close() — but explicit close() is safer\n",
    " 4. VmRSS monitoring catches native leaks that memory_get_usage() misses\n",
    " 5. Never store FFI\\CData in long-lived PHP variables\n";

// Remove whatever is left of the section 1 collection directory.
exec("rm -rf " . escapeshellarg($path1));
echo "\nDone!\n";
58 changes: 58 additions & 0 deletions tests/test_memory_collection_lifecycle.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
--TEST--
Memory leak: collection lifecycle (create/open/close/destroy) does not leak
--SKIPIF--
<?php if (!extension_loaded('ffi')) die('skip FFI extension not available'); ?>
--FILE--
<?php
// Regression test: 50 create/insert/optimize/close/open/destroy cycles must not
// grow the PHP heap by more than 500KB.
//
// NOTE(review): memory_get_usage() reports PHP's own allocator only; memory
// allocated inside the native library is presumably not visible to this
// check — confirm whether a VmRSS check is also wanted here.
require_once __DIR__ . '/../src/ZVec.php';

ZVec::init(logType: ZVec::LOG_CONSOLE, logLevel: ZVec::LOG_WARN);

$basePath = __DIR__ . '/../test_dbs/mem_lifecycle_';
$paths = [];
// Set when the threshold is exceeded; the non-zero exit happens only after the
// finally block has run.
$failed = false;

try {
    $memBefore = memory_get_usage();

    // 50x create/open/close/destroy cycle
    for ($i = 0; $i < 50; $i++) {
        $path = $basePath . uniqid();
        // Remember every path so the finally block can remove leftovers.
        $paths[] = $path;

        $schema = new ZVecSchema('mem_test');
        $schema->setMaxDocCountPerSegment(1000)
            ->addInt64('id', nullable: false)
            ->addVectorFp32('vec', dimension: 4, metricType: ZVecSchema::METRIC_IP);

        $c = ZVec::create($path, $schema);
        $doc = new ZVecDoc('doc1');
        $doc->setInt64('id', 1)->setVectorFp32('vec', [1.0, 0.0, 0.0, 0.0]);
        $c->insert($doc);
        $c->optimize();
        $c->close();

        // Re-open and destroy
        $c2 = ZVec::open($path);
        $c2->destroy();
    }

    $memAfter = memory_get_usage();
    $delta = $memAfter - $memBefore;

    // Allow 500KB tolerance for PHP internal allocations
    if ($delta > 500 * 1024) {
        // Do not exit() here: PHP skips finally blocks on exit(), which would
        // bypass the directory cleanup below on exactly the failing path.
        echo "FAIL: Memory grew by {$delta} bytes (threshold 500KB)\n";
        $failed = true;
    } else {
        echo "50x lifecycle OK (delta: {$delta} bytes)\n";
        echo "PASS: Collection lifecycle does not leak\n";
    }
} finally {
    foreach ($paths as $p) {
        exec("rm -rf " . escapeshellarg($p));
    }
}

// Keep the original non-zero exit status for a failed run.
if ($failed) {
    exit(1);
}
?>
--EXPECTF--
50x lifecycle OK (delta: %d bytes)
PASS: Collection lifecycle does not leak
92 changes: 92 additions & 0 deletions tests/test_memory_delete_cstrings.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
--TEST--
Memory leak: delete operations free C strings properly
--SKIPIF--
<?php if (!extension_loaded('ffi')) die('skip FFI extension not available'); ?>
--FILE--
<?php
// Regression test: repeated insert/delete, deleteByFilter and fetch calls must
// not grow the PHP heap by more than 500KB.
//
// NOTE(review): memory_get_usage() reports PHP's own allocator only; memory
// allocated inside the native library is presumably not visible to this
// check — confirm whether a VmRSS check is also wanted here.
require_once __DIR__ . '/../src/ZVec.php';

ZVec::init(logType: ZVec::LOG_CONSOLE, logLevel: ZVec::LOG_WARN);

$path = __DIR__ . '/../test_dbs/mem_delete_cstrings_' . uniqid();
// Open collection handle; reset to null once it has been closed so the finally
// block does not close it a second time.
$c = null;
// Set when the threshold is exceeded; the non-zero exit happens only after the
// finally block has run.
$failed = false;

try {
    $schema = new ZVecSchema('mem_delete_test');
    $schema->setMaxDocCountPerSegment(1000)
        ->addInt64('id', nullable: false)
        ->addVectorFp32('embedding', dimension: 4, metricType: ZVecSchema::METRIC_IP);

    $c = ZVec::create($path, $schema);
    $c->createHnswIndex('embedding', metricType: ZVecSchema::METRIC_IP, m: 16, efConstruction: 100);

    $memBefore = memory_get_usage();

    // 30x insert/delete cycle (exercises toCStringArray/freeCStringArray in delete)
    for ($i = 0; $i < 30; $i++) {
        // Insert 10 documents
        for ($j = 0; $j < 10; $j++) {
            $doc = new ZVecDoc("pk_{$i}_{$j}");
            $doc->setInt64('id', $j)
                ->setVectorFp32('embedding', [(float)$j, 1.0, 0.0, 0.0]);
            $c->insert($doc);
        }

        // Delete by PKs (multiple at once)
        $pks = [];
        for ($j = 0; $j < 10; $j++) {
            $pks[] = "pk_{$i}_{$j}";
        }
        $c->delete(...$pks);
    }

    // Also test deleteByFilter
    for ($i = 0; $i < 10; $i++) {
        $doc = new ZVecDoc("filter_pk_{$i}");
        $doc->setInt64('id', $i)
            ->setVectorFp32('embedding', [(float)$i, 0.0, 0.0, 0.0]);
        $c->insert($doc);
    }

    for ($i = 0; $i < 10; $i++) {
        $c->deleteByFilter("id = {$i}");
    }

    // Test fetch (also uses toCStringArray)
    for ($i = 0; $i < 20; $i++) {
        $doc = new ZVecDoc("fetch_pk_{$i}");
        $doc->setInt64('id', $i)
            ->setVectorFp32('embedding', [(float)$i, 0.0, 0.0, 0.0]);
        $c->insert($doc);
    }

    for ($i = 0; $i < 20; $i++) {
        $results = $c->fetch("fetch_pk_{$i}");
        assert(count($results) === 1, 'Should fetch exactly one doc');
    }

    // Cleanup fetched docs
    $fetchPks = [];
    for ($i = 0; $i < 20; $i++) {
        $fetchPks[] = "fetch_pk_{$i}";
    }
    $c->delete(...$fetchPks);

    $memAfter = memory_get_usage();
    $delta = $memAfter - $memBefore;

    // Allow 500KB tolerance
    if ($delta > 500 * 1024) {
        // Do not exit() here: PHP skips finally blocks on exit(), which would
        // leave the collection open and its directory on disk.
        echo "FAIL: Memory grew by {$delta} bytes (threshold 500KB)\n";
        $failed = true;
    } else {
        echo "Delete/fetch cycles OK (delta: {$delta} bytes)\n";
        // Clear $c before closing so the finally block never closes twice,
        // even if close() throws.
        $opened = $c;
        $c = null;
        $opened->close();
        echo "PASS: Delete operations free C strings properly\n";
    }
} finally {
    // Close a still-open collection first; the inner finally removes the
    // directory even if close() throws.
    try {
        $c?->close();
    } finally {
        exec("rm -rf " . escapeshellarg($path));
    }
}

// Keep the original non-zero exit status for a failed run.
if ($failed) {
    exit(1);
}
?>
--EXPECTF--
Delete/fetch cycles OK (delta: %d bytes)
PASS: Delete operations free C strings properly
Loading
Loading