diff --git a/src/ZVec.php b/src/ZVec.php index 071c876..5db7010 100644 --- a/src/ZVec.php +++ b/src/ZVec.php @@ -250,7 +250,7 @@ private static function parseGroupResult(FFI\CData $result): array return $groups; } - public static function create(string $path, ZVecSchema $schema, bool $readOnly = false, bool $enableMmap = true, int $maxBufferSize = 67108864): self + public static function create(string $path, ZVecSchema $schema, bool $readOnly = false, bool $enableMmap = true, int $maxBufferSize = self::DEFAULT_MAX_BUFFER_SIZE): self { $path = self::validateCollectionPath($path); $ffi = self::ffi(); @@ -260,7 +260,7 @@ public static function create(string $path, ZVecSchema $schema, bool $readOnly = return new self($out, $path); } - public static function open(string $path, bool $readOnly = false, bool $enableMmap = true, int $maxBufferSize = 67108864): self + public static function open(string $path, bool $readOnly = false, bool $enableMmap = true, int $maxBufferSize = self::DEFAULT_MAX_BUFFER_SIZE): self { $path = self::validateCollectionPath($path); $ffi = self::ffi(); @@ -336,7 +336,7 @@ public function destroy(): void // Re-open via stored path so we can destroy the on-disk data $ffi = self::ffi(); $newHandle = $ffi->new('zvec_collection_t'); - $status = $ffi->zvec_collection_open($this->path, 0, 1, 67108864, FFI::addr($newHandle)); + $status = $ffi->zvec_collection_open($this->path, 0, 1, self::DEFAULT_MAX_BUFFER_SIZE, FFI::addr($newHandle)); self::checkStatus($status); try { self::checkStatus($ffi->zvec_collection_destroy($newHandle)); @@ -358,7 +358,7 @@ public function schema(): string { $this->checkClosed(); $ffi = self::ffi(); - $bufSize = 8192; + $bufSize = self::SCHEMA_BUFFER_SIZE; while (true) { $buf = $ffi->new("char[$bufSize]"); self::checkStatus($ffi->zvec_collection_schema($this->handle, $buf, $bufSize)); @@ -367,7 +367,7 @@ public function schema(): string return $str; } $bufSize *= 2; - if ($bufSize > 1048576) { + if ($bufSize > self::MAX_STRING_BUFFER_SIZE) { throw new ZVecException('Schema string exceeds maximum buffer size of 1 MB'); } } @@ -377,7 +377,7 @@ public function path(): string { $this->checkClosed(); $ffi = self::ffi(); - $bufSize = 4096; + $bufSize = self::PATH_BUFFER_SIZE; while (true) { $buf = $ffi->new("char[$bufSize]"); self::checkStatus($ffi->zvec_collection_path($this->handle, $buf, $bufSize)); @@ -386,7 +386,7 @@ public function path(): string return $str; } $bufSize *= 2; - if ($bufSize > 1048576) { + if ($bufSize > self::MAX_STRING_BUFFER_SIZE) { throw new ZVecException('Path string exceeds maximum buffer size of 1 MB'); } } @@ -504,13 +504,13 @@ public function createInvertIndex(string $fieldName, bool $enableRange = true, b } /** @deprecated Use createIndex() with ZVecIndexParams::forHnsw() instead */ - public function createHnswIndex(string $fieldName, int $metricType = ZVecSchema::METRIC_IP, int $m = 50, int $efConstruction = 500, int $quantizeType = 0, int $concurrency = 0, bool $useContiguousMemory = false): void + public function createHnswIndex(string $fieldName, int $metricType = ZVecSchema::METRIC_IP, int $m = self::DEFAULT_HNSW_M, int $efConstruction = self::DEFAULT_HNSW_EF_CONSTRUCTION, int $quantizeType = 0, int $concurrency = 0, bool $useContiguousMemory = false): void { $this->createIndex($fieldName, ZVecIndexParams::forHnsw($metricType, $m, $efConstruction, $quantizeType, $useContiguousMemory), $concurrency); } /** @deprecated Use createIndex() with ZVecIndexParams::forHnswRabitq() instead */ - public function createHnswRabitqIndex(string $fieldName, int $metricType = ZVecSchema::METRIC_IP, int $totalBits = 7, int $numClusters = 16, int $m = 50, int $efConstruction = 500, int $sampleCount = 0, int $concurrency = 0): void + public function createHnswRabitqIndex(string $fieldName, int $metricType = ZVecSchema::METRIC_IP, int $totalBits = 7, int $numClusters = 16, int $m = self::DEFAULT_HNSW_M, int $efConstruction = self::DEFAULT_HNSW_EF_CONSTRUCTION, int $sampleCount = 0, int $concurrency = 0): void { $this->createIndex($fieldName, ZVecIndexParams::forHnswRabitq($metricType, $totalBits, $numClusters, $m, $efConstruction, $sampleCount), $concurrency); } @@ -657,6 +657,17 @@ public function fetch(string ...$pks): array public const LOG_ERROR = 3; public const LOG_FATAL = 4; + // Default buffer sizes + public const DEFAULT_MAX_BUFFER_SIZE = 67108864; // 64 MB + public const SCHEMA_BUFFER_SIZE = 8192; + public const PATH_BUFFER_SIZE = 4096; + public const MAX_STRING_BUFFER_SIZE = 1048576; // 1 MB max for schema/stats strings + public const BYTES_PER_MB = 1048576; + + // Default HNSW index parameters + public const DEFAULT_HNSW_M = 50; + public const DEFAULT_HNSW_EF_CONSTRUCTION = 500; + // Data types for alterColumn (scalar numeric only) public const TYPE_INT32 = 4; public const TYPE_INT64 = 5; @@ -738,7 +749,7 @@ public static function init( $ffi->zvec_config_data_set_brute_force_by_keys_ratio($configData, $bruteForceByKeysRatio); } if ($memoryLimitMb > 0) { - $ffi->zvec_config_data_set_memory_limit($configData, $memoryLimitMb * 1048576); + $ffi->zvec_config_data_set_memory_limit($configData, $memoryLimitMb * self::BYTES_PER_MB); } try { @@ -1354,7 +1365,7 @@ public function stats(): string { $this->checkClosed(); $ffi = self::ffi(); - $bufSize = 4096; + $bufSize = self::PATH_BUFFER_SIZE; while (true) { $buf = $ffi->new("char[$bufSize]"); self::checkStatus($ffi->zvec_collection_stats($this->handle, $buf, $bufSize)); @@ -1363,7 +1374,7 @@ public function stats(): string return $str; } $bufSize *= 2; - if ($bufSize > 1048576) { + if ($bufSize > self::MAX_STRING_BUFFER_SIZE) { throw new ZVecException('Stats string exceeds maximum buffer size of 1 MB'); } } diff --git a/src/ZVecIndexParams.php b/src/ZVecIndexParams.php index 66802ed..23280fb 100644 --- a/src/ZVecIndexParams.php +++ b/src/ZVecIndexParams.php @@ -27,7 +27,7 @@ public function getHandle(): FFI\CData return $this->handle; } - public static function forHnsw(int $metricType, int $m = 50, int $efConstruction = 500, int $quantizeType = ZVec::QUANTIZE_UNDEFINED, bool $useContiguousMemory = false): self + public static function forHnsw(int $metricType, int $m = ZVec::DEFAULT_HNSW_M, int $efConstruction = ZVec::DEFAULT_HNSW_EF_CONSTRUCTION, int $quantizeType = ZVec::QUANTIZE_UNDEFINED, bool $useContiguousMemory = false): self { if ($m <= 0) { throw new ZVecException("m must be a positive integer, got: {$m}"); @@ -41,7 +41,7 @@ public static function forHnsw(int $metricType, int $m = 50, int $efConstruction return new self($handle); } - public static function forHnswRabitq(int $metricType, int $totalBits = 7, int $numClusters = 16, int $m = 50, int $efConstruction = 500, int $sampleCount = 0): self + public static function forHnswRabitq(int $metricType, int $totalBits = 7, int $numClusters = 16, int $m = ZVec::DEFAULT_HNSW_M, int $efConstruction = ZVec::DEFAULT_HNSW_EF_CONSTRUCTION, int $sampleCount = 0): self { if ($m <= 0) { throw new ZVecException("m must be a positive integer, got: {$m}"); diff --git a/tests/test_magic_number_constants.phpt b/tests/test_magic_number_constants.phpt new file mode 100644 index 0000000..3402bc7 --- /dev/null +++ b/tests/test_magic_number_constants.phpt @@ -0,0 +1,188 @@ +--TEST-- +SMELL-007: Named constants replace magic numbers +--SKIPIF-- + +--FILE-- + $line) { + if (strpos($line, '67108864') !== false) { + $found67108864[] = $i + 1; + } +} +if (count($found67108864) === 1 && strpos($lines[$found67108864[0] - 1], 'DEFAULT_MAX_BUFFER_SIZE') !== false) { + echo "OK: 67108864 only in constant declaration\n"; +} else { + echo "FAIL: 67108864 found at lines: " . implode(', ', $found67108864) . "\n"; + $failed++; +} + +// Check for 8192 - should only appear in constant declaration +$found8192 = []; +foreach ($lines as $i => $line) { + if (preg_match('/\b8192\b/', $line)) { + $found8192[] = $i + 1; + } +} +if (count($found8192) === 1 && strpos($lines[$found8192[0] - 1], 'SCHEMA_BUFFER_SIZE') !== false) { + echo "OK: 8192 only in constant declaration\n"; +} else { + echo "FAIL: 8192 found at lines: " . implode(', ', $found8192) . "\n"; + $failed++; +} + +// Check for 4096 - should only appear in constant declaration +$found4096 = []; +foreach ($lines as $i => $line) { + if (preg_match('/\b4096\b/', $line)) { + $found4096[] = $i + 1; + } +} +if (count($found4096) === 1 && strpos($lines[$found4096[0] - 1], 'PATH_BUFFER_SIZE') !== false) { + echo "OK: 4096 only in constant declaration\n"; +} else { + echo "FAIL: 4096 found at lines: " . implode(', ', $found4096) . "\n"; + $failed++; +} + +// Check for 1048576 - should only appear in constant declarations +$found1048576 = []; +foreach ($lines as $i => $line) { + if (preg_match('/\b1048576\b/', $line)) { + $found1048576[] = $i + 1; + } +} +if (count($found1048576) === 2) { + $bothConstants = true; + foreach ($found1048576 as $lineNum) { + $line = $lines[$lineNum - 1]; + if (strpos($line, 'MAX_STRING_BUFFER_SIZE') === false && strpos($line, 'BYTES_PER_MB') === false) { + $bothConstants = false; + } + } + if ($bothConstants) { + echo "OK: 1048576 only in constant declarations\n"; + } else { + echo "FAIL: 1048576 found at non-constant lines: " . implode(', ', $found1048576) . "\n"; + $failed++; + } +} else { + echo "FAIL: 1048576 found at lines: " . implode(', ', $found1048576) . "\n"; + $failed++; +} + +// Check for 50 as default HNSW m - should only appear in constant declaration and deprecated methods +$found50 = []; +foreach ($lines as $i => $line) { + if (preg_match('/\b50\b/', $line) && strpos($line, 'LOG_WARN') === false) { + $found50[] = $i + 1; + } +} +$valid50 = true; +foreach ($found50 as $lineNum) { + $line = $lines[$lineNum - 1]; + if (strpos($line, 'DEFAULT_HNSW_M') === false && + strpos($line, 'deprecated') === false && + strpos($line, '@deprecated') === false) { + $valid50 = false; + } +} +if ($valid50 && count($found50) >= 1) { + echo "OK: 50 only in constant declaration and deprecated methods\n"; +} else { + echo "FAIL: 50 found at lines: " . implode(', ', $found50) . "\n"; + $failed++; +} + +// Check for 500 as default HNSW efConstruction - should only appear in constant declaration and deprecated methods +$found500 = []; +foreach ($lines as $i => $line) { + if (preg_match('/\b500\b/', $line)) { + $found500[] = $i + 1; + } +} +$valid500 = true; +foreach ($found500 as $lineNum) { + $line = $lines[$lineNum - 1]; + if (strpos($line, 'DEFAULT_HNSW_EF_CONSTRUCTION') === false && + strpos($line, 'deprecated') === false && + strpos($line, '@deprecated') === false) { + $valid500 = false; + } +} +if ($valid500 && count($found500) >= 1) { + echo "OK: 500 only in constant declaration and deprecated methods\n"; +} else { + echo "FAIL: 500 found at lines: " . implode(', ', $found500) . "\n"; + $failed++; +} + +// Verify ZVecIndexParams uses the constants +$indexParamsSource = file_get_contents(__DIR__ . '/../src/ZVecIndexParams.php'); +if (strpos($indexParamsSource, 'ZVec::DEFAULT_HNSW_M') !== false) { + echo "OK: ZVecIndexParams uses ZVec::DEFAULT_HNSW_M\n"; +} else { + echo "FAIL: ZVecIndexParams does not use ZVec::DEFAULT_HNSW_M\n"; + $failed++; +} +if (strpos($indexParamsSource, 'ZVec::DEFAULT_HNSW_EF_CONSTRUCTION') !== false) { + echo "OK: ZVecIndexParams uses ZVec::DEFAULT_HNSW_EF_CONSTRUCTION\n"; +} else { + echo "FAIL: ZVecIndexParams does not use ZVec::DEFAULT_HNSW_EF_CONSTRUCTION\n"; + $failed++; +} + +// Summary +echo "\n"; +if ($failed === 0) { + echo "All tests passed!\n"; +} else { + echo "FAILED: $failed test(s) failed\n"; +} +?> +--EXPECT-- +OK: ZVec::DEFAULT_MAX_BUFFER_SIZE = 67108864 +OK: ZVec::SCHEMA_BUFFER_SIZE = 8192 +OK: ZVec::PATH_BUFFER_SIZE = 4096 +OK: ZVec::MAX_STRING_BUFFER_SIZE = 1048576 +OK: ZVec::BYTES_PER_MB = 1048576 +OK: ZVec::DEFAULT_HNSW_M = 50 +OK: ZVec::DEFAULT_HNSW_EF_CONSTRUCTION = 500 +OK: 67108864 only in constant declaration +OK: 8192 only in constant declaration +OK: 4096 only in constant declaration +OK: 1048576 only in constant declarations +OK: 50 only in constant declaration and deprecated methods +OK: 500 only in constant declaration and deprecated methods +OK: ZVecIndexParams uses ZVec::DEFAULT_HNSW_M +OK: ZVecIndexParams uses ZVec::DEFAULT_HNSW_EF_CONSTRUCTION + +All tests passed!