Overview
Implement a vector store for fast approximate nearest neighbor search. This provides the optional FAISS backend with an in-memory fallback for systems where FAISS is not available.
Dependencies
Files to Create
| File |
Action |
Description |
Sources/SortAI/Core/GraphRAG/FAISSVectorStore.swift |
Create |
FAISS wrapper + in-memory fallback |
Sources/SortAI/Core/GraphRAG/VectorStoreProtocol.swift |
Create |
Protocol definition |
Implementation Details
1. VectorStore Protocol
/// Protocol for vector similarity search backends
protocol VectorStore: Sendable {
/// Dimension of vectors stored
var dimension: Int { get }
/// Number of vectors in the store
var count: Int { get async }
/// Add a vector with associated ID
func add(id: String, vector: [Float]) async throws
/// Add multiple vectors
func addBatch(items: [(id: String, vector: [Float])]) async throws
/// Search for k nearest neighbors
func search(query: [Float], k: Int) async throws -> [(id: String, distance: Float)]
/// Remove a vector by ID
func remove(id: String) async throws
/// Clear all vectors
func clear() async throws
/// Persist to disk
func save(to path: URL) async throws
/// Load from disk
func load(from path: URL) async throws
}
2. InMemoryVectorStore (Default/Fallback)
import Accelerate
/// Simple in-memory vector store using brute-force search
actor InMemoryVectorStore: VectorStore {
let dimension: Int
private var vectors: [[Float]] = []
private var ids: [String] = []
private var idToIndex: [String: Int] = [:]
var count: Int { vectors.count }
init(dimension: Int) {
self.dimension = dimension
}
func add(id: String, vector: [Float]) async throws {
guard vector.count == dimension else {
throw VectorStoreError.dimensionMismatch(expected: dimension, got: vector.count)
}
// Update if exists, otherwise append
if let index = idToIndex[id] {
vectors[index] = vector
} else {
let index = vectors.count
vectors.append(vector)
ids.append(id)
idToIndex[id] = index
}
}
func addBatch(items: [(id: String, vector: [Float])]) async throws {
for (id, vector) in items {
try await add(id: id, vector: vector)
}
}
func search(query: [Float], k: Int) async throws -> [(id: String, distance: Float)] {
guard query.count == dimension else {
throw VectorStoreError.dimensionMismatch(expected: dimension, got: query.count)
}
guard !vectors.isEmpty else { return [] }
// Calculate distances to all vectors
var distances: [(Int, Float)] = []
for (index, vector) in vectors.enumerated() {
let distance = euclideanDistance(query, vector)
distances.append((index, distance))
}
// Sort by distance and take top k
distances.sort { $0.1 < $1.1 }
return distances.prefix(k).map { (index, distance) in
(ids[index], distance)
}
}
func remove(id: String) async throws {
guard let index = idToIndex[id] else { return }
// Remove and update indices
vectors.remove(at: index)
ids.remove(at: index)
idToIndex.removeValue(forKey: id)
// Rebuild index map
idToIndex = Dictionary(uniqueKeysWithValues: ids.enumerated().map { ($1, $0) })
}
func clear() async throws {
vectors = []
ids = []
idToIndex = [:]
}
func save(to path: URL) async throws {
let data = VectorStoreData(
dimension: dimension,
vectors: vectors,
ids: ids
)
let encoded = try JSONEncoder().encode(data)
try encoded.write(to: path)
}
func load(from path: URL) async throws {
let data = try Data(contentsOf: path)
let decoded = try JSONDecoder().decode(VectorStoreData.self, from: data)
guard decoded.dimension == dimension else {
throw VectorStoreError.dimensionMismatch(expected: dimension, got: decoded.dimension)
}
self.vectors = decoded.vectors
self.ids = decoded.ids
self.idToIndex = Dictionary(uniqueKeysWithValues: ids.enumerated().map { ($1, $0) })
}
// MARK: - Distance Calculation
private func euclideanDistance(_ a: [Float], _ b: [Float]) -> Float {
var diff = [Float](repeating: 0, count: a.count)
vDSP_vsub(b, 1, a, 1, &diff, 1, vDSP_Length(a.count))
var sumSquares: Float = 0
vDSP_dotpr(diff, 1, diff, 1, &sumSquares, vDSP_Length(diff.count))
return sqrt(sumSquares)
}
}
private struct VectorStoreData: Codable {
let dimension: Int
let vectors: [[Float]]
let ids: [String]
}
3. FAISSVectorStore (Optional)
#if canImport(FAISS)
import FAISS
/// FAISS-backed vector store for large-scale similarity search
actor FAISSVectorStore: VectorStore {
let dimension: Int
private var index: OpaquePointer?
private var idMap: [Int64: String] = [:]
private var stringToId: [String: Int64] = [:]
private var nextId: Int64 = 0
var count: Int { Int(faiss_index_ntotal(index)) }
init(dimension: Int, indexType: String = "Flat") {
self.dimension = dimension
self.index = faiss_index_factory(Int32(dimension), indexType, METRIC_L2)
}
deinit {
if let index = index {
faiss_index_free(index)
}
}
func add(id: String, vector: [Float]) async throws {
guard vector.count == dimension else {
throw VectorStoreError.dimensionMismatch(expected: dimension, got: vector.count)
}
let internalId = nextId
nextId += 1
idMap[internalId] = id
stringToId[id] = internalId
var mutableVector = vector
faiss_index_add(index, 1, &mutableVector)
}
func search(query: [Float], k: Int) async throws -> [(id: String, distance: Float)] {
guard query.count == dimension else {
throw VectorStoreError.dimensionMismatch(expected: dimension, got: query.count)
}
var distances = [Float](repeating: 0, count: k)
var indices = [Int64](repeating: -1, count: k)
var mutableQuery = query
faiss_index_search(index, 1, &mutableQuery, Int32(k), &distances, &indices)
var results: [(String, Float)] = []
for i in 0..<k {
if indices[i] >= 0, let id = idMap[indices[i]] {
results.append((id, distances[i]))
}
}
return results
}
// ... other methods
}
#endif
4. VectorStoreFactory
/// Factory for creating the appropriate vector store
enum VectorStoreFactory {
static func create(dimension: Int) -> any VectorStore {
#if canImport(FAISS)
return FAISSVectorStore(dimension: dimension)
#else
return InMemoryVectorStore(dimension: dimension)
#endif
}
static func createInMemory(dimension: Int) -> InMemoryVectorStore {
return InMemoryVectorStore(dimension: dimension)
}
}
5. Error Types
enum VectorStoreError: LocalizedError {
case dimensionMismatch(expected: Int, got: Int)
case notFound(String)
case persistenceFailed(Error)
case loadFailed(Error)
var errorDescription: String? {
switch self {
case .dimensionMismatch(let expected, let got):
return "Vector dimension mismatch: expected \(expected), got \(got)"
case .notFound(let id):
return "Vector not found: \(id)"
case .persistenceFailed(let error):
return "Failed to save vector store: \(error.localizedDescription)"
case .loadFailed(let error):
return "Failed to load vector store: \(error.localizedDescription)"
}
}
}
Performance Characteristics
| Store Type |
Add |
Search (k=10) |
Memory |
Best For |
| InMemory |
O(1) |
O(n) |
High |
< 100K vectors |
| FAISS Flat |
O(1) |
O(n) |
Medium |
< 1M vectors |
| FAISS IVF |
O(1) |
O(sqrt(n)) |
Low |
> 1M vectors |
For SortAI's typical use case (< 100K documents), the in-memory store is sufficient and has zero dependencies.
Acceptance Criteria
Testing
func testVectorStoreAdd() async throws {
let store = InMemoryVectorStore(dimension: 512)
let vector = [Float](repeating: 0.1, count: 512)
try await store.add(id: "doc1", vector: vector)
let count = await store.count
XCTAssertEqual(count, 1)
}
func testVectorStoreSearch() async throws {
let store = InMemoryVectorStore(dimension: 128)
// Add vectors
let vec1 = [Float](repeating: 0.1, count: 128)
let vec2 = [Float](repeating: 0.9, count: 128)
try await store.add(id: "doc1", vector: vec1)
try await store.add(id: "doc2", vector: vec2)
// Search for similar to vec1
let query = [Float](repeating: 0.15, count: 128)
let results = try await store.search(query: query, k: 2)
XCTAssertEqual(results.first?.id, "doc1", "doc1 should be closest")
}
func testDimensionMismatch() async {
let store = InMemoryVectorStore(dimension: 512)
let wrongDimVector = [Float](repeating: 0.1, count: 256)
do {
try await store.add(id: "test", vector: wrongDimVector)
XCTFail("Should throw dimension mismatch error")
} catch VectorStoreError.dimensionMismatch {
// Expected
} catch {
XCTFail("Wrong error type: \(error)")
}
}
func testPersistence() async throws {
let store = InMemoryVectorStore(dimension: 128)
let vector = [Float](repeating: 0.5, count: 128)
try await store.add(id: "doc1", vector: vector)
// Save
let tempURL = FileManager.default.temporaryDirectory.appendingPathComponent("test_vectors.json")
try await store.save(to: tempURL)
// Load into new store
let store2 = InMemoryVectorStore(dimension: 128)
try await store2.load(from: tempURL)
let count = await store2.count
XCTAssertEqual(count, 1)
// Cleanup
try FileManager.default.removeItem(at: tempURL)
}
Estimated Size
~200 lines of code (InMemory implementation + protocol)
Risk Assessment
Medium - FAISS native bridging is complex. Mitigation: in-memory fallback always available, no hard dependency on FAISS.
Overview
Implement a vector store for fast approximate nearest neighbor search. This provides the optional FAISS backend with an in-memory fallback for systems where FAISS is not available.
Dependencies
Files to Create
Sources/SortAI/Core/GraphRAG/FAISSVectorStore.swiftSources/SortAI/Core/GraphRAG/VectorStoreProtocol.swiftImplementation Details
1. VectorStore Protocol
2. InMemoryVectorStore (Default/Fallback)
3. FAISSVectorStore (Optional)
4. VectorStoreFactory
5. Error Types
Performance Characteristics
For SortAI's typical use case (< 100K documents), the in-memory store is sufficient and has zero dependencies.
Acceptance Criteria
Testing
Estimated Size
~200 lines of code (InMemory implementation + protocol)
Risk Assessment
Medium - FAISS native bridging is complex. Mitigation: in-memory fallback always available, no hard dependency on FAISS.