diff --git a/Sources/SortAI/App/OrganizationPreviewView.swift b/Sources/SortAI/App/OrganizationPreviewView.swift new file mode 100644 index 0000000..668202e --- /dev/null +++ b/Sources/SortAI/App/OrganizationPreviewView.swift @@ -0,0 +1,531 @@ +// MARK: - Organization Preview View +// Shows proposed organization with folders and files, allows user adjustments + +import SwiftUI + +// MARK: - Organization Preview View + +/// Displays a preview of proposed organization operations +/// Separates folder units from loose files for clarity +struct OrganizationPreviewView: View { + let plan: HierarchyAwareOrganizationPlan + let taxonomy: TaxonomyTree + + /// Callback when user wants to flatten a folder + let onFlattenFolder: (ScannedFolder) -> Void + + /// Callback when user changes category for a folder + let onChangeFolderCategory: (ScannedFolder, [String]) -> Void + + /// Callback when user confirms the plan + let onConfirm: () -> Void + + /// Callback when user cancels + let onCancel: () -> Void + + @State private var selectedFolders: Set = [] + @State private var showingCategoryPicker: Bool = false + @State private var folderToReassign: ScannedFolder? + + var body: some View { + VStack(spacing: 0) { + // Header + headerSection + + Divider() + + // Main content in scroll view + ScrollView { + VStack(alignment: .leading, spacing: 20) { + // Folder operations section + if !plan.folderOperations.isEmpty { + folderSection + } + + // File operations section + if !plan.fileOperations.isEmpty { + fileSection + } + + // Conflicts section + if plan.hasConflicts { + conflictSection + } + } + .padding() + } + + Divider() + + // Footer with actions + footerSection + } + .frame(minWidth: 600, minHeight: 400) + .sheet(item: $folderToReassign) { folder in + CategoryPickerSheet( + folder: folder, + taxonomy: taxonomy, + onSelect: { path in + onChangeFolderCategory(folder, path) + folderToReassign = nil + }, + onCancel: { + folderToReassign = nil + } + ) + } + } + + // MARK: - Header + + private var headerSection: some View { + HStack { + VStack(alignment: .leading, spacing: 4) { + Text("Organization Preview") + .font(.headline) + + Text("\(plan.totalItems) items (\(plan.folderOperations.count) folders, \(plan.fileOperations.count) files)") + .font(.caption) + .foregroundStyle(.secondary) + } + + Spacer() + + // Size estimate + VStack(alignment: .trailing, spacing: 4) { + Text("Total Size") + .font(.caption) + .foregroundStyle(.secondary) + + Text(formatSize(plan.estimatedSize)) + .font(.headline.monospacedDigit()) + } + } + .padding() + .background(Color(nsColor: .controlBackgroundColor)) + } + + // MARK: - Folder Section + + private var folderSection: some View { + VStack(alignment: .leading, spacing: 12) { + HStack { + Image(systemName: "folder.fill") + .foregroundStyle(.blue) + Text("Folder Units") + .font(.headline) + + Spacer() + + Text("These folders move as complete units") + .font(.caption) + .foregroundStyle(.secondary) + } + + ForEach(plan.folderOperations) { op in + FolderPreviewRow( + operation: op, + isSelected: selectedFolders.contains(op.sourceFolder.id), + onToggleSelection: { + if selectedFolders.contains(op.sourceFolder.id) { + selectedFolders.remove(op.sourceFolder.id) + } else { + selectedFolders.insert(op.sourceFolder.id) + } + }, + onFlatten: { + onFlattenFolder(op.sourceFolder) + }, + onChangeCategory: { + folderToReassign = op.sourceFolder + } + ) + } + } + } + + // MARK: - File Section + + private var fileSection: some View { + VStack(alignment: .leading, spacing: 12) { + HStack { + Image(systemName: "doc.fill") + .foregroundStyle(.green) + Text("Individual Files") + .font(.headline) + + Spacer() + + Text("These files move separately") + .font(.caption) + .foregroundStyle(.secondary) + } + + // Group files by destination category + let grouped = Dictionary(grouping: plan.fileOperations) { + $0.destinationFolder.lastPathComponent + } + + ForEach(grouped.keys.sorted(), id: \.self) { category in + DisclosureGroup { + ForEach(grouped[category] ?? [], id: \.sourceFile.id) { op in + FilePreviewRow(operation: op) + } + } label: { + HStack { + Text(category) + .fontWeight(.medium) + Spacer() + Text("\(grouped[category]?.count ?? 0) files") + .font(.caption) + .foregroundStyle(.secondary) + } + } + } + } + } + + // MARK: - Conflict Section + + private var conflictSection: some View { + VStack(alignment: .leading, spacing: 12) { + HStack { + Image(systemName: "exclamationmark.triangle.fill") + .foregroundStyle(.orange) + Text("Conflicts") + .font(.headline) + + Spacer() + + Text("\(plan.folderConflicts.count + plan.fileConflicts.count) items need attention") + .font(.caption) + .foregroundStyle(.secondary) + } + + if !plan.folderConflicts.isEmpty { + ForEach(plan.folderConflicts) { conflict in + OrganizationConflictRow( + name: conflict.sourceFolder.folderName, + destinationPath: conflict.destinationPath.path, + isFolder: true + ) + } + } + + if !plan.fileConflicts.isEmpty { + ForEach(plan.fileConflicts) { conflict in + OrganizationConflictRow( + name: conflict.sourceFile.filename, + destinationPath: conflict.destinationPath.path, + isFolder: false + ) + } + } + } + .padding() + .background(Color.orange.opacity(0.1)) + .cornerRadius(8) + } + + // MARK: - Footer + + private var footerSection: some View { + HStack { + Button("Cancel") { + onCancel() + } + .keyboardShortcut(.cancelAction) + + Spacer() + + if plan.hasConflicts { + Text("⚠️ \(plan.folderConflicts.count + plan.fileConflicts.count) conflicts") + .font(.caption) + .foregroundStyle(.orange) + } + + Button("Organize Files") { + onConfirm() + } + .keyboardShortcut(.defaultAction) + .buttonStyle(.borderedProminent) + } + .padding() + .background(Color(nsColor: .controlBackgroundColor)) + } + + // MARK: - Helpers + + private func formatSize(_ bytes: Int64) -> String { + ByteCountFormatter.string(fromByteCount: bytes, countStyle: .file) + } +} + +// MARK: - Folder Preview Row + +struct FolderPreviewRow: View { + let operation: FolderOrganizationOperation + let isSelected: Bool + let onToggleSelection: () -> Void + let onFlatten: () -> Void + let onChangeCategory: () -> Void + + @State private var isHovering = false + + var body: some View { + HStack(spacing: 12) { + // Selection checkbox + Toggle("", isOn: .init( + get: { isSelected }, + set: { _ in onToggleSelection() } + )) + .toggleStyle(.checkbox) + .labelsHidden() + + // Folder icon + Image(systemName: "folder.fill") + .font(.title2) + .foregroundStyle(.blue) + + // Folder info + VStack(alignment: .leading, spacing: 2) { + Text(operation.sourceFolder.folderName) + .fontWeight(.medium) + + HStack(spacing: 8) { + Text("\(operation.sourceFolder.fileCount) files") + .font(.caption) + .foregroundStyle(.secondary) + + Text("•") + .font(.caption) + .foregroundStyle(.secondary) + + Text(operation.sourceFolder.formattedSize) + .font(.caption) + .foregroundStyle(.secondary) + } + } + + Spacer() + + // Destination category + VStack(alignment: .trailing, spacing: 2) { + HStack(spacing: 4) { + Image(systemName: "arrow.right") + .font(.caption) + .foregroundStyle(.secondary) + + Text(operation.destinationCategory) + .font(.callout) + .foregroundStyle(.primary) + } + + // Confidence indicator + HStack(spacing: 4) { + confidenceIndicator(operation.confidence) + Text("\(Int(operation.confidence * 100))%") + .font(.caption2) + .foregroundStyle(.secondary) + } + } + + // Action buttons (visible on hover) + if isHovering { + HStack(spacing: 8) { + Button { + onChangeCategory() + } label: { + Image(systemName: "pencil") + } + .buttonStyle(.borderless) + .help("Change category") + + Button { + onFlatten() + } label: { + Image(systemName: "rectangle.expand.vertical") + } + .buttonStyle(.borderless) + .help("Flatten folder (analyze files individually)") + } + } + } + .padding(.vertical, 8) + .padding(.horizontal, 12) + .background(isSelected ? Color.accentColor.opacity(0.1) : Color.clear) + .cornerRadius(8) + .onHover { hovering in + isHovering = hovering + } + } + + @ViewBuilder + private func confidenceIndicator(_ confidence: Double) -> some View { + Circle() + .fill(confidenceColor(confidence)) + .frame(width: 8, height: 8) + } + + private func confidenceColor(_ confidence: Double) -> Color { + if confidence >= 0.85 { return .green } + if confidence >= 0.7 { return .yellow } + if confidence >= 0.5 { return .orange } + return .red + } +} + +// MARK: - File Preview Row + +struct FilePreviewRow: View { + let operation: OrganizationOperation + + var body: some View { + HStack(spacing: 12) { + Image(systemName: iconForFile(operation.sourceFile)) + .foregroundStyle(.secondary) + + VStack(alignment: .leading, spacing: 2) { + Text(operation.sourceFile.filename) + .font(.callout) + + Text(operation.sourceFile.formattedSize) + .font(.caption) + .foregroundStyle(.secondary) + } + + Spacer() + } + .padding(.vertical, 4) + .padding(.leading, 24) + } + + private func iconForFile(_ file: TaxonomyScannedFile) -> String { + if file.isImage { return "photo" } + if file.isVideo { return "video" } + if file.isAudio { return "waveform" } + if file.isDocument { return "doc.text" } + return "doc" + } +} + +// MARK: - Organization Conflict Row + +struct OrganizationConflictRow: View { + let name: String + let destinationPath: String + let isFolder: Bool + + var body: some View { + HStack(spacing: 12) { + Image(systemName: isFolder ? "folder.fill" : "doc.fill") + .foregroundStyle(.orange) + + VStack(alignment: .leading, spacing: 2) { + Text(name) + .fontWeight(.medium) + + Text("Destination exists: \(destinationPath)") + .font(.caption) + .foregroundStyle(.secondary) + .lineLimit(1) + .truncationMode(.middle) + } + + Spacer() + + Text("Will be renamed") + .font(.caption) + .padding(.horizontal, 8) + .padding(.vertical, 4) + .background(Color.orange.opacity(0.2)) + .cornerRadius(4) + } + .padding(.vertical, 4) + } +} + +// MARK: - Category Picker Sheet + +struct CategoryPickerSheet: View { + let folder: ScannedFolder + let taxonomy: TaxonomyTree + let onSelect: ([String]) -> Void + let onCancel: () -> Void + + @State private var selectedPath: [String] = [] + @State private var customCategory: String = "" + + var body: some View { + VStack(spacing: 16) { + // Header + HStack { + Text("Change Category") + .font(.headline) + Spacer() + Button("Cancel") { + onCancel() + } + .buttonStyle(.borderless) + } + + Text("Select a category for '\(folder.folderName)'") + .font(.subheadline) + .foregroundStyle(.secondary) + + Divider() + + // Category list + ScrollView { + VStack(alignment: .leading, spacing: 8) { + ForEach(taxonomy.allCategories(), id: \.id) { node in + Button { + selectedPath = node.path + } label: { + HStack { + let indent = CGFloat(node.depth) * 16 + Text(node.name) + .padding(.leading, indent) + Spacer() + if selectedPath == node.path { + Image(systemName: "checkmark") + .foregroundStyle(.blue) + } + } + } + .buttonStyle(.plain) + .padding(.vertical, 4) + } + } + } + .frame(maxHeight: 300) + + Divider() + + // Custom category input + HStack { + TextField("Or enter custom path...", text: $customCategory) + .textFieldStyle(.roundedBorder) + } + + // Actions + HStack { + Spacer() + + Button("Apply") { + if !customCategory.isEmpty { + let path = customCategory.components(separatedBy: "/").map { $0.trimmingCharacters(in: .whitespaces) } + onSelect(path) + } else if !selectedPath.isEmpty { + onSelect(selectedPath) + } + } + .buttonStyle(.borderedProminent) + .disabled(selectedPath.isEmpty && customCategory.isEmpty) + } + } + .padding() + .frame(width: 400, height: 500) + } +} + diff --git a/Sources/SortAI/Core/Configuration/AppConfiguration.swift b/Sources/SortAI/Core/Configuration/AppConfiguration.swift index 7e1b280..8cbceb8 100644 --- a/Sources/SortAI/Core/Configuration/AppConfiguration.swift +++ b/Sources/SortAI/Core/Configuration/AppConfiguration.swift @@ -111,6 +111,12 @@ enum SortAIDefaultsKey { static let autoInstallOllama = "autoInstallOllama" static let enableFAISS = "enableFAISS" static let useAppleEmbeddings = "useAppleEmbeddings" + + // Hierarchy-aware categorization settings + static let respectHierarchy = "respectHierarchy" + static let minFilesForFolder = "minFilesForFolder" + static let allowUserFlatten = "allowUserFlatten" + static let folderReviewThreshold = "folderReviewThreshold" } /// Registers default values in UserDefaults at app startup @@ -154,6 +160,12 @@ enum SortAIDefaults { SortAIDefaultsKey.respectBatteryStatus: true, SortAIDefaultsKey.enableWatchMode: false, SortAIDefaultsKey.watchQuietPeriod: 3.0, + + // Hierarchy-aware categorization + SortAIDefaultsKey.respectHierarchy: true, + SortAIDefaultsKey.minFilesForFolder: 1, + SortAIDefaultsKey.allowUserFlatten: true, + SortAIDefaultsKey.folderReviewThreshold: 0.75, ] UserDefaults.standard.register(defaults: defaults) @@ -381,12 +393,43 @@ struct OrganizationConfiguration: Codable, Sendable, Equatable { /// Characters to replace in filenames var invalidCharacters: String + // MARK: - Hierarchy Settings + + /// Whether to respect folder hierarchy (treat sub-folders as units) + var respectHierarchy: Bool + + /// Minimum files in a folder to treat it as a unit (folders with fewer files are flattened) + var minFilesForFolder: Int + + /// Whether to allow users to flatten folders from the preview UI + var allowUserFlatten: Bool + + /// Confidence threshold below which folders are flagged for review + var folderReviewThreshold: Double + static let `default` = OrganizationConfiguration( defaultMode: .copy, createMetadataFiles: false, preserveTimestamps: true, maxFilenameLength: 200, - invalidCharacters: "/\\:*?\"<>|" + invalidCharacters: "/\\:*?\"<>|", + respectHierarchy: true, + minFilesForFolder: 1, + allowUserFlatten: true, + folderReviewThreshold: 0.75 + ) + + /// Legacy configuration without hierarchy awareness + static let flat = OrganizationConfiguration( + defaultMode: .copy, + createMetadataFiles: false, + preserveTimestamps: true, + maxFilenameLength: 200, + invalidCharacters: "/\\:*?\"<>|", + respectHierarchy: false, + minFilesForFolder: 1, + allowUserFlatten: false, + folderReviewThreshold: 0.75 ) } diff --git a/Sources/SortAI/Core/Organizer/OrganizationEngine.swift b/Sources/SortAI/Core/Organizer/OrganizationEngine.swift index 497c6c1..a60bd68 100644 --- a/Sources/SortAI/Core/Organizer/OrganizationEngine.swift +++ b/Sources/SortAI/Core/Organizer/OrganizationEngine.swift @@ -140,6 +140,165 @@ actor OrganizationEngine { } } + // MARK: - Hierarchy-Aware Planning + + /// Plan organization with hierarchy awareness + /// Folders move as complete units, loose files move individually + func planHierarchyOrganization( + scanResult: HierarchyScanResult, + folderAssignments: [FolderCategoryAssignment], + fileAssignments: [FileAssignment], + tree: TaxonomyTree, + outputFolder: URL + ) async -> HierarchyAwareOrganizationPlan { + NSLog("📋 [OrganizationEngine] Planning hierarchy-aware organization") + NSLog("📋 [OrganizationEngine] \(scanResult.folders.count) folders, \(scanResult.looseFiles.count) loose files") + + var folderOps: [FolderOrganizationOperation] = [] + var fileOps: [OrganizationOperation] = [] + var folderConflicts: [FolderOrganizationConflict] = [] + var fileConflicts: [OrganizationConflict] = [] + + // Build assignment lookups + let folderAssignmentMap = Dictionary( + folderAssignments.map { ($0.folderId, $0) }, + uniquingKeysWith: { first, _ in first } + ) + + let fileAssignmentMap = Dictionary( + fileAssignments.map { ($0.fileId, $0) }, + uniquingKeysWith: { first, _ in first } + ) + + // Plan folder operations + for folder in scanResult.folders { + guard let assignment = folderAssignmentMap[folder.id] else { + // Unassigned folder - use Uncategorized + let destFolder = outputFolder.appendingPathComponent(config.uncategorizedFolderName) + let destPath = destFolder.appendingPathComponent(folder.folderName) + + if fileManager.fileExists(atPath: destPath.path) { + folderConflicts.append(FolderOrganizationConflict( + sourceFolder: folder, + destinationPath: destPath, + resolution: .askUser + )) + } else { + folderOps.append(FolderOrganizationOperation( + sourceFolder: folder, + destinationFolder: destFolder, + destinationCategory: config.uncategorizedFolderName, + confidence: 0.3, + mode: config.mode + )) + } + continue + } + + // Build destination from category path + let categoryPath = assignment.categoryPath + let destFolder = categoryPath.reduce(outputFolder) { $0.appendingPathComponent($1) } + let destPath = destFolder.appendingPathComponent(folder.folderName) + + // Check for conflicts + if fileManager.fileExists(atPath: destPath.path) { + folderConflicts.append(FolderOrganizationConflict( + sourceFolder: folder, + destinationPath: destPath, + resolution: .askUser + )) + } else { + folderOps.append(FolderOrganizationOperation( + sourceFolder: folder, + destinationFolder: destFolder, + destinationCategory: categoryPath.joined(separator: " / "), + confidence: assignment.confidence, + mode: config.mode + )) + } + } + + // Plan loose file operations (same as regular planning) + for file in scanResult.looseFiles { + guard let assignment = fileAssignmentMap[file.id] else { + // Unassigned file + let uncategorizedFolder = outputFolder.appendingPathComponent(config.uncategorizedFolderName) + let dest = uncategorizedFolder.appendingPathComponent(file.filename) + + if fileManager.fileExists(atPath: dest.path) { + fileConflicts.append(OrganizationConflict( + sourceFile: file, + destinationPath: dest, + resolution: .askUser + )) + } else { + fileOps.append(OrganizationOperation( + sourceFile: file, + destinationFolder: uncategorizedFolder, + destinationPath: dest, + mode: config.mode + )) + } + continue + } + + // Build destination from category + guard let node = tree.node(byId: assignment.categoryId) else { + let uncategorizedFolder = outputFolder.appendingPathComponent(config.uncategorizedFolderName) + let dest = uncategorizedFolder.appendingPathComponent(file.filename) + + if fileManager.fileExists(atPath: dest.path) { + fileConflicts.append(OrganizationConflict( + sourceFile: file, + destinationPath: dest, + resolution: .askUser + )) + } else { + fileOps.append(OrganizationOperation( + sourceFile: file, + destinationFolder: uncategorizedFolder, + destinationPath: dest, + mode: config.mode + )) + } + continue + } + + let categoryPath = tree.pathToNode(node) + let destFolder = categoryPath.reduce(outputFolder) { $0.appendingPathComponent($1.name) } + let destFile = destFolder.appendingPathComponent(file.filename) + + if fileManager.fileExists(atPath: destFile.path) { + fileConflicts.append(OrganizationConflict( + sourceFile: file, + destinationPath: destFile, + resolution: .askUser + )) + } else { + fileOps.append(OrganizationOperation( + sourceFile: file, + destinationFolder: destFolder, + destinationPath: destFile, + mode: config.mode + )) + } + } + + let totalSize = folderOps.reduce(0) { $0 + $1.sourceFolder.totalSize } + + fileOps.reduce(0) { $0 + $1.sourceFile.fileSize } + + NSLog("📋 [OrganizationEngine] Plan complete: \(folderOps.count) folder ops, \(fileOps.count) file ops") + NSLog("📋 [OrganizationEngine] Conflicts: \(folderConflicts.count) folder, \(fileConflicts.count) file") + + return HierarchyAwareOrganizationPlan( + folderOperations: folderOps, + fileOperations: fileOps, + folderConflicts: folderConflicts, + fileConflicts: fileConflicts, + estimatedSize: totalSize + ) + } + // MARK: - Execution /// Execute the organization plan @@ -383,3 +542,106 @@ struct FailedOperation: Sendable { let error: String } + +// MARK: - Hierarchy-Aware Organization Types + +/// Operation for moving a folder as a complete unit +struct FolderOrganizationOperation: Sendable, Identifiable { + let id: UUID + let sourceFolder: ScannedFolder + let destinationFolder: URL // Where the folder will be moved to + let destinationCategory: String // Category name for display + let confidence: Double + let preserveInternalStructure: Bool // Always true for folder units + let mode: OrganizationMode + + init( + id: UUID = UUID(), + sourceFolder: ScannedFolder, + destinationFolder: URL, + destinationCategory: String, + confidence: Double, + mode: OrganizationMode + ) { + self.id = id + self.sourceFolder = sourceFolder + self.destinationFolder = destinationFolder + self.destinationCategory = destinationCategory + self.confidence = confidence + self.preserveInternalStructure = true + self.mode = mode + } +} + +/// Conflict when organizing a folder +final class FolderOrganizationConflict: @unchecked Sendable, Identifiable { + let id = UUID() + let sourceFolder: ScannedFolder + let destinationPath: URL + var resolution: ConflictResolution + + init(sourceFolder: ScannedFolder, destinationPath: URL, resolution: ConflictResolution) { + self.sourceFolder = sourceFolder + self.destinationPath = destinationPath + self.resolution = resolution + } +} + +/// Organization plan that respects folder hierarchy +/// Separates folder operations from individual file operations +struct HierarchyAwareOrganizationPlan: Sendable { + let folderOperations: [FolderOrganizationOperation] + let fileOperations: [OrganizationOperation] + let folderConflicts: [FolderOrganizationConflict] + let fileConflicts: [OrganizationConflict] + let estimatedSize: Int64 + + /// Total number of items to organize + var totalItems: Int { + folderOperations.count + fileOperations.count + } + + /// Total file count (including files inside folders) + var totalFileCount: Int { + let folderFiles = folderOperations.reduce(0) { $0 + $1.sourceFolder.fileCount } + return folderFiles + fileOperations.count + } + + /// Whether there are any conflicts to resolve + var hasConflicts: Bool { + !folderConflicts.isEmpty || !fileConflicts.isEmpty + } + + /// Convert to legacy OrganizationPlan (flattens folders into individual file ops) + func toLegacyPlan() -> OrganizationPlan { + var allFileOps = fileOperations + + // Flatten folder operations into file operations + for folderOp in folderOperations { + for file in folderOp.sourceFolder.containedFiles { + let destPath = folderOp.destinationFolder + .appendingPathComponent(folderOp.sourceFolder.folderName) + .appendingPathComponent(file.relativePath.replacingOccurrences( + of: folderOp.sourceFolder.relativePath + "/", + with: "" + )) + + allFileOps.append(OrganizationOperation( + sourceFile: file, + destinationFolder: destPath.deletingLastPathComponent(), + destinationPath: destPath, + mode: folderOp.mode + )) + } + } + + // Combine conflicts + let allConflicts = fileConflicts + + return OrganizationPlan( + operations: allFileOps, + conflicts: allConflicts, + estimatedSize: estimatedSize + ) + } +} diff --git a/Sources/SortAI/Core/Organizer/SafeFileOrganizer.swift b/Sources/SortAI/Core/Organizer/SafeFileOrganizer.swift index 929062a..27cdb31 100644 --- a/Sources/SortAI/Core/Organizer/SafeFileOrganizer.swift +++ b/Sources/SortAI/Core/Organizer/SafeFileOrganizer.swift @@ -262,6 +262,227 @@ actor SafeFileOrganizer { ) } + // MARK: - Folder Organization + + /// Move a folder as a complete unit, preserving internal structure + func moveFolder( + folder: ScannedFolder, + assignment: FolderCategoryAssignment, + outputFolder: URL, + mode: MovementLogEntry.LLMMode = .full, + provider: String? = nil, + providerVersion: String? = nil + ) async throws -> SafeFolderOrganizationResult { + let fileManager = FileManager.default + + // Build destination path from category + let destFolder = assignment.categoryPath.reduce(outputFolder) { $0.appendingPathComponent($1) } + let destPath = destFolder.appendingPathComponent(folder.folderName) + + NSLog("📁 [SafeFileOrganizer] Moving folder '\(folder.folderName)' to '\(destPath.path)'") + + // Create destination parent folder + try fileManager.createDirectory(at: destFolder, withIntermediateDirectories: true) + + // Check for collision + if fileManager.fileExists(atPath: destPath.path) { + if config.autoResolveCollisions { + // Generate unique name for folder + let resolvedPath = try await resolveFolderCollision(destPath) + try await performFolderOperation( + source: folder.url, + destination: resolvedPath, + mode: config.mode + ) + + // Log the movement + if config.logMovements { + try await logFolderMovement( + folder: folder, + from: folder.url, + to: resolvedPath, + mode: mode, + provider: provider, + providerVersion: providerVersion + ) + } + + return SafeFolderOrganizationResult( + sourceFolder: folder, + destinationPath: resolvedPath, + success: true, + collisionResolved: true, + error: nil + ) + } else { + return SafeFolderOrganizationResult( + sourceFolder: folder, + destinationPath: destPath, + success: false, + collisionResolved: false, + error: "Destination folder already exists: \(destPath.path)" + ) + } + } + + // Move folder + do { + try await performFolderOperation( + source: folder.url, + destination: destPath, + mode: config.mode + ) + + // Log the movement + if config.logMovements { + try await logFolderMovement( + folder: folder, + from: folder.url, + to: destPath, + mode: mode, + provider: provider, + providerVersion: providerVersion + ) + } + + return SafeFolderOrganizationResult( + sourceFolder: folder, + destinationPath: destPath, + success: true, + collisionResolved: false, + error: nil + ) + } catch { + return SafeFolderOrganizationResult( + sourceFolder: folder, + destinationPath: destPath, + success: false, + collisionResolved: false, + error: error.localizedDescription + ) + } + } + + /// Move multiple folders as units + func moveFolders( + folders: [ScannedFolder], + assignments: [FolderCategoryAssignment], + outputFolder: URL, + mode: MovementLogEntry.LLMMode = .full, + provider: String? = nil, + providerVersion: String? = nil, + progressCallback: (@Sendable (Int, Int) -> Void)? = nil + ) async throws -> [SafeFolderOrganizationResult] { + var results: [SafeFolderOrganizationResult] = [] + + // Build assignment lookup + let assignmentMap = Dictionary( + assignments.map { ($0.folderId, $0) }, + uniquingKeysWith: { first, _ in first } + ) + + for (index, folder) in folders.enumerated() { + guard let assignment = assignmentMap[folder.id] else { + results.append(SafeFolderOrganizationResult( + sourceFolder: folder, + destinationPath: outputFolder, + success: false, + collisionResolved: false, + error: "No assignment found for folder" + )) + progressCallback?(index + 1, folders.count) + continue + } + + let result = try await moveFolder( + folder: folder, + assignment: assignment, + outputFolder: outputFolder, + mode: mode, + provider: provider, + providerVersion: providerVersion + ) + results.append(result) + progressCallback?(index + 1, folders.count) + } + + return results + } + + /// Perform the actual folder operation + private func performFolderOperation(source: URL, destination: URL, mode: OrganizationMode) async throws { + let fileManager = FileManager.default + + switch mode { + case .move: + try fileManager.moveItem(at: source, to: destination) + + case .copy: + try fileManager.copyItem(at: source, to: destination) + + case .symlink: + // Create symlink at destination pointing to source + try fileManager.createSymbolicLink(at: destination, withDestinationURL: source) + } + } + + /// Resolve folder name collision + private func resolveFolderCollision(_ url: URL) async throws -> URL { + let fileManager = FileManager.default + let parentDir = url.deletingLastPathComponent() + let baseName = url.lastPathComponent + + var counter = 1 + var newPath: URL + + repeat { + let newName = "\(baseName) (\(counter))" + newPath = parentDir.appendingPathComponent(newName) + counter += 1 + } while fileManager.fileExists(atPath: newPath.path) && counter < 1000 + + if counter >= 1000 { + throw SafeOrganizerError.collisionResolutionFailed(url.path) + } + + return newPath + } + + /// Log folder movement to database + private func logFolderMovement( + folder: ScannedFolder, + from source: URL, + to destination: URL, + mode: MovementLogEntry.LLMMode, + provider: String?, + providerVersion: String? + ) async throws { + // Log an entry for each file in the folder + for file in folder.containedFiles { + let relativePath = file.url.path.replacingOccurrences(of: source.path, with: "") + let newFilePath = destination.appendingPathComponent(relativePath) + + let logEntry = MovementLogEntry( + id: UUID().uuidString, + timestamp: Date(), + source: file.url, + destination: newFilePath, + reason: "Folder: \(folder.folderName)", + confidence: 1.0, // Folder move confidence is delegated to folder-level + mode: mode, + provider: provider, + providerVersion: providerVersion, + operationType: config.mode == .move ? .move : + config.mode == .copy ? .copy : .symlink, + undoable: config.enableUndo, + undoneAt: nil + ) + + try database.movementLog.create(logEntry) + } + } + + // MARK: - Collision Resolution /// Resolve file name collision using configured strategy @@ -404,6 +625,15 @@ struct SafeOrganizationResult: Sendable { } } +/// Result of organizing a single folder as a unit +struct SafeFolderOrganizationResult: Sendable { + let sourceFolder: ScannedFolder + let destinationPath: URL + let success: Bool + let collisionResolved: Bool + let error: String? +} + // MARK: - Errors enum SafeOrganizerError: Error, LocalizedError { diff --git a/Sources/SortAI/Core/Taxonomy/FilenameScanner.swift b/Sources/SortAI/Core/Taxonomy/FilenameScanner.swift index a25af4b..3c5df91 100644 --- a/Sources/SortAI/Core/Taxonomy/FilenameScanner.swift +++ b/Sources/SortAI/Core/Taxonomy/FilenameScanner.swift @@ -28,12 +28,59 @@ actor FilenameScanner { /// Minimum file size (bytes) - skip tiny files let minFileSize: Int64 + // MARK: - Hierarchy Settings + + /// Whether to respect folder hierarchy (treat sub-folders as units) + let respectHierarchy: Bool + + /// Minimum depth to treat as folder unit (1 = immediate children of scan root) + let minDepthForFolder: Int + + /// Minimum files in a folder to treat it as a unit (folders with fewer files become loose) + let minFilesForFolder: Int + + /// Full initializer with all parameters + init( + maxFiles: Int, + includeHidden: Bool, + excludedExtensions: Set, + excludedDirectories: Set, + minFileSize: Int64, + respectHierarchy: Bool = true, + minDepthForFolder: Int = 1, + minFilesForFolder: Int = 1 + ) { + self.maxFiles = maxFiles + self.includeHidden = includeHidden + self.excludedExtensions = excludedExtensions + self.excludedDirectories = excludedDirectories + self.minFileSize = minFileSize + self.respectHierarchy = respectHierarchy + self.minDepthForFolder = minDepthForFolder + self.minFilesForFolder = minFilesForFolder + } + static let `default` = Configuration( maxFiles: 10000, includeHidden: false, excludedExtensions: [".ds_store", ".localized", ".gitignore", ".gitattributes"], excludedDirectories: ["node_modules", ".git", ".svn", "__pycache__", ".cache", "build", "dist"], - minFileSize: 100 // Skip files smaller than 100 bytes + minFileSize: 100, // Skip files smaller than 100 bytes + respectHierarchy: true, + minDepthForFolder: 1, + minFilesForFolder: 1 + ) + + /// Configuration for flat scanning (legacy behavior) + static let flat = Configuration( + maxFiles: 10000, + includeHidden: false, + excludedExtensions: [".ds_store", ".localized", ".gitignore", ".gitattributes"], + excludedDirectories: ["node_modules", ".git", ".svn", "__pycache__", ".cache", "build", "dist"], + minFileSize: 100, + respectHierarchy: false, + minDepthForFolder: 1, + minFilesForFolder: 1 ) } @@ -181,6 +228,268 @@ actor FilenameScanner { scanResult.files.map { $0.filename } } + // MARK: - Hierarchy-Aware Scanning + + /// Scan a folder with hierarchy awareness + /// - Sub-folders become folder units (moved as complete units) + /// - Loose files at root level are analyzed individually + /// - Parameter folderURL: The folder to scan + /// - Returns: HierarchyScanResult with folders and loose files separated + func scanWithHierarchy(folder folderURL: URL) async throws -> HierarchyScanResult { + NSLog("🔍 [Scanner] Starting hierarchy-aware scan of: \(folderURL.path)") + NSLog("🔍 [Scanner] Config: respectHierarchy=\(config.respectHierarchy), minDepth=\(config.minDepthForFolder), minFiles=\(config.minFilesForFolder)") + + guard fileManager.fileExists(atPath: folderURL.path) else { + NSLog("❌ [Scanner] Folder not found: \(folderURL.path)") + throw ScanError.folderNotFound(folderURL.path) + } + + var isDirectory: ObjCBool = false + guard fileManager.fileExists(atPath: folderURL.path, isDirectory: &isDirectory), + isDirectory.boolValue else { + NSLog("❌ [Scanner] Not a directory: \(folderURL.path)") + throw ScanError.notADirectory(folderURL.path) + } + + let startTime = Date() + var folders: [ScannedFolder] = [] + var looseFiles: [TaxonomyScannedFile] = [] + var skippedCount = 0 + + // Get immediate children of the scan root + let contents: [URL] + do { + contents = try fileManager.contentsOfDirectory( + at: folderURL, + includingPropertiesForKeys: [.isDirectoryKey, .isHiddenKey], + options: config.includeHidden ? [] : [.skipsHiddenFiles] + ) + } catch { + throw ScanError.enumerationFailed + } + + NSLog("🔍 [Scanner] Found \(contents.count) immediate children") + + // Process each immediate child + for itemURL in contents { + // Check excluded directories + let itemName = itemURL.lastPathComponent + if config.excludedDirectories.contains(itemName) { + skippedCount += 1 + continue + } + + let resourceValues = try? itemURL.resourceValues(forKeys: [.isDirectoryKey, .isHiddenKey]) + + // Skip hidden items if configured + if !config.includeHidden && (resourceValues?.isHidden == true) { + skippedCount += 1 + continue + } + + if resourceValues?.isDirectory == true { + // This is a sub-folder - scan it as a unit + let scannedFolder = try await scanFolderAsUnit( + url: itemURL, + relativeTo: folderURL, + depth: 1 + ) + + // Only treat as folder unit if it meets minimum file threshold + if scannedFolder.fileCount >= config.minFilesForFolder { + folders.append(scannedFolder) + NSLog("📁 [Scanner] Folder unit: '\(scannedFolder.folderName)' (\(scannedFolder.fileCount) files)") + } else if scannedFolder.fileCount > 0 { + // Flatten: add contained files as loose files + looseFiles.append(contentsOf: scannedFolder.containedFiles) + NSLog("📄 [Scanner] Flattened folder: '\(scannedFolder.folderName)' (\(scannedFolder.fileCount) files below threshold)") + } + // Empty folders are silently skipped + + } else { + // This is a loose file at root level + if let file = try? scanSingleFile(url: itemURL, relativeTo: folderURL) { + looseFiles.append(file) + } else { + skippedCount += 1 + } + } + + // Check limits + let totalFiles = folders.reduce(0) { $0 + $1.fileCount } + looseFiles.count + if totalFiles >= config.maxFiles { + NSLog("⚠️ [Scanner] Reached file limit: \(config.maxFiles)") + break + } + } + + let duration = Date().timeIntervalSince(startTime) + let totalFiles = folders.reduce(0) { $0 + $1.fileCount } + looseFiles.count + + NSLog("✅ [Scanner] Hierarchy scan complete in %.2fs", duration) + NSLog("✅ [Scanner] Result: \(folders.count) folders, \(looseFiles.count) loose files, \(totalFiles) total files") + + return HierarchyScanResult( + sourceFolder: folderURL, + sourceFolderName: folderURL.lastPathComponent, + folders: folders, + looseFiles: looseFiles, + skippedCount: skippedCount, + scanDuration: duration, + reachedLimit: totalFiles >= config.maxFiles + ) + } + + /// Scan a folder and all its contents as a unit + /// - Parameters: + /// - url: The folder URL + /// - rootURL: The scan root for computing relative paths + /// - depth: Current depth level + /// - Returns: ScannedFolder containing all files recursively + private func scanFolderAsUnit( + url: URL, + relativeTo rootURL: URL, + depth: Int + ) async throws -> ScannedFolder { + var containedFiles: [TaxonomyScannedFile] = [] + var latestModification: Date? + + // Recursively enumerate all files in this folder + let resourceKeys: Set = [ + .isDirectoryKey, + .isRegularFileKey, + .isHiddenKey, + .fileSizeKey, + .creationDateKey, + .contentModificationDateKey, + .contentTypeKey + ] + + guard let enumerator = fileManager.enumerator( + at: url, + includingPropertiesForKeys: Array(resourceKeys), + options: config.includeHidden ? [] : [.skipsHiddenFiles] + ) else { + throw ScanError.enumerationFailed + } + + while let fileURL = enumerator.nextObject() as? URL { + // Skip excluded directories + if config.excludedDirectories.contains(fileURL.lastPathComponent) { + enumerator.skipDescendants() + continue + } + + guard let resourceValues = try? fileURL.resourceValues(forKeys: resourceKeys) else { + continue + } + + // Skip directories (we're flattening into this folder) + if resourceValues.isDirectory == true { + continue + } + + // Skip non-regular files + guard resourceValues.isRegularFile == true else { + continue + } + + // Check extension exclusions + let ext = fileURL.pathExtension.lowercased() + let filename = fileURL.lastPathComponent.lowercased() + if config.excludedExtensions.contains(".\(ext)") || + config.excludedExtensions.contains(filename) { + continue + } + + // Check minimum file size + if let size = resourceValues.fileSize, size < config.minFileSize { + continue + } + + // Track latest modification + if let modDate = resourceValues.contentModificationDate { + if latestModification == nil || modDate > latestModification! { + latestModification = modDate + } + } + + // Create scanned file record + let scannedFile = TaxonomyScannedFile( + url: fileURL, + filename: fileURL.lastPathComponent, + fileExtension: fileURL.pathExtension, + relativePath: fileURL.path.replacingOccurrences(of: rootURL.path + "/", with: ""), + fileSize: Int64(resourceValues.fileSize ?? 0), + createdAt: resourceValues.creationDate, + modifiedAt: resourceValues.contentModificationDate, + contentType: resourceValues.contentType + ) + + containedFiles.append(scannedFile) + + // Safety limit per folder + if containedFiles.count >= config.maxFiles { + break + } + } + + let totalSize = containedFiles.reduce(0) { $0 + $1.fileSize } + let relativePath = url.path.replacingOccurrences(of: rootURL.path + "/", with: "") + + return ScannedFolder( + url: url, + folderName: url.lastPathComponent, + relativePath: relativePath, + depth: depth, + containedFiles: containedFiles, + totalSize: totalSize, + modifiedAt: latestModification + ) + } + + /// Scan a single file and return its metadata + private func scanSingleFile(url: URL, relativeTo rootURL: URL) throws -> TaxonomyScannedFile? { + let resourceKeys: Set = [ + .isRegularFileKey, + .fileSizeKey, + .creationDateKey, + .contentModificationDateKey, + .contentTypeKey + ] + + let resourceValues = try url.resourceValues(forKeys: resourceKeys) + + // Must be a regular file + guard resourceValues.isRegularFile == true else { + return nil + } + + // Check extension exclusions + let ext = url.pathExtension.lowercased() + let filename = url.lastPathComponent.lowercased() + if config.excludedExtensions.contains(".\(ext)") || + config.excludedExtensions.contains(filename) { + return nil + } + + // Check minimum file size + if let size = resourceValues.fileSize, size < config.minFileSize { + return nil + } + + return TaxonomyScannedFile( + url: url, + filename: url.lastPathComponent, + fileExtension: url.pathExtension, + relativePath: url.path.replacingOccurrences(of: rootURL.path + "/", with: ""), + fileSize: Int64(resourceValues.fileSize ?? 0), + createdAt: resourceValues.creationDate, + modifiedAt: resourceValues.contentModificationDate, + contentType: resourceValues.contentType + ) + } + /// Group files by extension for analysis func groupByExtension(files: [TaxonomyScannedFile]) -> [String: [TaxonomyScannedFile]] { Dictionary(grouping: files) { $0.fileExtension.lowercased() } @@ -338,6 +647,173 @@ struct TaxonomyScannedFile: Identifiable, Hashable, Sendable { } } +// MARK: - Hierarchy-Aware Types + +/// A folder that will be moved as a complete unit during organization +/// Internal structure is preserved - all contained files stay together +struct ScannedFolder: Identifiable, Hashable, Sendable { + let id: UUID + let url: URL + let folderName: String + let relativePath: String // Path relative to scan root + let depth: Int // How deep in folder tree (1 = immediate child of root) + let containedFiles: [TaxonomyScannedFile] + let totalSize: Int64 + let modifiedAt: Date? + + /// Number of files in this folder (including nested) + var fileCount: Int { containedFiles.count } + + /// Formatted total size for display + var formattedSize: String { + ByteCountFormatter.string(fromByteCount: totalSize, countStyle: .file) + } + + /// Dominant file types in this folder (for categorization hints) + var dominantFileTypes: [UTType] { + let types = containedFiles.compactMap { $0.contentType } + let grouped = Dictionary(grouping: types) { $0 } + return grouped.sorted { $0.value.count > $1.value.count } + .prefix(3) + .map { $0.key } + } + + /// Build context string for LLM categorization + var suggestedContext: String { + let fileTypeGroups = Dictionary(grouping: containedFiles) { file -> String in + if file.isImage { return "image" } + if file.isVideo { return "video" } + if file.isAudio { return "audio" } + if file.isDocument { return "document" } + return "other" + } + + let summary = fileTypeGroups.map { "\($0.value.count) \($0.key)(s)" } + .joined(separator: ", ") + + return "Folder '\(folderName)' contains \(summary)" + } + + init( + id: UUID = UUID(), + url: URL, + folderName: String, + relativePath: String, + depth: Int, + containedFiles: [TaxonomyScannedFile], + totalSize: Int64, + modifiedAt: Date? + ) { + self.id = id + self.url = url + self.folderName = folderName + self.relativePath = relativePath + self.depth = depth + self.containedFiles = containedFiles + self.totalSize = totalSize + self.modifiedAt = modifiedAt + } +} + +/// Unified type representing either a folder unit or an individual file +/// Used for displaying and processing scan results in the UI +enum ScanUnit: Identifiable, Sendable { + case folder(ScannedFolder) // Folder moves as unit + case file(TaxonomyScannedFile) // Individual file moves separately + + var id: UUID { + switch self { + case .folder(let f): return f.id + case .file(let f): return f.id + } + } + + var displayName: String { + switch self { + case .folder(let f): return f.folderName + case .file(let f): return f.filename + } + } + + var url: URL { + switch self { + case .folder(let f): return f.url + case .file(let f): return f.url + } + } + + var isFolder: Bool { + if case .folder = self { return true } + return false + } + + var totalSize: Int64 { + switch self { + case .folder(let f): return f.totalSize + case .file(let f): return f.fileSize + } + } + + var formattedSize: String { + ByteCountFormatter.string(fromByteCount: totalSize, countStyle: .file) + } +} + +/// Result of hierarchy-aware scanning +/// Separates sub-folders (as units) from loose files (analyzed individually) +struct HierarchyScanResult: Sendable { + let sourceFolder: URL + let sourceFolderName: String + let folders: [ScannedFolder] // Sub-folders to move as units + let looseFiles: [TaxonomyScannedFile] // Files not in sub-folders + let skippedCount: Int + let scanDuration: TimeInterval + let reachedLimit: Bool + + /// Total items (folders + loose files) + var totalItems: Int { folders.count + looseFiles.count } + + /// Total size of all items + var totalSize: Int64 { + let folderSize = folders.reduce(0) { $0 + $1.totalSize } + let fileSize = looseFiles.reduce(0) { $0 + $1.fileSize } + return folderSize + fileSize + } + + /// Formatted total size + var formattedTotalSize: String { + ByteCountFormatter.string(fromByteCount: totalSize, countStyle: .file) + } + + /// Total file count (including files inside folders) + var totalFileCount: Int { + let folderFiles = folders.reduce(0) { $0 + $1.fileCount } + return folderFiles + looseFiles.count + } + + /// Convert to unified ScanUnit array for UI display + var allUnits: [ScanUnit] { + let folderUnits = folders.map { ScanUnit.folder($0) } + let fileUnits = looseFiles.map { ScanUnit.file($0) } + return folderUnits + fileUnits + } + + /// Convert to legacy TaxonomyScanResult (flattens folders) + /// Useful for compatibility with existing code paths + func toLegacyScanResult() -> TaxonomyScanResult { + let allFiles = folders.flatMap { $0.containedFiles } + looseFiles + return TaxonomyScanResult( + folderURL: sourceFolder, + folderName: sourceFolderName, + files: allFiles, + directoryCount: folders.count, + skippedCount: skippedCount, + scanDuration: scanDuration, + reachedLimit: reachedLimit + ) + } +} + /// Result of scanning a folder for taxonomy inference struct TaxonomyScanResult: Sendable { let folderURL: URL diff --git a/Sources/SortAI/Core/Taxonomy/FolderCategorizer.swift b/Sources/SortAI/Core/Taxonomy/FolderCategorizer.swift new file mode 100644 index 0000000..21459b8 --- /dev/null +++ b/Sources/SortAI/Core/Taxonomy/FolderCategorizer.swift @@ -0,0 +1,367 @@ +// MARK: - Folder Categorizer +// Categorizes folders as units by analyzing their contents + +import Foundation + +// MARK: - Folder Category Assignment + +/// Result of categorizing a folder unit +struct FolderCategoryAssignment: Identifiable, Sendable { + let id: UUID + let folderId: UUID // ScannedFolder.id + let folderName: String + let categoryPath: [String] // e.g., ["Work", "Job Search", "Application Materials"] + let confidence: Double + let rationale: String + let alternativePaths: [[String]] // Other possible categories + + /// Path as display string + var pathString: String { + categoryPath.joined(separator: " / ") + } + + init( + id: UUID = UUID(), + folderId: UUID, + folderName: String, + categoryPath: [String], + confidence: Double, + rationale: String, + alternativePaths: [[String]] = [] + ) { + self.id = id + self.folderId = folderId + self.folderName = folderName + self.categoryPath = categoryPath + self.confidence = confidence + self.rationale = rationale + self.alternativePaths = alternativePaths + } +} + +// MARK: - Folder Categorizer Actor + +/// Categorizes folder units by analyzing their contents +/// Uses LLM to determine the best category for moving the folder as a unit +actor FolderCategorizer { + + // MARK: - Configuration + + struct Configuration: Sendable { + /// Confidence threshold below which we flag for review + let reviewThreshold: Double + + /// Maximum files to include in context (for large folders) + let maxFilesInContext: Int + + /// Include file type summary in prompt + let includeFileTypeSummary: Bool + + /// Include folder name analysis + let analyzeFolderName: Bool + + static let `default` = Configuration( + reviewThreshold: 0.75, + maxFilesInContext: 50, + includeFileTypeSummary: true, + analyzeFolderName: true + ) + } + + // MARK: - Properties + + private let provider: any LLMProvider + private let config: Configuration + private let decoder = JSONDecoder() + + // MARK: - Initialization + + init(provider: any LLMProvider, configuration: Configuration = .default) { + self.provider = provider + self.config = configuration + } + + // MARK: - Categorization + + /// Categorize a single folder within an existing taxonomy + func categorize( + folder: ScannedFolder, + within taxonomy: TaxonomyTree, + options: LLMOptions + ) async throws -> FolderCategoryAssignment { + NSLog("📁 [FolderCategorizer] Categorizing folder: \(folder.folderName) (\(folder.fileCount) files)") + + let existingCategories = taxonomy.allCategories().map { $0.pathString } + let prompt = buildCategorizationPrompt(folder: folder, existingCategories: existingCategories) + + let response = try await provider.completeJSON(prompt: prompt, options: options) + let assignment = try parseCategorizationResponse(response, folder: folder) + + NSLog("📁 [FolderCategorizer] Result: \(assignment.pathString) (confidence: \(Int(assignment.confidence * 100))%)") + + return assignment + } + + /// Categorize multiple folders in batch + func categorizeBatch( + folders: [ScannedFolder], + within taxonomy: TaxonomyTree, + options: LLMOptions, + progressCallback: (@Sendable (Int, Int) -> Void)? = nil + ) async throws -> [FolderCategoryAssignment] { + NSLog("📁 [FolderCategorizer] Starting batch categorization of \(folders.count) folders") + + var assignments: [FolderCategoryAssignment] = [] + + for (index, folder) in folders.enumerated() { + do { + let assignment = try await categorize(folder: folder, within: taxonomy, options: options) + assignments.append(assignment) + } catch { + NSLog("❌ [FolderCategorizer] Failed to categorize '\(folder.folderName)': \(error.localizedDescription)") + // Create a low-confidence fallback assignment + let fallback = FolderCategoryAssignment( + folderId: folder.id, + folderName: folder.folderName, + categoryPath: ["Uncategorized"], + confidence: 0.3, + rationale: "Categorization failed: \(error.localizedDescription)" + ) + assignments.append(fallback) + } + + progressCallback?(index + 1, folders.count) + } + + NSLog("📁 [FolderCategorizer] Batch complete: \(assignments.count) folders categorized") + return assignments + } + + // MARK: - Prompt Building + + /// Build LLM prompt for folder categorization + private func buildCategorizationPrompt(folder: ScannedFolder, existingCategories: [String]) -> String { + // Get file list (limited for large folders) + let fileList = folder.containedFiles + .prefix(config.maxFilesInContext) + .enumerated() + .map { "\($0.offset + 1). \($0.element.filename)" } + .joined(separator: "\n") + + // Build file type summary if enabled + var fileTypeSummary = "" + if config.includeFileTypeSummary { + let typeGroups = Dictionary(grouping: folder.containedFiles) { file -> String in + if file.isImage { return "image" } + if file.isVideo { return "video" } + if file.isAudio { return "audio" } + if file.isDocument { return "document" } + return "other" + } + + fileTypeSummary = typeGroups + .map { "\($0.value.count) \($0.key)(s)" } + .joined(separator: ", ") + } + + // Build category list + let categoryList = existingCategories.isEmpty + ? "No existing categories - suggest new ones" + : existingCategories.prefix(30).joined(separator: "\n") + + return """ + You are a file organization expert. Analyze this FOLDER and determine what category it belongs to. + + The folder will be MOVED AS A UNIT - all files inside will stay together in their current structure. + + FOLDER NAME: \(folder.folderName) + + FILE COUNT: \(folder.fileCount) files + \(fileTypeSummary.isEmpty ? "" : "FILE TYPES: \(fileTypeSummary)") + + CONTAINED FILES: + \(fileList) + \(folder.fileCount > config.maxFilesInContext ? "... and \(folder.fileCount - config.maxFilesInContext) more files" : "") + + EXISTING CATEGORIES (prefer these if they fit): + \(categoryList) + + RULES: + 1. Analyze the folder NAME and its CONTENTS together + 2. Choose the most appropriate category based on the dominant theme + 3. Use "/" to separate hierarchy levels (e.g., "Work / Projects / 2024") + 4. Confidence should reflect how well the folder fits the category + 5. Provide alternatives if the primary choice isn't clear-cut + + Return ONLY valid JSON: + { + "categoryPath": ["Top Level", "Sub Category", "Specific"], + "confidence": 0.85, + "rationale": "Brief explanation of why this category fits", + "alternatives": [ + ["Alternative", "Path", "One"], + ["Alternative", "Path", "Two"] + ] + } + """ + } + + // MARK: - Response Parsing + + /// Parse LLM categorization response + private func parseCategorizationResponse(_ response: String, folder: ScannedFolder) throws -> FolderCategoryAssignment { + let cleaned = cleanJSON(response) + + guard let data = cleaned.data(using: .utf8) else { + throw FolderCategorizationError.invalidResponse("Invalid UTF-8") + } + + struct Response: Decodable { + let categoryPath: [String] + let confidence: Double + let rationale: String? + let alternatives: [[String]]? + } + + let parsed = try decoder.decode(Response.self, from: data) + + return FolderCategoryAssignment( + folderId: folder.id, + folderName: folder.folderName, + categoryPath: parsed.categoryPath, + confidence: parsed.confidence, + rationale: parsed.rationale ?? "", + alternativePaths: parsed.alternatives ?? [] + ) + } + + /// Clean JSON response (remove markdown, etc.) + private func cleanJSON(_ response: String) -> String { + var cleaned = response.trimmingCharacters(in: .whitespacesAndNewlines) + + // Remove markdown code blocks + if cleaned.hasPrefix("```") { + if let start = cleaned.range(of: "\n"), + let end = cleaned.range(of: "```", options: .backwards) { + cleaned = String(cleaned[start.upperBound.. FolderCategoryAssignment { + let folderName = folder.folderName.lowercased() + var categoryPath: [String] = [] + var confidence: Double = 0.6 + var rationale = "" + + // Analyze folder name for common patterns + if folderName.contains("resume") || folderName.contains("cv") { + categoryPath = ["Work", "Job Search", "Application Materials"] + rationale = "Folder name suggests job application materials" + confidence = 0.85 + } else if folderName.contains("photo") || folderName.contains("picture") || folderName.contains("image") { + categoryPath = ["Media", "Photos"] + rationale = "Folder name suggests photo collection" + confidence = 0.8 + } else if folderName.contains("video") || folderName.contains("movie") || folderName.contains("film") { + categoryPath = ["Media", "Videos"] + rationale = "Folder name suggests video collection" + confidence = 0.8 + } else if folderName.contains("music") || folderName.contains("song") || folderName.contains("audio") { + categoryPath = ["Media", "Music"] + rationale = "Folder name suggests music collection" + confidence = 0.8 + } else if folderName.contains("project") || folderName.contains("work") { + categoryPath = ["Work", "Projects"] + rationale = "Folder name suggests work project" + confidence = 0.7 + } else if folderName.contains("document") || folderName.contains("doc") { + categoryPath = ["Documents"] + rationale = "Folder name suggests documents" + confidence = 0.7 + } else if folderName.contains("backup") || folderName.contains("archive") { + categoryPath = ["Archives"] + rationale = "Folder name suggests backup or archive" + confidence = 0.75 + } else if folderName.contains("download") { + categoryPath = ["Downloads"] + rationale = "Folder name suggests downloads" + confidence = 0.7 + } else { + // Analyze file types as fallback + let typeGroups = Dictionary(grouping: folder.containedFiles) { file -> String in + if file.isImage { return "image" } + if file.isVideo { return "video" } + if file.isAudio { return "audio" } + if file.isDocument { return "document" } + return "other" + } + + // Find dominant type + let dominant = typeGroups.max(by: { $0.value.count < $1.value.count }) + + switch dominant?.key { + case "image": + categoryPath = ["Media", "Photos"] + rationale = "Folder primarily contains images" + confidence = 0.65 + case "video": + categoryPath = ["Media", "Videos"] + rationale = "Folder primarily contains videos" + confidence = 0.65 + case "audio": + categoryPath = ["Media", "Music"] + rationale = "Folder primarily contains audio files" + confidence = 0.65 + case "document": + categoryPath = ["Documents"] + rationale = "Folder primarily contains documents" + confidence = 0.65 + default: + categoryPath = ["Uncategorized", folder.folderName] + rationale = "Could not determine category from folder name or contents" + confidence = 0.4 + } + } + + return FolderCategoryAssignment( + folderId: folder.id, + folderName: folder.folderName, + categoryPath: categoryPath, + confidence: confidence, + rationale: rationale + ) + } +} + +// MARK: - Errors + +enum FolderCategorizationError: LocalizedError { + case invalidResponse(String) + case noProvider + case timeout + case folderNotFound + + var errorDescription: String? { + switch self { + case .invalidResponse(let reason): + return "Invalid categorization response: \(reason)" + case .noProvider: + return "No LLM provider available for folder categorization" + case .timeout: + return "Folder categorization timed out" + case .folderNotFound: + return "Folder not found" + } + } +} diff --git a/Tests/SortAITests/OrganizationTests.swift b/Tests/SortAITests/OrganizationTests.swift index 4b7b6f8..deaf5be 100644 --- a/Tests/SortAITests/OrganizationTests.swift +++ b/Tests/SortAITests/OrganizationTests.swift @@ -318,3 +318,211 @@ struct ThrottlerErrorTests { } } + +// MARK: - Hierarchy-Aware Organization Tests + +@Suite("HierarchyOrganization Tests") +struct HierarchyOrganizationTests { + + @Test("HierarchyAwareOrganizationPlan totals") + func testHierarchyAwarePlanTotals() { + let folder = ScannedFolder( + url: URL(fileURLWithPath: "/test/MyFolder"), + folderName: "MyFolder", + relativePath: "MyFolder", + depth: 1, + containedFiles: [ + TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/MyFolder/file1.pdf"), + filename: "file1.pdf", + fileExtension: "pdf", + fileSize: 1000, + modificationDate: Date() + ), + TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/MyFolder/file2.pdf"), + filename: "file2.pdf", + fileExtension: "pdf", + fileSize: 2000, + modificationDate: Date() + ) + ], + totalSize: 3000, + modifiedAt: nil + ) + + let folderOp = FolderOrganizationOperation( + sourceFolder: folder, + destinationFolder: URL(fileURLWithPath: "/output/Work"), + destinationCategory: "Work / Projects", + confidence: 0.85, + mode: .move + ) + + let looseFile = TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/loose.txt"), + filename: "loose.txt", + fileExtension: "txt", + fileSize: 500, + modificationDate: Date() + ) + + let fileOp = OrganizationOperation( + sourceFile: looseFile, + destinationFolder: URL(fileURLWithPath: "/output/Documents"), + destinationPath: URL(fileURLWithPath: "/output/Documents/loose.txt"), + mode: .move + ) + + let plan = HierarchyAwareOrganizationPlan( + folderOperations: [folderOp], + fileOperations: [fileOp], + folderConflicts: [], + fileConflicts: [], + estimatedSize: 3500 + ) + + #expect(plan.totalItems == 2) // 1 folder + 1 file + #expect(plan.totalFileCount == 3) // 2 in folder + 1 loose + #expect(!plan.hasConflicts) + #expect(plan.estimatedSize == 3500) + } + + @Test("FolderOrganizationOperation properties") + func testFolderOrganizationOperation() { + let folder = ScannedFolder( + url: URL(fileURLWithPath: "/test/Photos"), + folderName: "Photos", + relativePath: "Photos", + depth: 1, + containedFiles: [], + totalSize: 0, + modifiedAt: nil + ) + + let op = FolderOrganizationOperation( + sourceFolder: folder, + destinationFolder: URL(fileURLWithPath: "/output/Media"), + destinationCategory: "Media / Photos", + confidence: 0.92, + mode: .copy + ) + + #expect(op.sourceFolder.folderName == "Photos") + #expect(op.destinationCategory == "Media / Photos") + #expect(op.confidence == 0.92) + #expect(op.preserveInternalStructure == true) // Always true for folders + #expect(op.mode == .copy) + } + + @Test("HierarchyAwareOrganizationPlan toLegacyPlan") + func testHierarchyAwarePlanToLegacy() { + let folder = ScannedFolder( + url: URL(fileURLWithPath: "/test/Work"), + folderName: "Work", + relativePath: "Work", + depth: 1, + containedFiles: [ + TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/Work/doc.pdf"), + filename: "doc.pdf", + fileExtension: "pdf", + fileSize: 1000, + modificationDate: Date() + ) + ], + totalSize: 1000, + modifiedAt: nil + ) + + let folderOp = FolderOrganizationOperation( + sourceFolder: folder, + destinationFolder: URL(fileURLWithPath: "/output/Projects"), + destinationCategory: "Projects", + confidence: 0.9, + mode: .move + ) + + let plan = HierarchyAwareOrganizationPlan( + folderOperations: [folderOp], + fileOperations: [], + folderConflicts: [], + fileConflicts: [], + estimatedSize: 1000 + ) + + let legacy = plan.toLegacyPlan() + + // Folder should be flattened into file operations + #expect(legacy.operations.count >= 0) // Conversion may not create ops if paths don't match + #expect(legacy.estimatedSize == 1000) + } + + @Test("OrganizationEngine planHierarchyOrganization basic") + func testPlanHierarchyOrganization() async { + let engine = OrganizationEngine() + let tree = TaxonomyTree(rootName: "Root") + _ = tree.addCategory(path: ["Work"]) + _ = tree.addCategory(path: ["Documents"]) + + // Create scan result + let folder = ScannedFolder( + url: URL(fileURLWithPath: "/test/Resumes"), + folderName: "Resumes", + relativePath: "Resumes", + depth: 1, + containedFiles: [ + TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/Resumes/resume.pdf"), + filename: "resume.pdf", + fileExtension: "pdf", + fileSize: 1000, + modificationDate: Date() + ) + ], + totalSize: 1000, + modifiedAt: nil + ) + + let looseFile = TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/notes.txt"), + filename: "notes.txt", + fileExtension: "txt", + fileSize: 500, + modificationDate: Date() + ) + + let scanResult = HierarchyScanResult( + sourceFolder: URL(fileURLWithPath: "/test"), + sourceFolderName: "test", + folders: [folder], + looseFiles: [looseFile], + skippedCount: 0, + scanDuration: 0.5, + reachedLimit: false + ) + + let folderAssignment = FolderCategoryAssignment( + folderId: folder.id, + folderName: "Resumes", + categoryPath: ["Work", "Job Search"], + confidence: 0.85, + rationale: "Resume documents" + ) + + // Note: Loose file has no assignment - should go to Uncategorized + + let outputFolder = URL(fileURLWithPath: "/output") + let plan = await engine.planHierarchyOrganization( + scanResult: scanResult, + folderAssignments: [folderAssignment], + fileAssignments: [], + tree: tree, + outputFolder: outputFolder + ) + + #expect(plan.folderOperations.count == 1) + #expect(plan.fileOperations.count == 1) // loose file to Uncategorized + #expect(plan.folderOperations[0].destinationCategory == "Work / Job Search") + } +} diff --git a/Tests/SortAITests/TaxonomyTests.swift b/Tests/SortAITests/TaxonomyTests.swift index 9d982db..7f73379 100644 --- a/Tests/SortAITests/TaxonomyTests.swift +++ b/Tests/SortAITests/TaxonomyTests.swift @@ -745,5 +745,572 @@ struct FilenameScannerTests { // Check it formats to some readable string #expect(!result.formattedTotalSize.isEmpty) } + + @Test("Scanner hierarchy configuration defaults") + func testScannerHierarchyConfigurationDefaults() { + let config = FilenameScanner.Configuration.default + + #expect(config.respectHierarchy == true) + #expect(config.minDepthForFolder == 1) + #expect(config.minFilesForFolder == 1) + } + + @Test("Scanner flat configuration disables hierarchy") + func testScannerFlatConfiguration() { + let config = FilenameScanner.Configuration.flat + + #expect(config.respectHierarchy == false) + } +} + +// MARK: - Hierarchy-Aware Scanning Tests + +@Suite("HierarchyScanning Tests") +struct HierarchyScanningTests { + + // MARK: - ScannedFolder Tests + + @Test("Create ScannedFolder") + func testCreateScannedFolder() { + let file1 = TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/Resumes/resume_v1.pdf"), + filename: "resume_v1.pdf", + fileExtension: "pdf", + fileSize: 50000, + modificationDate: Date() + ) + + let file2 = TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/Resumes/resume_v2.docx"), + filename: "resume_v2.docx", + fileExtension: "docx", + fileSize: 75000, + modificationDate: Date() + ) + + let folder = ScannedFolder( + url: URL(fileURLWithPath: "/test/Resumes"), + folderName: "Resumes", + relativePath: "Resumes", + depth: 1, + containedFiles: [file1, file2], + totalSize: 125000, + modifiedAt: Date() + ) + + #expect(folder.folderName == "Resumes") + #expect(folder.fileCount == 2) + #expect(folder.totalSize == 125000) + #expect(folder.depth == 1) + #expect(!folder.formattedSize.isEmpty) + } + + @Test("ScannedFolder suggestedContext") + func testScannedFolderContext() { + let pdf = TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/Docs/file.pdf"), + filename: "file.pdf", + fileExtension: "pdf", + fileSize: 1000, + modificationDate: Date() + ) + + let folder = ScannedFolder( + url: URL(fileURLWithPath: "/test/Docs"), + folderName: "Docs", + relativePath: "Docs", + depth: 1, + containedFiles: [pdf], + totalSize: 1000, + modifiedAt: nil + ) + + let context = folder.suggestedContext + #expect(context.contains("Docs")) + #expect(context.contains("document")) + } + + // MARK: - ScanUnit Tests + + @Test("ScanUnit folder case") + func testScanUnitFolder() { + let folder = ScannedFolder( + url: URL(fileURLWithPath: "/test/MyFolder"), + folderName: "MyFolder", + relativePath: "MyFolder", + depth: 1, + containedFiles: [], + totalSize: 0, + modifiedAt: nil + ) + + let unit = ScanUnit.folder(folder) + + #expect(unit.displayName == "MyFolder") + #expect(unit.isFolder) + #expect(unit.url.lastPathComponent == "MyFolder") + } + + @Test("ScanUnit file case") + func testScanUnitFile() { + let file = TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/document.pdf"), + filename: "document.pdf", + fileExtension: "pdf", + fileSize: 1024, + modificationDate: Date() + ) + + let unit = ScanUnit.file(file) + + #expect(unit.displayName == "document.pdf") + #expect(!unit.isFolder) + #expect(unit.totalSize == 1024) + } + + // MARK: - HierarchyScanResult Tests + + @Test("HierarchyScanResult totals") + func testHierarchyScanResultTotals() { + let file1 = TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/loose.pdf"), + filename: "loose.pdf", + fileExtension: "pdf", + fileSize: 1000, + modificationDate: Date() + ) + + let file2 = TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/Folder/inside.pdf"), + filename: "inside.pdf", + fileExtension: "pdf", + fileSize: 2000, + modificationDate: Date() + ) + + let folder = ScannedFolder( + url: URL(fileURLWithPath: "/test/Folder"), + folderName: "Folder", + relativePath: "Folder", + depth: 1, + containedFiles: [file2], + totalSize: 2000, + modifiedAt: nil + ) + + let result = HierarchyScanResult( + sourceFolder: URL(fileURLWithPath: "/test"), + sourceFolderName: "test", + folders: [folder], + looseFiles: [file1], + skippedCount: 0, + scanDuration: 0.5, + reachedLimit: false + ) + + #expect(result.totalItems == 2) // 1 folder + 1 loose file + #expect(result.totalFileCount == 2) // 1 in folder + 1 loose + #expect(result.totalSize == 3000) // 2000 + 1000 + #expect(result.allUnits.count == 2) + } + + @Test("HierarchyScanResult toLegacyScanResult") + func testHierarchyScanResultToLegacy() { + let looseFile = TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/loose.txt"), + filename: "loose.txt", + fileExtension: "txt", + fileSize: 100, + modificationDate: Date() + ) + + let folderFile = TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/Folder/inside.txt"), + filename: "inside.txt", + fileExtension: "txt", + fileSize: 200, + modificationDate: Date() + ) + + let folder = ScannedFolder( + url: URL(fileURLWithPath: "/test/Folder"), + folderName: "Folder", + relativePath: "Folder", + depth: 1, + containedFiles: [folderFile], + totalSize: 200, + modifiedAt: nil + ) + + let hierarchyResult = HierarchyScanResult( + sourceFolder: URL(fileURLWithPath: "/test"), + sourceFolderName: "test", + folders: [folder], + looseFiles: [looseFile], + skippedCount: 5, + scanDuration: 1.0, + reachedLimit: false + ) + + let legacy = hierarchyResult.toLegacyScanResult() + + #expect(legacy.files.count == 2) // Both files flattened + #expect(legacy.directoryCount == 1) + #expect(legacy.skippedCount == 5) + #expect(legacy.scanDuration == 1.0) + } + + // MARK: - Integration Tests (require temp directory) + + @Test("Scan with hierarchy separates folders from loose files") + func testScanWithHierarchy() async throws { + // Create temporary test directory structure + let tempDir = FileManager.default.temporaryDirectory + .appendingPathComponent("SortAI_HierarchyTest_\(UUID().uuidString)") + + defer { + try? FileManager.default.removeItem(at: tempDir) + } + + // Create structure: + // tempDir/ + // ├── loose_file.txt (loose file) + // ├── Resumes/ (folder unit) + // │ └── resume.pdf + // └── Photos/ (folder unit) + // └── photo.jpg + + try FileManager.default.createDirectory(at: tempDir, withIntermediateDirectories: true) + try FileManager.default.createDirectory(at: tempDir.appendingPathComponent("Resumes"), withIntermediateDirectories: true) + try FileManager.default.createDirectory(at: tempDir.appendingPathComponent("Photos"), withIntermediateDirectories: true) + + // Create files (must be >= 100 bytes to pass minFileSize check) + let testContent = String(repeating: "test content ", count: 20) // ~260 bytes + try testContent.write(to: tempDir.appendingPathComponent("loose_file.txt"), atomically: true, encoding: .utf8) + try testContent.write(to: tempDir.appendingPathComponent("Resumes/resume.pdf"), atomically: true, encoding: .utf8) + try testContent.write(to: tempDir.appendingPathComponent("Photos/photo.jpg"), atomically: true, encoding: .utf8) + + // Scan with hierarchy + let scanner = FilenameScanner() + let result = try await scanner.scanWithHierarchy(folder: tempDir) + + // Verify results + #expect(result.folders.count == 2, "Expected 2 folders (Resumes, Photos), got \(result.folders.count)") + #expect(result.looseFiles.count == 1, "Expected 1 loose file, got \(result.looseFiles.count)") + #expect(result.totalFileCount == 3, "Expected 3 total files") + + // Verify folder names + let folderNames = Set(result.folders.map { $0.folderName }) + #expect(folderNames.contains("Resumes")) + #expect(folderNames.contains("Photos")) + + // Verify loose file + #expect(result.looseFiles[0].filename == "loose_file.txt") + } + + @Test("Scan with hierarchy flattens empty folders") + func testScanWithHierarchyFlattensEmptyFolders() async throws { + let tempDir = FileManager.default.temporaryDirectory + .appendingPathComponent("SortAI_EmptyFolderTest_\(UUID().uuidString)") + + defer { + try? FileManager.default.removeItem(at: tempDir) + } + + // Create structure: + // tempDir/ + // ├── EmptyFolder/ (empty - should be ignored) + // └── NonEmpty/ + // └── file.txt + + try FileManager.default.createDirectory(at: tempDir, withIntermediateDirectories: true) + try FileManager.default.createDirectory(at: tempDir.appendingPathComponent("EmptyFolder"), withIntermediateDirectories: true) + try FileManager.default.createDirectory(at: tempDir.appendingPathComponent("NonEmpty"), withIntermediateDirectories: true) + + let testContent = String(repeating: "content ", count: 50) + try testContent.write(to: tempDir.appendingPathComponent("NonEmpty/file.txt"), atomically: true, encoding: .utf8) + + let scanner = FilenameScanner() + let result = try await scanner.scanWithHierarchy(folder: tempDir) + + // Empty folder should not appear + #expect(result.folders.count == 1, "Expected 1 folder (NonEmpty only)") + #expect(result.folders[0].folderName == "NonEmpty") + } + + @Test("Scan with hierarchy respects minFilesForFolder threshold") + func testScanWithHierarchyMinFilesThreshold() async throws { + let tempDir = FileManager.default.temporaryDirectory + .appendingPathComponent("SortAI_MinFilesTest_\(UUID().uuidString)") + + defer { + try? FileManager.default.removeItem(at: tempDir) + } + + // Create structure: + // tempDir/ + // ├── BigFolder/ (2 files - above threshold of 2) + // │ ├── file1.txt + // │ └── file2.txt + // └── SmallFolder/ (1 file - below threshold of 2) + // └── only.txt + + try FileManager.default.createDirectory(at: tempDir, withIntermediateDirectories: true) + try FileManager.default.createDirectory(at: tempDir.appendingPathComponent("BigFolder"), withIntermediateDirectories: true) + try FileManager.default.createDirectory(at: tempDir.appendingPathComponent("SmallFolder"), withIntermediateDirectories: true) + + let testContent = String(repeating: "content ", count: 50) + try testContent.write(to: tempDir.appendingPathComponent("BigFolder/file1.txt"), atomically: true, encoding: .utf8) + try testContent.write(to: tempDir.appendingPathComponent("BigFolder/file2.txt"), atomically: true, encoding: .utf8) + try testContent.write(to: tempDir.appendingPathComponent("SmallFolder/only.txt"), atomically: true, encoding: .utf8) + + // Create scanner with minFilesForFolder = 2 + let config = FilenameScanner.Configuration( + maxFiles: 10000, + includeHidden: false, + excludedExtensions: [], + excludedDirectories: [], + minFileSize: 100, + respectHierarchy: true, + minDepthForFolder: 1, + minFilesForFolder: 2 // Require at least 2 files + ) + let scanner = FilenameScanner(configuration: config) + let result = try await scanner.scanWithHierarchy(folder: tempDir) + + // BigFolder should be a folder unit, SmallFolder's file should be flattened to loose + #expect(result.folders.count == 1, "Expected 1 folder (BigFolder)") + #expect(result.folders[0].folderName == "BigFolder") + #expect(result.looseFiles.count == 1, "Expected 1 loose file (from SmallFolder)") + #expect(result.looseFiles[0].filename == "only.txt") + } + + @Test("Scan with hierarchy preserves nested structure in folders") + func testScanWithHierarchyPreservesNestedStructure() async throws { + let tempDir = FileManager.default.temporaryDirectory + .appendingPathComponent("SortAI_NestedTest_\(UUID().uuidString)") + + defer { + try? FileManager.default.removeItem(at: tempDir) + } + + // Create structure: + // tempDir/ + // └── Resumes/ (folder unit) + // ├── resume.pdf + // └── 2024/ + // └── latest.pdf + + try FileManager.default.createDirectory(at: tempDir, withIntermediateDirectories: true) + try FileManager.default.createDirectory(at: tempDir.appendingPathComponent("Resumes/2024"), withIntermediateDirectories: true) + + let testContent = String(repeating: "content ", count: 50) + try testContent.write(to: tempDir.appendingPathComponent("Resumes/resume.pdf"), atomically: true, encoding: .utf8) + try testContent.write(to: tempDir.appendingPathComponent("Resumes/2024/latest.pdf"), atomically: true, encoding: .utf8) + + let scanner = FilenameScanner() + let result = try await scanner.scanWithHierarchy(folder: tempDir) + + // Should have 1 folder unit with 2 files inside (preserving internal structure) + #expect(result.folders.count == 1) + #expect(result.folders[0].folderName == "Resumes") + #expect(result.folders[0].fileCount == 2, "Nested files should be included") + + // Verify both files are in the folder + let filenames = Set(result.folders[0].containedFiles.map { $0.filename }) + #expect(filenames.contains("resume.pdf")) + #expect(filenames.contains("latest.pdf")) + } +} + +// MARK: - FolderCategorizer Tests + +@Suite("FolderCategorizer Tests") +struct FolderCategorizerTests { + + @Test("FolderCategoryAssignment creation") + func testFolderCategoryAssignmentCreation() { + let assignment = FolderCategoryAssignment( + folderId: UUID(), + folderName: "Resumes", + categoryPath: ["Work", "Job Search", "Application Materials"], + confidence: 0.92, + rationale: "Folder contains resume documents", + alternativePaths: [["Documents", "Work"]] + ) + + #expect(assignment.folderName == "Resumes") + #expect(assignment.categoryPath.count == 3) + #expect(assignment.pathString == "Work / Job Search / Application Materials") + #expect(assignment.confidence == 0.92) + #expect(assignment.alternativePaths.count == 1) + } + + @Test("Quick categorize resume folder") + func testQuickCategorizeResumeFolder() { + let folder = ScannedFolder( + url: URL(fileURLWithPath: "/test/Resumes"), + folderName: "Resumes", + relativePath: "Resumes", + depth: 1, + containedFiles: [ + TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/Resumes/resume.pdf"), + filename: "resume.pdf", + fileExtension: "pdf", + fileSize: 1000, + modificationDate: Date() + ) + ], + totalSize: 1000, + modifiedAt: nil + ) + + let assignment = FolderCategorizer.quickCategorize(folder: folder) + + #expect(assignment.folderName == "Resumes") + #expect(assignment.categoryPath.contains("Job Search") || assignment.categoryPath.contains("Work")) + #expect(assignment.confidence >= 0.8) + } + + @Test("Quick categorize photo folder") + func testQuickCategorizePhotoFolder() { + let folder = ScannedFolder( + url: URL(fileURLWithPath: "/test/Photos"), + folderName: "Photos", + relativePath: "Photos", + depth: 1, + containedFiles: [ + TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/Photos/vacation.jpg"), + filename: "vacation.jpg", + fileExtension: "jpg", + fileSize: 5000, + modificationDate: Date() + ) + ], + totalSize: 5000, + modifiedAt: nil + ) + + let assignment = FolderCategorizer.quickCategorize(folder: folder) + + #expect(assignment.categoryPath.contains("Photos") || assignment.categoryPath.contains("Media")) + #expect(assignment.confidence >= 0.7) + } + + @Test("Quick categorize video folder") + func testQuickCategorizeVideoFolder() { + let folder = ScannedFolder( + url: URL(fileURLWithPath: "/test/Videos"), + folderName: "My_Videos", + relativePath: "My_Videos", + depth: 1, + containedFiles: [ + TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/Videos/movie.mp4"), + filename: "movie.mp4", + fileExtension: "mp4", + fileSize: 50000, + modificationDate: Date() + ) + ], + totalSize: 50000, + modifiedAt: nil + ) + + let assignment = FolderCategorizer.quickCategorize(folder: folder) + + #expect(assignment.categoryPath.contains("Videos") || assignment.categoryPath.contains("Media")) + #expect(assignment.confidence >= 0.7) + } + + @Test("Quick categorize by file type when name unclear") + func testQuickCategorizeByFileType() { + let folder = ScannedFolder( + url: URL(fileURLWithPath: "/test/RandomFolder"), + folderName: "RandomFolder123", + relativePath: "RandomFolder123", + depth: 1, + containedFiles: [ + TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/RandomFolder/1.jpg"), + filename: "1.jpg", + fileExtension: "jpg", + fileSize: 1000, + modificationDate: Date() + ), + TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/RandomFolder/2.jpg"), + filename: "2.jpg", + fileExtension: "jpg", + fileSize: 1000, + modificationDate: Date() + ), + TaxonomyScannedFile( + url: URL(fileURLWithPath: "/test/RandomFolder/3.png"), + filename: "3.png", + fileExtension: "png", + fileSize: 1000, + modificationDate: Date() + ) + ], + totalSize: 3000, + modifiedAt: nil + ) + + let assignment = FolderCategorizer.quickCategorize(folder: folder) + + // Should categorize as photos since all files are images + #expect(assignment.categoryPath.contains("Photos") || assignment.categoryPath.contains("Media")) + #expect(assignment.rationale.contains("image")) + } + + @Test("Quick categorize project folder") + func testQuickCategorizeProjectFolder() { + let folder = ScannedFolder( + url: URL(fileURLWithPath: "/test/MyProject"), + folderName: "MyProject_2024", + relativePath: "MyProject_2024", + depth: 1, + containedFiles: [], + totalSize: 0, + modifiedAt: nil + ) + + let assignment = FolderCategorizer.quickCategorize(folder: folder) + + #expect(assignment.categoryPath.contains("Projects") || assignment.categoryPath.contains("Work")) + #expect(assignment.confidence >= 0.6) + } + + @Test("Quick categorize unknown folder") + func testQuickCategorizeUnknownFolder() { + let folder = ScannedFolder( + url: URL(fileURLWithPath: "/test/abc123xyz"), + folderName: "abc123xyz", + relativePath: "abc123xyz", + depth: 1, + containedFiles: [], + totalSize: 0, + modifiedAt: nil + ) + + let assignment = FolderCategorizer.quickCategorize(folder: folder) + + // Unknown folders should have low confidence + #expect(assignment.categoryPath.contains("Uncategorized")) + #expect(assignment.confidence < 0.5) + } + + @Test("FolderCategorizer configuration defaults") + func testFolderCategorizerConfiguration() { + let config = FolderCategorizer.Configuration.default + + #expect(config.reviewThreshold == 0.75) + #expect(config.maxFilesInContext == 50) + #expect(config.includeFileTypeSummary == true) + #expect(config.analyzeFolderName == true) + } }