From 48b4d4240ffca6858cf7f162fbe1d7b1f3ddd2df Mon Sep 17 00:00:00 2001 From: alarmz Date: Fri, 1 May 2026 14:44:34 +0800 Subject: [PATCH 01/15] Polish Windows port: platform-aware modifier labels and hidden a11y tile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The macOS-style modifier glyphs (⌘ Command, ⌥ Option) are jarring on Windows. Add a top-level _modifierLabel helper that returns Win/Ctrl/Alt/Shift on Windows and the macOS labels elsewhere, and route both the hotkey display and the recorder overlay through it. Also hide the "輔助使用權限" permission tile on Windows since SendInput does not require it (the native channel already returns true unconditionally on Windows; previously the tile was always-green-but-misleading). Co-Authored-By: Claude Opus 4.7 (1M context) --- .../presentation/pages/settings_page.dart | 76 +++++++++++++------ 1 file changed, 54 insertions(+), 22 deletions(-) diff --git a/lib/features/settings/presentation/pages/settings_page.dart b/lib/features/settings/presentation/pages/settings_page.dart index ec37245..9ee077c 100644 --- a/lib/features/settings/presentation/pages/settings_page.dart +++ b/lib/features/settings/presentation/pages/settings_page.dart @@ -1,3 +1,5 @@ +import 'dart:io'; + import 'package:auto_route/auto_route.dart'; import 'package:flutter/material.dart'; import 'package:flutter/services.dart'; @@ -8,6 +10,35 @@ import 'package:zero_type/core/services/sound_service.dart'; import 'package:zero_type/core/theme/theme_controller.dart'; import '../controllers/settings_controller.dart'; +String _modifierLabel(HotKeyModifier mod) { + if (Platform.isWindows) { + switch (mod) { + case HotKeyModifier.meta: + return 'Win'; + case HotKeyModifier.control: + return 'Ctrl'; + case HotKeyModifier.alt: + return 'Alt'; + case HotKeyModifier.shift: + return 'Shift'; + default: + return ''; + } + } + switch (mod) { + case HotKeyModifier.meta: + return '⌘ Command'; + case HotKeyModifier.control: + 
return '⌃ Control'; + case HotKeyModifier.alt: + return '⌥ Option'; + case HotKeyModifier.shift: + return '⇧ Shift'; + default: + return ''; + } +} + @RoutePage() class SettingsPage extends ConsumerStatefulWidget { const SettingsPage({super.key}); @@ -283,19 +314,21 @@ class _SettingsPageState extends ConsumerState with WidgetsBinding const SizedBox(height: 12), _SettingsCard( children: [ - settings.when( - data: (data) => _PermissionTile( - icon: Icons.accessibility_new, - title: '輔助使用權限', - subtitle: '自動貼上功能需要此權限以模擬鍵盤動作', - isAuthorized: data.isAccessibilityAuthorized, - onCheck: () => const MethodChannel('com.zerotype.app/permission') - .invokeMethod('openAccessibilitySettings'), + if (!Platform.isWindows) ...[ + settings.when( + data: (data) => _PermissionTile( + icon: Icons.accessibility_new, + title: '輔助使用權限', + subtitle: '自動貼上功能需要此權限以模擬鍵盤動作', + isAuthorized: data.isAccessibilityAuthorized, + onCheck: () => const MethodChannel('com.zerotype.app/permission') + .invokeMethod('openAccessibilitySettings'), + ), + loading: () => const _LoadingTile(), + error: (_, __) => const SizedBox.shrink(), ), - loading: () => const _LoadingTile(), - error: (_, __) => const SizedBox.shrink(), - ), - const Divider(height: 1, indent: 56), + const Divider(height: 1, indent: 56), + ], settings.when( data: (data) => _PermissionTile( icon: Icons.mic, @@ -334,12 +367,7 @@ class _SettingsPageState extends ConsumerState with WidgetsBinding if (hotkey.modifiers != null) { for (final mod in hotkey.modifiers!) 
{ - String label = ''; - if (mod == HotKeyModifier.meta) label = '⌘ Command'; - if (mod == HotKeyModifier.shift) label = '⇧ Shift'; - if (mod == HotKeyModifier.alt) label = '⌥ Option'; - if (mod == HotKeyModifier.control) label = '⌃ Control'; - + final label = _modifierLabel(mod); if (label.isNotEmpty) { if (widgets.isNotEmpty) widgets.add(const Padding(padding: EdgeInsets.symmetric(horizontal: 4), child: Text('+'))); widgets.add(_KeyBadge(label: label)); @@ -422,13 +450,17 @@ class _HotkeyRecorderOverlayState extends State<_HotkeyRecorderOverlay> { for (final key in sortedKeys) { if (_isMeta(key)) { - if (!parts.contains('⌘ Command')) parts.add('⌘ Command'); + final label = _modifierLabel(HotKeyModifier.meta); + if (!parts.contains(label)) parts.add(label); } else if (_isControl(key)) { - if (!parts.contains('⌃ Control')) parts.add('⌃ Control'); + final label = _modifierLabel(HotKeyModifier.control); + if (!parts.contains(label)) parts.add(label); } else if (_isAlt(key)) { - if (!parts.contains('⌥ Option')) parts.add('⌥ Option'); + final label = _modifierLabel(HotKeyModifier.alt); + if (!parts.contains(label)) parts.add(label); } else if (_isShift(key)) { - if (!parts.contains('⇧ Shift')) parts.add('⇧ Shift'); + final label = _modifierLabel(HotKeyModifier.shift); + if (!parts.contains(label)) parts.add(label); } else if (key == PhysicalKeyboardKey.space) { parts.add('Space'); } else { From 7215d8e085a0468b29624fcedcd94ceab25c4616 Mon Sep 17 00:00:00 2001 From: alarmz Date: Fri, 1 May 2026 15:47:10 +0800 Subject: [PATCH 02/15] Add LiteLLM support with dynamic model discovery MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ZeroType already exposed a free-text "custom endpoint" for the OpenAI provider, which technically pointed at any OpenAI-compatible proxy. 
But the endpoint was hidden in an "advanced" expansion and the model list was static, so using a LiteLLM proxy meant the user had to hand-type whatever model alias their proxy exposed. This commit adds a first-class `litellm` provider: - assets/config/providers.json: new `litellm` entry with empty static model list (filled dynamically at runtime). - SpeechRecognitionService: - new `case 'litellm'` that requires the user-supplied base URL and routes the request through the existing OpenAI multipart handler, appending /v1/audio/transcriptions to the base. - new `fetchAvailableModels(baseUrl, apiKey)` that GETs /v1/models on the proxy and returns id+name records for the picker. - ModelConfigRepository: `getCachedModels` / `saveCachedModels` keyed by providerId, JSON-encoded into SharedPreferences. - DynamicModelsController (riverpod family by providerId, keepAlive): loads cached list on build; `refresh(baseUrl, apiKey)` hits /v1/models and persists. - model_config_page UI: when the selected provider is `litellm`, render a required Proxy-Base-URL input inline (with a hint that /v1 is added automatically), and replace the static model dropdown with `_LiteLLMModelPicker` — a dropdown sourced from the dynamic controller plus a refresh icon button. Errors and "not yet fetched" states surface inline. Other providers retain the existing static dropdown and "advanced" custom endpoint UX. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- assets/config/providers.json | 7 +- .../services/speech_recognition_service.dart | 47 +++ .../model_config_repository_impl.dart | 25 ++ .../repositories/model_config_repository.dart | 5 + .../controllers/model_config_controller.dart | 35 +++ .../model_config_controller.g.dart | 113 +++++++ .../presentation/pages/model_config_page.dart | 277 +++++++++++++++++- 7 files changed, 493 insertions(+), 16 deletions(-) diff --git a/assets/config/providers.json b/assets/config/providers.json index 17e1c6f..cde40cd 100644 --- a/assets/config/providers.json +++ b/assets/config/providers.json @@ -23,6 +23,11 @@ "name": "Gemini 2.5 Flash" } ] + }, + { + "id": "litellm", + "name": "LiteLLM", + "models": [] } ] -} \ No newline at end of file +} diff --git a/lib/core/services/speech_recognition_service.dart b/lib/core/services/speech_recognition_service.dart index a17aba8..0ee765b 100644 --- a/lib/core/services/speech_recognition_service.dart +++ b/lib/core/services/speech_recognition_service.dart @@ -41,11 +41,58 @@ class SpeechRecognitionService { prompt: prompt, customEndpoint: customEndpoint, ); + case 'litellm': + if (customEndpoint == null || customEndpoint.isEmpty) { + throw Exception('LiteLLM 需要在「進階設定」中填寫 Proxy Base URL'); + } + return _transcribeWithOpenAI( + audioFilePath: audioFilePath, + apiKey: apiKey, + model: model, + prompt: prompt, + customEndpoint: + '${_stripTrailingSlash(customEndpoint)}/v1/audio/transcriptions', + ); default: throw Exception('不支援的語音辨識服務商:$provider'); } } + /// Fetches the model list from an OpenAI-compatible `/v1/models` endpoint + /// (e.g. a LiteLLM proxy). Returns id+name records; callers map to UI + /// entities. The `id` is what gets sent to the transcription endpoint. 
+ Future> fetchAvailableModels({ + required String baseUrl, + required String apiKey, + }) async { + final url = '${_stripTrailingSlash(baseUrl)}/v1/models'; + final response = await _dio.get( + url, + options: Options( + headers: {'Authorization': 'Bearer $apiKey'}, + ), + ); + + Map? body; + if (response.data is Map) { + body = response.data as Map; + } else if (response.data is String) { + body = jsonDecode(response.data as String) as Map; + } + final list = body?['data'] as List? ?? const []; + return list + .whereType>() + .map((m) { + final id = m['id'] as String? ?? ''; + return (id: id, name: id); + }) + .where((m) => m.id.isNotEmpty) + .toList(); + } + + static String _stripTrailingSlash(String s) => + s.replaceAll(RegExp(r'/+$'), ''); + Future _transcribeWithOpenAI({ required String audioFilePath, required String apiKey, diff --git a/lib/features/model_config/data/repositories/model_config_repository_impl.dart b/lib/features/model_config/data/repositories/model_config_repository_impl.dart index e31dfc3..4cfbbcd 100644 --- a/lib/features/model_config/data/repositories/model_config_repository_impl.dart +++ b/lib/features/model_config/data/repositories/model_config_repository_impl.dart @@ -70,4 +70,29 @@ class ModelConfigRepositoryImpl implements ModelConfigRepository { @override Future saveCustomEndpoint(String providerId, String endpoint) async => _prefs.setString('custom_endpoint_$providerId', endpoint); + + @override + Future> getCachedModels(String providerId) async { + final raw = _prefs.getString('cached_models_$providerId'); + if (raw == null || raw.isEmpty) return const []; + try { + final list = jsonDecode(raw) as List; + return list + .map((m) => AiModel( + id: (m as Map)['id'] as String, + name: m['name'] as String, + )) + .toList(); + } catch (_) { + return const []; + } + } + + @override + Future saveCachedModels(String providerId, List models) async { + final raw = jsonEncode( + models.map((m) => {'id': m.id, 'name': m.name}).toList(), + ); + 
await _prefs.setString('cached_models_$providerId', raw); + } } diff --git a/lib/features/model_config/domain/repositories/model_config_repository.dart b/lib/features/model_config/domain/repositories/model_config_repository.dart index c2ee5e7..472047a 100644 --- a/lib/features/model_config/domain/repositories/model_config_repository.dart +++ b/lib/features/model_config/domain/repositories/model_config_repository.dart @@ -16,4 +16,9 @@ abstract class ModelConfigRepository { Future getCustomEndpoint(String providerId); Future saveCustomEndpoint(String providerId, String endpoint); + + /// Cached model list fetched from a dynamic provider's /v1/models endpoint. + /// Empty list when nothing has been fetched yet. + Future> getCachedModels(String providerId); + Future saveCachedModels(String providerId, List models); } diff --git a/lib/features/model_config/presentation/controllers/model_config_controller.dart b/lib/features/model_config/presentation/controllers/model_config_controller.dart index 08ca71d..c95804c 100644 --- a/lib/features/model_config/presentation/controllers/model_config_controller.dart +++ b/lib/features/model_config/presentation/controllers/model_config_controller.dart @@ -1,5 +1,6 @@ import 'package:riverpod_annotation/riverpod_annotation.dart'; import 'package:zero_type/core/di/injection.dart'; +import 'package:zero_type/core/services/speech_recognition_service.dart'; import 'package:zero_type/features/model_config/data/repositories/model_config_repository_impl.dart'; import 'package:zero_type/features/model_config/domain/entities/ai_provider.dart'; import 'package:zero_type/features/model_config/domain/repositories/model_config_repository.dart'; @@ -73,3 +74,37 @@ class SpeechProviderController extends _$SpeechProviderController { } } +/// Holds the dynamically-fetched model list for an OpenAI-compatible provider +/// (currently used by `litellm`). 
Returns the cached list immediately on +/// build; call `refresh()` to hit `/v1/models` and update the cache. +@Riverpod(keepAlive: true) +class DynamicModelsController extends _$DynamicModelsController { + ModelConfigRepository get _repo => _buildRepository(); + SpeechRecognitionService get _service => getIt(); + + @override + Future> build(String providerId) async { + return _repo.getCachedModels(providerId); + } + + Future refresh({ + required String providerId, + required String baseUrl, + required String apiKey, + }) async { + state = const AsyncLoading(); + try { + final fetched = await _service.fetchAvailableModels( + baseUrl: baseUrl, + apiKey: apiKey, + ); + final models = + fetched.map((m) => AiModel(id: m.id, name: m.name)).toList(); + await _repo.saveCachedModels(providerId, models); + state = AsyncData(models); + } catch (e, st) { + state = AsyncError(e, st); + } + } +} + diff --git a/lib/features/model_config/presentation/controllers/model_config_controller.g.dart b/lib/features/model_config/presentation/controllers/model_config_controller.g.dart index cb05de7..8a75127 100644 --- a/lib/features/model_config/presentation/controllers/model_config_controller.g.dart +++ b/lib/features/model_config/presentation/controllers/model_config_controller.g.dart @@ -157,3 +157,116 @@ abstract class _$SpeechProviderController element.handleCreate(ref, build); } } + +/// Holds the dynamically-fetched model list for an OpenAI-compatible provider +/// (currently used by `litellm`). Returns the cached list immediately on +/// build; call `refresh()` to hit `/v1/models` and update the cache. + +@ProviderFor(DynamicModelsController) +final dynamicModelsControllerProvider = DynamicModelsControllerFamily._(); + +/// Holds the dynamically-fetched model list for an OpenAI-compatible provider +/// (currently used by `litellm`). Returns the cached list immediately on +/// build; call `refresh()` to hit `/v1/models` and update the cache. 
+final class DynamicModelsControllerProvider + extends $AsyncNotifierProvider> { + /// Holds the dynamically-fetched model list for an OpenAI-compatible provider + /// (currently used by `litellm`). Returns the cached list immediately on + /// build; call `refresh()` to hit `/v1/models` and update the cache. + DynamicModelsControllerProvider._({ + required DynamicModelsControllerFamily super.from, + required String super.argument, + }) : super( + retry: null, + name: r'dynamicModelsControllerProvider', + isAutoDispose: false, + dependencies: null, + $allTransitiveDependencies: null, + ); + + @override + String debugGetCreateSourceHash() => _$dynamicModelsControllerHash(); + + @override + String toString() { + return r'dynamicModelsControllerProvider' + '' + '($argument)'; + } + + @$internal + @override + DynamicModelsController create() => DynamicModelsController(); + + @override + bool operator ==(Object other) { + return other is DynamicModelsControllerProvider && + other.argument == argument; + } + + @override + int get hashCode { + return argument.hashCode; + } +} + +String _$dynamicModelsControllerHash() => + r'8070a60946d83a5b1a6876a585a70c32ef4b90a9'; + +/// Holds the dynamically-fetched model list for an OpenAI-compatible provider +/// (currently used by `litellm`). Returns the cached list immediately on +/// build; call `refresh()` to hit `/v1/models` and update the cache. + +final class DynamicModelsControllerFamily extends $Family + with + $ClassFamilyOverride< + DynamicModelsController, + AsyncValue>, + List, + FutureOr>, + String + > { + DynamicModelsControllerFamily._() + : super( + retry: null, + name: r'dynamicModelsControllerProvider', + dependencies: null, + $allTransitiveDependencies: null, + isAutoDispose: false, + ); + + /// Holds the dynamically-fetched model list for an OpenAI-compatible provider + /// (currently used by `litellm`). Returns the cached list immediately on + /// build; call `refresh()` to hit `/v1/models` and update the cache. 
+ + DynamicModelsControllerProvider call(String providerId) => + DynamicModelsControllerProvider._(argument: providerId, from: this); + + @override + String toString() => r'dynamicModelsControllerProvider'; +} + +/// Holds the dynamically-fetched model list for an OpenAI-compatible provider +/// (currently used by `litellm`). Returns the cached list immediately on +/// build; call `refresh()` to hit `/v1/models` and update the cache. + +abstract class _$DynamicModelsController extends $AsyncNotifier> { + late final _$args = ref.$arg as String; + String get providerId => _$args; + + FutureOr> build(String providerId); + @$mustCallSuper + @override + void runBuild() { + final ref = this.ref as $Ref>, List>; + final element = + ref.element + as $ClassProviderElement< + AnyNotifier>, List>, + AsyncValue>, + Object?, + Object? + >; + element.handleCreate(ref, () => build(_$args)); + } +} diff --git a/lib/features/model_config/presentation/pages/model_config_page.dart b/lib/features/model_config/presentation/pages/model_config_page.dart index 2693b5f..2c4f3be 100644 --- a/lib/features/model_config/presentation/pages/model_config_page.dart +++ b/lib/features/model_config/presentation/pages/model_config_page.dart @@ -170,6 +170,15 @@ class _SpeechConfigSection extends ConsumerWidget { initialValue: state.apiKey ?? '', onSave: (val) => ref.read(speechProviderControllerProvider.notifier).saveApiKey(val), ), + if (state.providerId == 'litellm') ...[ + const SizedBox(height: 24), + _LiteLLMEndpointInput( + initialValue: state.customEndpoint ?? 
'', + onSave: (val) => ref + .read(speechProviderControllerProvider.notifier) + .saveCustomEndpoint(val), + ), + ], const SizedBox(height: 24), Row( children: [ @@ -181,21 +190,37 @@ class _SpeechConfigSection extends ConsumerWidget { ], ), const SizedBox(height: 12), - _ModelDropdown( - models: selectedProvider.models, - selectedModelId: state.modelId, - onChanged: (val) { - if (val != null) { - ref.read(speechProviderControllerProvider.notifier).selectModel(val); - } - }, - ), - const SizedBox(height: 24), - _AdvancedConfigSection( - providerId: state.providerId ?? '', - customEndpoint: state.customEndpoint ?? '', - onSaveCustomEndpoint: (val) => ref.read(speechProviderControllerProvider.notifier).saveCustomEndpoint(val), - ), + if (state.providerId == 'litellm') + _LiteLLMModelPicker( + baseUrl: state.customEndpoint ?? '', + apiKey: state.apiKey ?? '', + selectedModelId: state.modelId, + onChanged: (val) { + if (val != null) { + ref + .read(speechProviderControllerProvider.notifier) + .selectModel(val); + } + }, + ) + else + _ModelDropdown( + models: selectedProvider.models, + selectedModelId: state.modelId, + onChanged: (val) { + if (val != null) { + ref.read(speechProviderControllerProvider.notifier).selectModel(val); + } + }, + ), + if (state.providerId != 'litellm') ...[ + const SizedBox(height: 24), + _AdvancedConfigSection( + providerId: state.providerId ?? '', + customEndpoint: state.customEndpoint ?? 
'', + onSaveCustomEndpoint: (val) => ref.read(speechProviderControllerProvider.notifier).saveCustomEndpoint(val), + ), + ], ], ); }, @@ -450,6 +475,228 @@ class _CustomEndpointInputState extends State<_CustomEndpointInput> { } } +class _LiteLLMEndpointInput extends StatefulWidget { + const _LiteLLMEndpointInput({ + required this.initialValue, + required this.onSave, + }); + + final String initialValue; + final Function(String) onSave; + + @override + State<_LiteLLMEndpointInput> createState() => _LiteLLMEndpointInputState(); +} + +class _LiteLLMEndpointInputState extends State<_LiteLLMEndpointInput> { + late final TextEditingController _controller; + + @override + void initState() { + super.initState(); + _controller = TextEditingController(text: widget.initialValue); + } + + @override + void didUpdateWidget(_LiteLLMEndpointInput oldWidget) { + super.didUpdateWidget(oldWidget); + if (widget.initialValue != oldWidget.initialValue) { + _controller.text = widget.initialValue; + } + } + + @override + void dispose() { + _controller.dispose(); + super.dispose(); + } + + @override + Widget build(BuildContext context) { + final cs = Theme.of(context).colorScheme; + + return Column( + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + Row( + children: [ + const Text('Proxy Base URL', style: TextStyle(fontWeight: FontWeight.w600)), + const SizedBox(width: 4), + const Text('*', style: TextStyle(color: Colors.redAccent, fontSize: 18, fontWeight: FontWeight.bold)), + const SizedBox(width: 8), + Text('(必填)', style: TextStyle(color: Colors.redAccent.withAlpha(150), fontSize: 12)), + ], + ), + const SizedBox(height: 6), + Text( + '例如:https://litellm.example.com(不要含 /v1)', + style: TextStyle(color: cs.onSurface.withAlpha(120), fontSize: 12), + ), + const SizedBox(height: 12), + Row( + children: [ + Expanded( + child: TextField( + controller: _controller, + decoration: InputDecoration( + hintText: 'https://litellm.example.com', + hintStyle: TextStyle(color: 
cs.onSurface.withAlpha(80)), + filled: true, + fillColor: cs.surface, + contentPadding: const EdgeInsets.symmetric(horizontal: 16, vertical: 12), + border: OutlineInputBorder( + borderRadius: BorderRadius.circular(12), + borderSide: BorderSide(color: cs.onSurface.withAlpha(30)), + ), + enabledBorder: OutlineInputBorder( + borderRadius: BorderRadius.circular(12), + borderSide: BorderSide(color: cs.onSurface.withAlpha(30)), + ), + ), + style: const TextStyle(fontSize: 14), + ), + ), + const SizedBox(width: 12), + ElevatedButton( + onPressed: () { + widget.onSave(_controller.text.trim()); + ScaffoldMessenger.of(context).showSnackBar( + const SnackBar(content: Text('Proxy URL 已儲存'), duration: Duration(seconds: 1)), + ); + }, + style: ElevatedButton.styleFrom( + backgroundColor: cs.primary, + foregroundColor: cs.onPrimary, + shape: RoundedRectangleBorder( + borderRadius: BorderRadius.circular(12), + ), + padding: const EdgeInsets.symmetric(horizontal: 16, vertical: 16), + ), + child: const Text('儲存'), + ), + ], + ), + ], + ); + } +} + +class _LiteLLMModelPicker extends ConsumerWidget { + const _LiteLLMModelPicker({ + required this.baseUrl, + required this.apiKey, + required this.selectedModelId, + required this.onChanged, + }); + + final String baseUrl; + final String apiKey; + final String? selectedModelId; + final ValueChanged onChanged; + + bool get _canFetch => baseUrl.isNotEmpty && apiKey.isNotEmpty; + + @override + Widget build(BuildContext context, WidgetRef ref) { + final cs = Theme.of(context).colorScheme; + final modelsAsync = + ref.watch(dynamicModelsControllerProvider('litellm')); + + return Column( + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + Row( + children: [ + Expanded(child: _buildDropdown(context, modelsAsync.value ?? const [])), + const SizedBox(width: 8), + IconButton( + tooltip: _canFetch + ? '從 /v1/models 重新抓取模型清單' + : '需先填入 Proxy URL 與 API Key', + icon: modelsAsync.isLoading + ? 
const SizedBox( + width: 18, + height: 18, + child: CircularProgressIndicator(strokeWidth: 2), + ) + : const Icon(Icons.refresh), + onPressed: _canFetch && !modelsAsync.isLoading + ? () async { + await ref + .read(dynamicModelsControllerProvider('litellm').notifier) + .refresh( + providerId: 'litellm', + baseUrl: baseUrl, + apiKey: apiKey, + ); + if (context.mounted) { + final newState = + ref.read(dynamicModelsControllerProvider('litellm')); + final msg = newState.hasError + ? '抓取失敗:${newState.error}' + : '已更新 ${newState.value?.length ?? 0} 個模型'; + ScaffoldMessenger.of(context).showSnackBar( + SnackBar( + content: Text(msg), + duration: const Duration(seconds: 2), + ), + ); + } + } + : null, + ), + ], + ), + if (modelsAsync.hasError) ...[ + const SizedBox(height: 8), + Text( + '抓取錯誤:${modelsAsync.error}', + style: TextStyle(color: Colors.redAccent.withAlpha(200), fontSize: 12), + ), + ] else if ((modelsAsync.value ?? const []).isEmpty) ...[ + const SizedBox(height: 8), + Text( + _canFetch + ? '尚未抓取模型清單,按右側重新整理。' + : '請先填寫 Proxy Base URL 與 API Key,再按右側重新整理。', + style: TextStyle(color: cs.onSurface.withAlpha(120), fontSize: 12), + ), + ], + ], + ); + } + + Widget _buildDropdown(BuildContext context, List models) { + final cs = Theme.of(context).colorScheme; + return Container( + padding: const EdgeInsets.symmetric(horizontal: 16), + decoration: BoxDecoration( + color: cs.surface, + borderRadius: BorderRadius.circular(12), + border: Border.all(color: cs.onSurface.withAlpha(30)), + ), + child: DropdownButtonHideUnderline( + child: DropdownButton( + value: models.any((m) => m.id == selectedModelId) + ? selectedModelId + : null, + isExpanded: true, + hint: Text( + models.isEmpty ? '尚未抓取模型清單' : '選擇一個模型', + ), + items: models + .map((m) => DropdownMenuItem( + value: m.id, + child: Text(m.id), + )) + .toList(), + onChanged: models.isEmpty ? 
null : onChanged, + ), + ), + ); + } +} + class _AdvancedConfigSection extends StatelessWidget { const _AdvancedConfigSection({ required this.providerId, From acf409c0eab7dbae6277f4d5ed0df118d17ce954 Mon Sep 17 00:00:00 2001 From: alarmz Date: Fri, 1 May 2026 15:55:47 +0800 Subject: [PATCH 03/15] Make the title bar draggable and add window controls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Flutter window is created with `titleBarStyle: TitleBarStyle.hidden`, which suppresses the native Windows title bar. Without a substitute drag region this leaves the window completely immovable — fatal on multi-monitor setups where the user can't reach controls that opened off-screen. Replace the static "Zero Type" centred Container with a `_TitleBar`: - The centre region is wrapped in `DragToMoveArea` from window_manager so the user can grab the title to move/throw the window between displays. - Three trailing 46x44 buttons (minimize / maximize-or-restore / close) drive `windowManager` directly. The close button paints the standard Windows red on hover with a white icon; the others use a subtle surface-tinted hover. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/shared/widgets/main_shell.dart | 119 +++++++++++++++++++++++++---- 1 file changed, 105 insertions(+), 14 deletions(-) diff --git a/lib/shared/widgets/main_shell.dart b/lib/shared/widgets/main_shell.dart index ae9892a..a0a51ff 100644 --- a/lib/shared/widgets/main_shell.dart +++ b/lib/shared/widgets/main_shell.dart @@ -2,6 +2,7 @@ import 'package:auto_route/auto_route.dart'; import 'package:flutter/material.dart'; import 'package:flutter_riverpod/flutter_riverpod.dart'; import 'package:shared_preferences/shared_preferences.dart'; +import 'package:window_manager/window_manager.dart'; import 'package:zero_type/core/di/injection.dart'; import 'package:zero_type/core/router/app_router.dart'; import 'package:zero_type/features/history/presentation/controllers/history_controller.dart'; @@ -120,20 +121,7 @@ class _MainShellPageState extends ConsumerState { return Scaffold( body: Column( children: [ - Container( - height: 44, - padding: const EdgeInsets.symmetric(horizontal: 16), - color: Theme.of(context).colorScheme.surface, - child: Center( - child: Text( - 'Zero Type', - style: Theme.of(context).textTheme.titleSmall?.copyWith( - fontWeight: FontWeight.bold, - letterSpacing: 0.5, - ), - ), - ), - ), + const _TitleBar(), const Divider(height: 1, thickness: 1), Expanded( child: Row( @@ -194,6 +182,109 @@ class _MainShellPageState extends ConsumerState { } } +class _TitleBar extends StatelessWidget { + const _TitleBar(); + + @override + Widget build(BuildContext context) { + final cs = Theme.of(context).colorScheme; + return Container( + height: 44, + color: cs.surface, + child: Row( + children: [ + // Drag region — entire centre area is grabbable for moving the window. + // Buttons sit at the right end and intercept their own pointer events. 
+ Expanded( + child: DragToMoveArea( + child: Container( + alignment: Alignment.center, + padding: const EdgeInsets.symmetric(horizontal: 16), + child: Text( + 'Zero Type', + style: Theme.of(context).textTheme.titleSmall?.copyWith( + fontWeight: FontWeight.bold, + letterSpacing: 0.5, + ), + ), + ), + ), + ), + _WindowControlButton( + icon: Icons.remove, + tooltip: '最小化', + onTap: () => windowManager.minimize(), + ), + _WindowControlButton( + icon: Icons.crop_square, + tooltip: '最大化/還原', + onTap: () async { + if (await windowManager.isMaximized()) { + await windowManager.unmaximize(); + } else { + await windowManager.maximize(); + } + }, + ), + _WindowControlButton( + icon: Icons.close, + tooltip: '關閉', + isClose: true, + onTap: () => windowManager.close(), + ), + ], + ), + ); + } +} + +class _WindowControlButton extends StatefulWidget { + const _WindowControlButton({ + required this.icon, + required this.tooltip, + required this.onTap, + this.isClose = false, + }); + + final IconData icon; + final String tooltip; + final VoidCallback onTap; + final bool isClose; + + @override + State<_WindowControlButton> createState() => _WindowControlButtonState(); +} + +class _WindowControlButtonState extends State<_WindowControlButton> { + bool _hovering = false; + + @override + Widget build(BuildContext context) { + final cs = Theme.of(context).colorScheme; + final hoverColor = widget.isClose + ? const Color(0xFFE81123) + : cs.onSurface.withValues(alpha: 0.08); + final iconColor = widget.isClose && _hovering ? Colors.white : cs.onSurface; + return Tooltip( + message: widget.tooltip, + child: MouseRegion( + onEnter: (_) => setState(() => _hovering = true), + onExit: (_) => setState(() => _hovering = false), + child: GestureDetector( + onTap: widget.onTap, + child: Container( + width: 46, + height: 44, + color: _hovering ? 
hoverColor : Colors.transparent, + alignment: Alignment.center, + child: Icon(widget.icon, size: 16, color: iconColor), + ), + ), + ), + ); + } +} + class _PermissionItem extends StatelessWidget { const _PermissionItem({ required this.icon, From 2665ffacb2e6b5ff5dff2dfb66233c64987f2ce5 Mon Sep 17 00:00:00 2001 From: alarmz Date: Fri, 1 May 2026 16:01:02 +0800 Subject: [PATCH 04/15] Surface real errors in overlay and add file logger MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The user reported that pressing Alt+Space showed only the literal string "錯誤" in the floating pill, with no further information — making it impossible to tell whether the proxy URL was wrong, the API key was bad, the model returned 4xx, or something timed out. Three changes: 1. recording_overlay: when status is error, render the actual `state.errorMessage` (shown as `錯誤:` followed by the message) instead of the fixed "錯誤" string. The label is wrapped in a 480-px max Flexible/ConstrainedBox so long server bodies wrap and are ellipsized at 3 lines instead of overflowing the pill. 2. SpeechRecognitionService: wrap the OpenAI/LiteLLM POST and the LiteLLM `/v1/models` GET in DioException-aware try/catch via a new `_wrapDioError` helper that: * logs full details (type, status, message, truncated body) via `AppLogger` * rethrows an Exception whose message includes the HTTP status, the Dio error type, and a 400-char-truncated response body so the user sees something actionable rather than a stack trace The pre-existing Gemini handler is migrated to the same helper. 3. Add `AppLogger` (`lib/core/services/app_logger.dart`): an append-only logger that writes to `%TEMP%\zero_type.log` (rotated at 1 MiB) and mirrors to stdout. 
The controller's hotkey/start/stop/error paths use it instead of `print`, and a new `_failStartup(msg)` helper unifies the early-validation paths so they actually mutate state to error+message (not just call the macOS-only native overlay channel) — previously on Windows these branches showed nothing at all. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../controllers/zero_type_controller.dart | 54 ++++---- lib/core/services/app_logger.dart | 55 +++++++++ .../services/speech_recognition_service.dart | 116 +++++++++++++----- lib/shared/widgets/recording_overlay.dart | 45 ++++--- 4 files changed, 196 insertions(+), 74 deletions(-) create mode 100644 lib/core/services/app_logger.dart diff --git a/lib/core/controllers/zero_type_controller.dart b/lib/core/controllers/zero_type_controller.dart index 4819b97..63ee352 100644 --- a/lib/core/controllers/zero_type_controller.dart +++ b/lib/core/controllers/zero_type_controller.dart @@ -7,6 +7,7 @@ import 'package:riverpod_annotation/riverpod_annotation.dart'; import 'package:zero_type/core/constants/model_pricing.dart'; import 'package:zero_type/core/constants/app_constants.dart'; import 'package:zero_type/core/di/injection.dart'; +import 'package:zero_type/core/services/app_logger.dart'; import 'package:zero_type/core/services/recording_service.dart'; import 'package:shared_preferences/shared_preferences.dart'; import 'package:zero_type/core/services/sound_service.dart'; @@ -42,7 +43,8 @@ class ZeroTypeController extends _$ZeroTypeController { } Future toggleRecording() async { - print('[ZeroTypeController] Hotkey triggered! 
Current status: ${state.status}'); + AppLogger.log('ZeroType', + 'hotkey activated; current status=${state.status.name}'); if (state.status == ZeroTypeStatus.recording) { await _stopAndProcess(); } else if (state.status == ZeroTypeStatus.idle) { @@ -76,13 +78,7 @@ class ZeroTypeController extends _$ZeroTypeController { if (config.providerId == null || config.providerId!.isEmpty || config.apiKey == null || config.apiKey!.isEmpty || config.modelId == null || config.modelId!.isEmpty) { - await _showNativeOverlay('error', '請先完成語音辨識模型設定'); - await getIt().playCancelSound(); - await Future.delayed(const Duration(seconds: 3)); - if (ref.mounted && !_cancelled) { - state = const ZeroTypeState(); - await _hideNativeOverlay(); - } + await _failStartup('請先完成語音辨識模型設定(provider/model/apiKey 至少一項缺失)'); return; } @@ -104,23 +100,11 @@ class ZeroTypeController extends _$ZeroTypeController { if (!ref.mounted || _cancelled) return; if (!isAccessibilityOk) { - await _showNativeOverlay('error', '請先授權輔助使用權限'); - await getIt().playCancelSound(); - await Future.delayed(const Duration(seconds: 3)); - if (ref.mounted && !_cancelled) { - state = const ZeroTypeState(); - await _hideNativeOverlay(); - } + await _failStartup('請先授權輔助使用權限'); return; } if (!hasPermission) { - await _showNativeOverlay('error', '請先授權麥克風權限'); - await getIt().playCancelSound(); - await Future.delayed(const Duration(seconds: 3)); - if (ref.mounted && !_cancelled) { - state = const ZeroTypeState(); - await _hideNativeOverlay(); - } + await _failStartup('請先授權麥克風權限'); return; } @@ -155,13 +139,14 @@ class ZeroTypeController extends _$ZeroTypeController { }, ), ]); - } catch (e) { + } catch (e, st) { + AppLogger.log('ZeroType', 'startRecording threw', error: e, st: st); if (!ref.mounted || _cancelled) return; state = state.copyWith( status: ZeroTypeStatus.error, errorMessage: '錄音啟動失敗:$e', ); - await _showNativeOverlay('error', '錄音啟動失敗'); + await _showNativeOverlay('error', '錄音啟動失敗:$e'); await Future.delayed(const 
Duration(seconds: 3)); if (ref.mounted && !_cancelled) { state = const ZeroTypeState(); @@ -170,6 +155,24 @@ class ZeroTypeController extends _$ZeroTypeController { } } + /// Centralised early-exit error path. Sets state to error with [msg] (so the + /// Flutter overlay on Windows shows the actual reason), fires the macOS + /// native overlay equivalent, plays cancel sound, and resets after 3s. + Future _failStartup(String msg) async { + AppLogger.log('ZeroType', 'startRecording aborted: $msg'); + state = state.copyWith( + status: ZeroTypeStatus.error, + errorMessage: msg, + ); + await _showNativeOverlay('error', msg); + await getIt().playCancelSound(); + await Future.delayed(const Duration(seconds: 3)); + if (ref.mounted && !_cancelled) { + state = const ZeroTypeState(); + await _hideNativeOverlay(); + } + } + Future _transcribe(String filePath) async { final config = await ref.read(speechProviderControllerProvider.future); final prompt = await ref.read(speechPromptControllerProvider.future); @@ -274,7 +277,8 @@ class ZeroTypeController extends _$ZeroTypeController { await _hideNativeOverlay(); } } catch (e, st) { - print('[ZeroType] ERROR in _stopAndProcess: $e\n$st'); + AppLogger.log('ZeroType', '_stopAndProcess threw', + error: e, st: st); if (!ref.mounted || _cancelled) return; state = state.copyWith( status: ZeroTypeStatus.error, diff --git a/lib/core/services/app_logger.dart b/lib/core/services/app_logger.dart new file mode 100644 index 0000000..512b2b2 --- /dev/null +++ b/lib/core/services/app_logger.dart @@ -0,0 +1,55 @@ +import 'dart:io'; + +/// Lightweight append-only logger. +/// +/// Writes to `%TEMP%\zero_type.log` on Windows / `/tmp/zero_type.log` on +/// macOS-Linux. Lines are timestamped. Mirrors to stdout via `print` so they +/// also show up when launched from a terminal (`flutter run -d windows`). 
+/// +/// Rotates by size: when the file exceeds [_maxBytes], it is renamed to +/// `.1` (overwriting any existing rotation) and a fresh file is started. +class AppLogger { + AppLogger._(); + + static const int _maxBytes = 1 * 1024 * 1024; // 1 MiB + + static File? _file; + + static File _resolveFile() { + final dir = Directory.systemTemp.path; + return File('$dir${Platform.pathSeparator}zero_type.log'); + } + + static void _ensureRotated(File f) { + try { + if (f.existsSync() && f.lengthSync() > _maxBytes) { + final rotated = File('${f.path}.1'); + if (rotated.existsSync()) rotated.deleteSync(); + f.renameSync(rotated.path); + } + } catch (_) { + // Rotation failures should not break the app. + } + } + + static void log(String tag, String message, {Object? error, StackTrace? st}) { + final ts = DateTime.now().toIso8601String(); + final base = '[$ts] [$tag] $message'; + final full = + error == null ? base : '$base\n error: $error${st == null ? '' : '\n$st'}'; + + // Always echo to stdout so a `flutter run -d windows` terminal sees it. + // ignore: avoid_print + print(full); + + try { + final f = _file ??= _resolveFile(); + _ensureRotated(f); + f.writeAsStringSync('$full\n', mode: FileMode.append, flush: false); + } catch (_) { + // If we can't write to disk we still have the print above. + } + } + + static String get logPath => _resolveFile().path; +} diff --git a/lib/core/services/speech_recognition_service.dart b/lib/core/services/speech_recognition_service.dart index 0ee765b..9a0986f 100644 --- a/lib/core/services/speech_recognition_service.dart +++ b/lib/core/services/speech_recognition_service.dart @@ -2,6 +2,7 @@ import 'dart:convert'; import 'dart:io'; import 'package:dio/dio.dart'; +import 'package:zero_type/core/services/app_logger.dart'; typedef TranscriptionResult = ({ String text, @@ -22,7 +23,8 @@ class SpeechRecognitionService { required String prompt, String? 
customEndpoint, }) async { - print('[SpeechRecognition] Transcribing with $provider ($model)...'); + AppLogger.log('SpeechRecognition', + 'transcribe start: provider=$provider model=$model file=$audioFilePath endpoint=${customEndpoint ?? '(default)'}'); switch (provider) { case 'openai': @@ -66,33 +68,75 @@ class SpeechRecognitionService { required String apiKey, }) async { final url = '${_stripTrailingSlash(baseUrl)}/v1/models'; - final response = await _dio.get( - url, - options: Options( - headers: {'Authorization': 'Bearer $apiKey'}, - ), - ); + AppLogger.log('LiteLLM', 'GET $url'); + try { + final response = await _dio.get( + url, + options: Options( + headers: {'Authorization': 'Bearer $apiKey'}, + ), + ); - Map? body; - if (response.data is Map) { - body = response.data as Map; - } else if (response.data is String) { - body = jsonDecode(response.data as String) as Map; + Map? body; + if (response.data is Map) { + body = response.data as Map; + } else if (response.data is String) { + body = jsonDecode(response.data as String) as Map; + } + final list = body?['data'] as List? ?? const []; + final models = list + .whereType>() + .map((m) { + final id = m['id'] as String? ?? ''; + return (id: id, name: id); + }) + .where((m) => m.id.isNotEmpty) + .toList(); + AppLogger.log('LiteLLM', + 'fetched ${models.length} models from /v1/models'); + return models; + } on DioException catch (e) { + throw _wrapDioError('fetchAvailableModels GET $url', e); } - final list = body?['data'] as List? ?? const []; - return list - .whereType>() - .map((m) { - final id = m['id'] as String? ?? ''; - return (id: id, name: id); - }) - .where((m) => m.id.isNotEmpty) - .toList(); } static String _stripTrailingSlash(String s) => s.replaceAll(RegExp(r'/+$'), ''); + /// Convert a DioException into a readable Exception with status, message, + /// and a truncated response body. Also logs the full details so the file + /// log retains everything useful for diagnosis. 
+ Exception _wrapDioError(String context, DioException e) { + final status = e.response?.statusCode; + final raw = e.response?.data; + final bodyStr = raw == null ? '' : raw.toString(); + final shortBody = bodyStr.length > 400 + ? '${bodyStr.substring(0, 400)}…(truncated)' + : bodyStr; + + AppLogger.log( + 'HTTP', + '$context failed: type=${e.type} status=${status ?? '-'} ' + 'msg=${e.message ?? '-'}\n body: $shortBody', + error: e, + ); + + final summary = StringBuffer(); + if (status != null) summary.write('HTTP $status'); + if (e.type != DioExceptionType.unknown) { + if (summary.isNotEmpty) summary.write(' '); + summary.write('(${e.type.name})'); + } + if (e.message != null && e.message!.isNotEmpty) { + if (summary.isNotEmpty) summary.write(' — '); + summary.write(e.message); + } + if (shortBody.isNotEmpty) { + summary.write('\n回應:$shortBody'); + } + return Exception(summary.isEmpty ? e.toString() : summary.toString()); + } + Future _transcribeWithOpenAI({ required String audioFilePath, required String apiKey, @@ -114,13 +158,20 @@ class SpeechRecognitionService { ? customEndpoint : 'https://api.openai.com/v1/audio/transcriptions'; - final response = await _dio.post( - url, - data: formData, - options: Options( - headers: {'Authorization': 'Bearer $apiKey'}, - ), - ); + AppLogger.log('OpenAI', 'POST $url model=$model'); + + Response response; + try { + response = await _dio.post( + url, + data: formData, + options: Options( + headers: {'Authorization': 'Bearer $apiKey'}, + ), + ); + } on DioException catch (e) { + throw _wrapDioError('OpenAI/LiteLLM POST $url', e); + } // Parse JSON response to extract text and token usage Map? data; @@ -153,7 +204,7 @@ class SpeechRecognitionService { required String prompt, String? 
customEndpoint, }) async { - print('[Gemini] Start direct transcription: $audioFilePath'); + AppLogger.log('Gemini', 'start: $audioFilePath'); final fileToUpload = File(audioFilePath); if (!fileToUpload.existsSync()) { @@ -217,12 +268,11 @@ class SpeechRecognitionService { final inputTokens = usageMeta?['promptTokenCount'] as int?; final outputTokens = usageMeta?['candidatesTokenCount'] as int?; - print('[Gemini] Success! tokens: in=$inputTokens out=$outputTokens'); + AppLogger.log('Gemini', + 'success tokens: in=$inputTokens out=$outputTokens'); return (text: text, inputTokens: inputTokens, outputTokens: outputTokens); } on DioException catch (e) { - print('[Gemini] DioException: ${e.message}'); - print('[Gemini] Status: ${e.response?.statusCode}'); - rethrow; + throw _wrapDioError('Gemini POST $url', e); } } } diff --git a/lib/shared/widgets/recording_overlay.dart b/lib/shared/widgets/recording_overlay.dart index 2e48953..8b44425 100644 --- a/lib/shared/widgets/recording_overlay.dart +++ b/lib/shared/widgets/recording_overlay.dart @@ -87,15 +87,21 @@ class _OverlayPill extends StatelessWidget { ZeroTypeStatus.idle => Colors.grey, }; - String get _label => switch (state.status) { - ZeroTypeStatus.recording => '錄音中', - ZeroTypeStatus.cancelling => '取消中', - ZeroTypeStatus.saving => '擷取中', - ZeroTypeStatus.transcribing => '辨識中', - ZeroTypeStatus.done => '已完成', - ZeroTypeStatus.error => '錯誤', - ZeroTypeStatus.idle => '', - }; + String get _label { + if (state.status == ZeroTypeStatus.error) { + final msg = state.errorMessage; + return (msg == null || msg.isEmpty) ? 
'錯誤' : '錯誤:$msg'; + } + return switch (state.status) { + ZeroTypeStatus.recording => '錄音中', + ZeroTypeStatus.cancelling => '取消中', + ZeroTypeStatus.saving => '擷取中', + ZeroTypeStatus.transcribing => '辨識中', + ZeroTypeStatus.done => '已完成', + ZeroTypeStatus.error => '錯誤', + ZeroTypeStatus.idle => '', + }; + } bool get _showWaveform => state.status == ZeroTypeStatus.recording || @@ -128,13 +134,20 @@ class _OverlayPill extends StatelessWidget { _WaveformBars(amplitude: state.amplitude, color: dotColor), const SizedBox(width: 10), ], - Text( - _label, - style: TextStyle( - color: dotColor, - fontSize: 13, - fontWeight: FontWeight.w600, - letterSpacing: 0.3, + Flexible( + child: ConstrainedBox( + constraints: const BoxConstraints(maxWidth: 480), + child: Text( + _label, + style: TextStyle( + color: dotColor, + fontSize: 13, + fontWeight: FontWeight.w600, + letterSpacing: 0.3, + ), + maxLines: 3, + overflow: TextOverflow.ellipsis, + ), ), ), if (state.status == ZeroTypeStatus.saving || From 3dc3eed316c58c4ddf7337634ce60c9d5f24738f Mon Sep 17 00:00:00 2001 From: alarmz Date: Fri, 1 May 2026 16:04:08 +0800 Subject: [PATCH 05/15] Use 44100 Hz sample rate on Windows for AAC recording MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Pressing Alt+Space on Windows immediately threw: PlatformException(Record, null, The data specified for the media type is invalid, inconsistent, or not supported by this object., null) Root cause: the Windows Media Foundation AAC encoder accepts only 44100 or 48000 Hz sample rates (per Microsoft's AAC encoder spec). The previous hard-coded 16000 Hz — chosen because Whisper is internally 16 kHz, so the mac path avoided server-side resampling — fails on Windows with MF_E_INVALIDMEDIATYPE before any audio is captured. Branch on Platform.isWindows: 44100 Hz on Windows, 16000 Hz on macOS (unchanged). 
Bitrate stays 128 kbps so the upload size is identical; Whisper resamples 44.1 → 16 kHz on the server with no perceptible difference for speech. Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/core/services/recording_service.dart | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/lib/core/services/recording_service.dart b/lib/core/services/recording_service.dart index cf86361..4ddba5a 100644 --- a/lib/core/services/recording_service.dart +++ b/lib/core/services/recording_service.dart @@ -35,12 +35,19 @@ class RecordingService { final timestamp = DateTime.now().millisecondsSinceEpoch; _currentFilePath = '${dir.path}/zerotype_$timestamp.m4a'; - print('[RecordingService] starting at $_currentFilePath'); + // Windows Media Foundation's AAC encoder only accepts 44100 or 48000 Hz + // (16000 Hz triggers MF_E_INVALIDMEDIATYPE / 0xC00D36B4 immediately). + // macOS's AVAssetWriter accepts 16000 Hz happily, which feeds Whisper at + // its native rate and avoids server-side resampling. + final sampleRate = Platform.isWindows ? 44100 : 16000; + + print( + '[RecordingService] starting at $_currentFilePath @ ${sampleRate}Hz'); await _recorder.start( - const RecordConfig( + RecordConfig( encoder: AudioEncoder.aacLc, bitRate: 128000, - sampleRate: 16000, + sampleRate: sampleRate, ), path: _currentFilePath!, ); From 8b95cf07d1244743df9eb5ca848e3c5d3846ec94 Mon Sep 17 00:00:00 2001 From: alarmz Date: Fri, 1 May 2026 16:06:50 +0800 Subject: [PATCH 06/15] LiteLLM: route non-whisper models through /v1/chat/completions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The user pointed ZeroType at a LiteLLM proxy and selected `gemini-2.5-flash-lite`, which the proxy returned in its `/v1/models` listing. The transcription request to `/v1/audio/transcriptions` failed with a server-side 500 and the LiteLLM body: litellm.APIConnectionError: Unmapped provider passed in. Unable to get the response. 
LiteLLM's `/v1/audio/transcriptions` only knows how to dispatch to whisper-style backends (OpenAI Whisper, Deepgram, Azure Speech, …); it has no mapping for chat/multimodal models. Modern multimodal LLMs (Gemini, GPT-4o-audio, Claude with audio) accept audio via the chat completions endpoint with an `input_audio` content part — and LiteLLM already bridges that shape to each backend's native audio API. Switch the LiteLLM provider to: - `model.contains('whisper')` → /v1/audio/transcriptions (existing path) - everything else → /v1/chat/completions with input_audio The new `_transcribeWithChatCompletions` reads the recorded m4a, base64- encodes it, posts a single user message containing the prompt and the audio (format detected from the file extension), and parses `choices[0].message.content` as the transcript. Token usage is read from `usage.prompt_tokens` / `usage.completion_tokens` so the history page's cost tracking still works. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../services/speech_recognition_service.dart | 113 +++++++++++++++++- 1 file changed, 110 insertions(+), 3 deletions(-) diff --git a/lib/core/services/speech_recognition_service.dart b/lib/core/services/speech_recognition_service.dart index 9a0986f..0d07219 100644 --- a/lib/core/services/speech_recognition_service.dart +++ b/lib/core/services/speech_recognition_service.dart @@ -47,13 +47,27 @@ class SpeechRecognitionService { if (customEndpoint == null || customEndpoint.isEmpty) { throw Exception('LiteLLM 需要在「進階設定」中填寫 Proxy Base URL'); } - return _transcribeWithOpenAI( + final base = _stripTrailingSlash(customEndpoint); + // Whisper-style transcription models go to /v1/audio/transcriptions. + // Everything else (Gemini, GPT-4o-audio, Claude, …) is treated as a + // multimodal chat model and gets the audio embedded in a + // /v1/chat/completions request — that's the only LiteLLM-supported + // path that actually works for non-whisper backends. 
+ if (model.toLowerCase().contains('whisper')) { + return _transcribeWithOpenAI( + audioFilePath: audioFilePath, + apiKey: apiKey, + model: model, + prompt: prompt, + customEndpoint: '$base/v1/audio/transcriptions', + ); + } + return _transcribeWithChatCompletions( audioFilePath: audioFilePath, apiKey: apiKey, model: model, prompt: prompt, - customEndpoint: - '${_stripTrailingSlash(customEndpoint)}/v1/audio/transcriptions', + baseUrl: base, ); default: throw Exception('不支援的語音辨識服務商:$provider'); @@ -100,6 +114,99 @@ class SpeechRecognitionService { } } + /// Multimodal transcription via the OpenAI-compatible `/v1/chat/completions` + /// endpoint. Used by the LiteLLM provider for non-whisper models (Gemini, + /// GPT-4o-audio, Claude with audio, etc.) — LiteLLM bridges the OpenAI + /// `input_audio` content part to the backend's native audio API. + Future _transcribeWithChatCompletions({ + required String audioFilePath, + required String apiKey, + required String model, + required String prompt, + required String baseUrl, + }) async { + final url = '$baseUrl/v1/chat/completions'; + final file = File(audioFilePath); + if (!file.existsSync()) { + throw Exception('找不到音檔:$audioFilePath'); + } + final bytes = await file.readAsBytes(); + final base64Audio = base64Encode(bytes); + + final lower = audioFilePath.toLowerCase(); + final format = lower.endsWith('.m4a') + ? 'm4a' + : lower.endsWith('.mp3') + ? 'mp3' + : lower.endsWith('.wav') + ? 'wav' + : lower.endsWith('.ogg') + ? 'ogg' + : 'm4a'; + + final finalPrompt = + prompt.isEmpty ? 'Transcribe the speech in this audio.' 
: prompt; + + AppLogger.log('LiteLLM-chat', + 'POST $url model=$model format=$format bytes=${bytes.length}'); + + Response response; + try { + response = await _dio.post( + url, + data: { + 'model': model, + 'messages': [ + { + 'role': 'user', + 'content': [ + {'type': 'text', 'text': finalPrompt}, + { + 'type': 'input_audio', + 'input_audio': { + 'data': base64Audio, + 'format': format, + }, + }, + ], + }, + ], + }, + options: Options( + headers: { + 'Authorization': 'Bearer $apiKey', + 'Content-Type': 'application/json', + }, + ), + ); + } on DioException catch (e) { + throw _wrapDioError('LiteLLM chat POST $url', e); + } + + Map? body; + if (response.data is Map) { + body = response.data as Map; + } else if (response.data is String) { + body = jsonDecode(response.data as String) as Map; + } + final choices = body?['choices'] as List?; + if (choices == null || choices.isEmpty) { + throw Exception('LiteLLM 回應沒有 choices 欄位:${response.data}'); + } + final message = (choices.first as Map)['message'] + as Map?; + final content = message?['content']; + final text = (content is String) ? 
content.trim() : ''; + + final usage = body?['usage'] as Map?; + final inputTokens = usage?['prompt_tokens'] as int?; + final outputTokens = usage?['completion_tokens'] as int?; + + AppLogger.log('LiteLLM-chat', + 'success length=${text.length} tokens: in=$inputTokens out=$outputTokens'); + return (text: text, inputTokens: inputTokens, outputTokens: outputTokens); + } + static String _stripTrailingSlash(String s) => s.replaceAll(RegExp(r'/+$'), ''); From 844415383456ea1f5ce96ad9ae11905b36f7cdda Mon Sep 17 00:00:00 2001 From: alarmz Date: Fri, 1 May 2026 16:21:26 +0800 Subject: [PATCH 07/15] Add post-transcription LLM refinement (independent provider, prompt, toggle) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Adds option C from the design conversation: take the raw transcript from the speech provider and run it through a separate chat LLM to clean up filler words, fix self-corrections, normalise punctuation, etc. The point is to let users pick "cheap+fast model for transcription, smart model for polishing" — e.g. Whisper transcribe → Claude refine. Three independent axes (per the user's chosen design): (a) Refinement provider/model/key/endpoint kept entirely separate from the speech provider, with its own SharedPreferences namespace. (b) Refinement prompt is a separate editable prompt (`TextRefinement.prompt` asset, custom override at `/TextRefinement_Custom.prompt`). (c) Off by default; activated by a toggle in the Settings page so a typo in the refinement config can never silently double the user's bill. Implementation: - `assets/prompts/TextRefinement.prompt` — sensible default refinement instructions (preserve meaning, drop fillers, fix self-corrections, format bullet lists, restore literal punctuation). - `PromptRepository`: refactored shared file/asset/prefs helpers and added refinement get/save/reset/default methods. `PromptController` adds a `RefinementPromptController` mirroring the speech one. 
`PromptPage` becomes a `TabBar` with two `PromptEditor` instances. - `ModelConfigRepository`: refinement-namespaced provider/model/apiKey/ endpoint/cachedModels methods (separate prefs keys so speech and refinement can point at different LiteLLM proxies if desired). - `model_config_controller`: new `RefinementProviderController` and `DynamicRefinementModelsController` (parallel to the speech versions). - `model_config_page`: new "文字優化(可選)" collapsible section reusing `_ApiKeyInput`/`_LiteLLMEndpointInput`/`_ModelDropdown`. The `_LiteLLMModelPicker` gained an `isRefinement` flag so it watches and refreshes the refinement model cache when used in this section. - `SettingsState`: new `refinementEnabled` field; `SettingsController` loads/saves it via `AppConstants.isRefinementEnabledKey` (which was already declared but unused). New tile in `settings_page` between "開機啟動" and "歷史記錄保留時間". - `SpeechRecognitionService.refine(rawText, …)`: text-only chat-completion call. Routes openai/litellm via `/v1/chat/completions` (LiteLLM works with any chat backend), gemini via `models/{id}:generateContent`. Reuses the existing `_wrapDioError` helper so any failure produces a useful log line and a readable exception. - `ZeroTypeController._maybeRefine(rawText)`: called between transcribe and clipboard/paste. Returns `null` (silent fallback to raw transcript) if the toggle is off, the refinement provider isn't fully configured, or the call throws — refinement should never block the user from getting their text out. Briefly shows "優化中" in the overlay during the call. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../controllers/zero_type_controller.dart | 56 ++++- .../controllers/zero_type_controller.g.dart | 2 +- .../services/speech_recognition_service.dart | 171 +++++++++++++ .../model_config_repository_impl.dart | 64 +++++ .../repositories/model_config_repository.dart | 21 ++ .../controllers/model_config_controller.dart | 87 +++++++ .../model_config_controller.g.dart | 235 ++++++++++++++++++ .../presentation/pages/model_config_page.dart | 170 ++++++++++++- .../repositories/prompt_repository_impl.dart | 100 ++++++-- .../repositories/prompt_repository.dart | 7 +- .../controllers/prompt_controller.dart | 23 ++ .../controllers/prompt_controller.g.dart | 46 ++++ .../presentation/pages/prompt_page.dart | 88 +++++-- .../controllers/settings_controller.dart | 12 + .../controllers/settings_state.dart | 1 + .../controllers/settings_state.freezed.dart | 43 ++-- .../presentation/pages/settings_page.dart | 18 ++ prompts/TextRefinement.prompt | 37 +++ 18 files changed, 1104 insertions(+), 77 deletions(-) create mode 100644 prompts/TextRefinement.prompt diff --git a/lib/core/controllers/zero_type_controller.dart b/lib/core/controllers/zero_type_controller.dart index 63ee352..826d15c 100644 --- a/lib/core/controllers/zero_type_controller.dart +++ b/lib/core/controllers/zero_type_controller.dart @@ -18,6 +18,7 @@ import 'package:zero_type/features/history/domain/repositories/history_repositor import 'package:zero_type/features/model_config/presentation/controllers/model_config_controller.dart'; import 'package:zero_type/features/prompt/presentation/controllers/prompt_controller.dart'; import 'package:zero_type/features/dictionary/presentation/controllers/dictionary_controller.dart'; +import 'package:zero_type/features/settings/presentation/controllers/settings_controller.dart'; part 'zero_type_controller.g.dart'; @@ -155,6 +156,49 @@ class ZeroTypeController extends _$ZeroTypeController { } } + /// Optional post-transcription LLM 
refinement. Returns the refined text on + /// success, `null` if the feature is disabled / not configured / errors + /// (the caller falls back to the raw transcript so a misconfigured + /// refinement step never blocks the user from getting their text pasted). + Future _maybeRefine(String rawText) async { + final settings = await ref.read(settingsControllerProvider.future); + if (!settings.refinementEnabled) { + AppLogger.log('Refine', 'skipped: toggle is off'); + return null; + } + final cfg = await ref.read(refinementProviderControllerProvider.future); + if (cfg.providerId == null || cfg.providerId!.isEmpty || + cfg.modelId == null || cfg.modelId!.isEmpty || + cfg.apiKey == null || cfg.apiKey!.isEmpty) { + AppLogger.log('Refine', + 'skipped: refinement provider/model/apiKey not all configured'); + return null; + } + try { + await _showNativeOverlay('transcribing', '優化中'); + final prompt = await ref.read(refinementPromptControllerProvider.future); + final refined = await getIt().refine( + rawText: rawText, + apiKey: cfg.apiKey!, + provider: cfg.providerId!, + model: cfg.modelId!, + prompt: prompt, + customEndpoint: cfg.customEndpoint, + ); + if (refined.text.isEmpty) { + AppLogger.log('Refine', 'returned empty; falling back to raw'); + return null; + } + AppLogger.log('Refine', + 'success rawLen=${rawText.length} refinedLen=${refined.text.length}'); + return refined.text; + } catch (e, st) { + AppLogger.log('Refine', 'failed; falling back to raw', + error: e, st: st); + return null; + } + } + /// Centralised early-exit error path. Sets state to error with [msg] (so the /// Flutter overlay on Windows shows the actual reason), fires the macOS /// native overlay equivalent, plays cancel sound, and resets after 3s. @@ -236,6 +280,12 @@ class ZeroTypeController extends _$ZeroTypeController { throw Exception('未能辨識出任何文字'); } + // Optional refinement step: feed the transcript through a chat LLM + // to clean up filler words / fix punctuation. 
Settings toggle is + // checked first; if off, this is a no-op. + final refinedText = await _maybeRefine(result.text); + final finalText = refinedText ?? result.text; + // Move audio to history dir and save record final historyRepo = getIt(); final audioHistoryPath = await historyRepo.moveAudioFile(filePath); @@ -243,7 +293,7 @@ class ZeroTypeController extends _$ZeroTypeController { final recordId = DateTime.now().millisecondsSinceEpoch.toString(); final record = TranscriptionRecord( id: recordId, - text: result.text, + text: finalText, createdAt: DateTime.now(), audioPath: audioHistoryPath, durationMs: durationMs, @@ -261,8 +311,8 @@ class ZeroTypeController extends _$ZeroTypeController { await historyRepo.accumulateStats(record); // Output - state = state.copyWith(status: ZeroTypeStatus.done, result: result.text); - await Clipboard.setData(ClipboardData(text: result.text)); + state = state.copyWith(status: ZeroTypeStatus.done, result: finalText); + await Clipboard.setData(ClipboardData(text: finalText)); await Future.delayed(const Duration(milliseconds: 150)); print('[ZeroType] Simulating paste...'); diff --git a/lib/core/controllers/zero_type_controller.g.dart b/lib/core/controllers/zero_type_controller.g.dart index 61a27d6..c95c2d9 100644 --- a/lib/core/controllers/zero_type_controller.g.dart +++ b/lib/core/controllers/zero_type_controller.g.dart @@ -42,7 +42,7 @@ final class ZeroTypeControllerProvider } String _$zeroTypeControllerHash() => - r'e7fdb5b9d22d497dd77eda8c6008598f118ddb5d'; + r'bbc8298cecb8bc86652717576076330b34dd6083'; abstract class _$ZeroTypeController extends $Notifier { ZeroTypeState build(); diff --git a/lib/core/services/speech_recognition_service.dart b/lib/core/services/speech_recognition_service.dart index 0d07219..47366d1 100644 --- a/lib/core/services/speech_recognition_service.dart +++ b/lib/core/services/speech_recognition_service.dart @@ -74,6 +74,177 @@ class SpeechRecognitionService { } } + /// Post-transcription refinement: sends 
`prompt + rawText` to a chat model
+  /// and returns the polished text. Used after `transcribe()` when the user
+  /// has enabled the refinement feature in settings.
+  ///
+  /// Dispatches on [provider]: `'openai'` and `'litellm'` both go through the
+  /// OpenAI-style chat helper, `'gemini'` goes through the Gemini helper, and
+  /// any other value throws. Blank [rawText] returns an empty result without
+  /// making a request.
+  Future refine({
+    required String rawText,
+    required String apiKey,
+    required String provider,
+    required String model,
+    required String prompt,
+    String? customEndpoint,
+  }) async {
+    AppLogger.log('Refine',
+        'start: provider=$provider model=$model textLen=${rawText.length} endpoint=${customEndpoint ?? '(default)'}');
+
+    // Nothing to polish: hand back an empty text with no token counts.
+    if (rawText.trim().isEmpty) {
+      return (text: '', inputTokens: null, outputTokens: null);
+    }
+
+    switch (provider) {
+      case 'openai':
+        // A non-empty custom endpoint replaces the default OpenAI host.
+        return _refineWithOpenAIChat(
+          rawText: rawText,
+          apiKey: apiKey,
+          model: model,
+          prompt: prompt,
+          baseUrl: (customEndpoint == null || customEndpoint.isEmpty)
+              ? 'https://api.openai.com'
+              : _stripTrailingSlash(customEndpoint),
+        );
+      case 'litellm':
+        // LiteLLM has no default host here, so the endpoint is mandatory.
+        if (customEndpoint == null || customEndpoint.isEmpty) {
+          throw Exception('LiteLLM 需要在「模型」頁的文字優化區填寫 Proxy Base URL');
+        }
+        return _refineWithOpenAIChat(
+          rawText: rawText,
+          apiKey: apiKey,
+          model: model,
+          prompt: prompt,
+          baseUrl: _stripTrailingSlash(customEndpoint),
+        );
+      case 'gemini':
+        // The Gemini helper resolves its own default host when the endpoint
+        // is null or empty.
+        return _refineWithGemini(
+          rawText: rawText,
+          apiKey: apiKey,
+          model: model,
+          prompt: prompt,
+          customEndpoint: customEndpoint,
+        );
+      default:
+        throw Exception('不支援的優化服務商:$provider');
+    }
+  }
+
+  /// Posts a two-message chat request (system = [prompt], user = [rawText])
+  /// to `$baseUrl/v1/chat/completions` with [apiKey] as a Bearer token.
+  ///
+  /// A blank [prompt] is replaced by a built-in English instruction. Returns
+  /// the trimmed `content` of the first choice (empty string when it is not a
+  /// string) together with `prompt_tokens` / `completion_tokens` read from
+  /// `usage`. Throws when the response carries no `choices`; a DioException
+  /// is caught and the result of `_wrapDioError` is thrown instead.
+  Future _refineWithOpenAIChat({
+    required String rawText,
+    required String apiKey,
+    required String model,
+    required String prompt,
+    required String baseUrl,
+  }) async {
+    // NOTE(review): `/v1` is always appended, so a base URL that already ends
+    // in `/v1` would yield `/v1/v1/...` - confirm what callers pass.
+    final url = '$baseUrl/v1/chat/completions';
+    final systemPrompt = prompt.trim().isEmpty
+        ? 'You are a text refinement assistant. Polish the user-provided transcript by removing filler words, correcting self-corrections, and adding punctuation. Output only the polished text.'
+        : prompt.trim();
+
+    // Only lengths are logged, not the prompt or transcript themselves.
+    AppLogger.log(
+        'Refine-chat', 'POST $url model=$model prompt=${systemPrompt.length}c text=${rawText.length}c');
+
+    Response response;
+    try {
+      response = await _dio.post(
+        url,
+        data: {
+          'model': model,
+          'messages': [
+            {'role': 'system', 'content': systemPrompt},
+            {'role': 'user', 'content': rawText},
+          ],
+        },
+        options: Options(
+          headers: {
+            'Authorization': 'Bearer $apiKey',
+            'Content-Type': 'application/json',
+          },
+        ),
+      );
+    } on DioException catch (e) {
+      throw _wrapDioError('Refine chat POST $url', e);
+    }
+
+    // The payload is accepted either as an already-decoded map or as a raw
+    // JSON string; any other shape leaves `body` null and ends in the
+    // "no choices" exception below.
+    Map? body;
+    if (response.data is Map) {
+      body = response.data as Map;
+    } else if (response.data is String) {
+      body = jsonDecode(response.data as String) as Map;
+    }
+    final choices = body?['choices'] as List?;
+    if (choices == null || choices.isEmpty) {
+      throw Exception('優化回應沒有 choices:${response.data}');
+    }
+    final message =
+        (choices.first as Map)['message'] as Map?;
+    final content = message?['content'];
+    // Non-string content is reported as empty text rather than as an error.
+    final text = (content is String) ? content.trim() : '';
+
+    final usage = body?['usage'] as Map?;
+    return (
+      text: text,
+      inputTokens: usage?['prompt_tokens'] as int?,
+      outputTokens: usage?['completion_tokens'] as int?,
+    );
+  }
+
+  /// Posts to `$base/models/$model:generateContent`, where `base` is
+  /// [customEndpoint] passed through `_stripTrailingSlash`, or the public
+  /// `v1beta` host when [customEndpoint] is null or empty.
+  ///
+  /// The instruction and the transcript travel in a single text part, joined
+  /// by a `---` separator; [apiKey] is sent in the `x-goog-api-key` header. A
+  /// blank [prompt] is replaced by a built-in English instruction. Returns
+  /// the trimmed text of the first part of the first candidate plus the
+  /// token counts from `usageMetadata`. Throws when there are no candidates
+  /// or no parts; a DioException is caught and the result of `_wrapDioError`
+  /// is thrown instead.
+  Future _refineWithGemini({
+    required String rawText,
+    required String apiKey,
+    required String model,
+    required String prompt,
+    String? customEndpoint,
+  }) async {
+    final base = (customEndpoint == null || customEndpoint.isEmpty)
+        ? 'https://generativelanguage.googleapis.com/v1beta'
+        : _stripTrailingSlash(customEndpoint);
+    final url = '$base/models/$model:generateContent';
+    final systemPrompt = prompt.trim().isEmpty
+        ? 'Polish the following transcript. Output only the cleaned text.'
+        : prompt.trim();
+
+    AppLogger.log('Refine-gemini', 'POST $url model=$model');
+
+    Response> response;
+    try {
+      response = await _dio.post>(
+        url,
+        data: {
+          'contents': [
+            {
+              'parts': [
+                // Instruction and transcript share one part, split by `---`.
+                {'text': '$systemPrompt\n\n---\n\n$rawText'},
+              ],
+            },
+          ],
+        },
+        options: Options(
+          headers: {
+            'x-goog-api-key': apiKey,
+            'Content-Type': 'application/json',
+          },
+        ),
+      );
+    } on DioException catch (e) {
+      throw _wrapDioError('Refine Gemini POST $url', e);
+    }
+
+    final candidates = response.data?['candidates'] as List?;
+    if (candidates == null || candidates.isEmpty) {
+      throw Exception('Gemini 優化失敗:無候選回應');
+    }
+    // Only the first candidate and its first part are read.
+    final parts = candidates[0]['content']?['parts'] as List?;
+    if (parts == null || parts.isEmpty) {
+      throw Exception('Gemini 優化失敗:內容為空');
+    }
+    final text = (parts[0]['text'] as String? ?? '').trim();
+    final usage = response.data?['usageMetadata'] as Map?;
+    return (
+      text: text,
+      inputTokens: usage?['promptTokenCount'] as int?,
+      outputTokens: usage?['candidatesTokenCount'] as int?,
+    );
+  }
+
   /// Fetches the model list from an OpenAI-compatible `/v1/models` endpoint
   /// (e.g. a LiteLLM proxy). Returns id+name records; callers map to UI
   /// entities. The `id` is what gets sent to the transcription endpoint.
diff --git a/lib/features/model_config/data/repositories/model_config_repository_impl.dart b/lib/features/model_config/data/repositories/model_config_repository_impl.dart
index 4cfbbcd..e4f717d 100644
--- a/lib/features/model_config/data/repositories/model_config_repository_impl.dart
+++ b/lib/features/model_config/data/repositories/model_config_repository_impl.dart
@@ -95,4 +95,68 @@ class ModelConfigRepositoryImpl implements ModelConfigRepository {
     );
     await _prefs.setString('cached_models_$providerId', raw);
   }
+
+  // ── Refinement ────────────────────────────────────────────────────────────
+  // Every value below is stored under its own refinement-specific key, so
+  // none of them collides with the 'cached_models_$providerId' key above.
+
+  /// Reads the stored id of the provider selected for refinement.
+  @override
+  Future getSelectedRefinementProviderId() async =>
+      _prefs.getString(AppConstants.selectedRefinementProviderKey);
+
+  /// Stores [providerId] as the provider selected for refinement.
+  @override
+  Future saveSelectedRefinementProviderId(String providerId) async =>
+      _prefs.setString(AppConstants.selectedRefinementProviderKey, providerId);
+
+  /// Reads the model selected for [providerId]. The selection is kept per
+  /// provider: the key is the shared prefix followed by `_$providerId`.
+  @override
+  Future getSelectedRefinementModelId(String providerId) async =>
+      _prefs
+          .getString('${AppConstants.selectedRefinementModelKey}_$providerId');
+
+  /// Stores [modelId] as the model selected for [providerId].
+  @override
+  Future saveSelectedRefinementModelId(
+          String providerId, String modelId) async =>
+      _prefs.setString(
+          '${AppConstants.selectedRefinementModelKey}_$providerId', modelId);
+
+  /// Reads the refinement API key stored for [providerId].
+  // NOTE(review): the key is written to `_prefs` as a plain string - confirm
+  // this matches how the speech API key is stored.
+  @override
+  Future getRefinementApiKey(String providerId) async =>
+      _prefs.getString('api_key_refinement_$providerId');
+
+  /// Stores [apiKey] as the refinement API key for [providerId].
+  @override
+  Future saveRefinementApiKey(String providerId, String apiKey) async =>
+      _prefs.setString('api_key_refinement_$providerId', apiKey);
+
+  /// Reads the custom refinement endpoint stored for [providerId].
+  @override
+  Future getRefinementCustomEndpoint(String providerId) async =>
+      _prefs.getString('custom_endpoint_refinement_$providerId');
+
+  /// Stores [endpoint] as the custom refinement endpoint for [providerId].
+  @override
+  Future saveRefinementCustomEndpoint(
+          String providerId, String endpoint) async =>
+      _prefs.setString('custom_endpoint_refinement_$providerId', endpoint);
+
+  /// Decodes the cached JSON list of `{id, name}` objects for [providerId].
+  /// Returns an empty list when nothing is stored or when decoding or casting
+  /// fails.
+  @override
+  Future> getRefinementCachedModels(String providerId) async {
+    final raw = _prefs.getString('cached_models_refinement_$providerId');
+    if (raw == null || raw.isEmpty) return const [];
+    try {
+      final list = jsonDecode(raw) as List;
+      return list
+          .map((m) => AiModel(
+                id: (m as Map)['id'] as String,
+                name: m['name'] as String,
+              ))
+          .toList();
+    } catch (_) {
+      // A corrupt cache entry is treated the same as an empty cache.
+      return const [];
+    }
+  }
+
+  /// Serialises the `id` and `name` of each model as a JSON list and stores it
+  /// as the refinement model cache for [providerId].
+  @override
+  Future saveRefinementCachedModels(
+      String providerId, List models) async {
+    final raw = jsonEncode(
+      models.map((m) => {'id': m.id, 'name': m.name}).toList(),
+    );
+    await _prefs.setString('cached_models_refinement_$providerId', raw);
+  }
 }
diff --git a/lib/features/model_config/domain/repositories/model_config_repository.dart b/lib/features/model_config/domain/repositories/model_config_repository.dart
index 472047a..04476f7 100644
--- a/lib/features/model_config/domain/repositories/model_config_repository.dart
+++ b/lib/features/model_config/domain/repositories/model_config_repository.dart
@@ -21,4 +21,25 @@ abstract class ModelConfigRepository {
   /// Empty list when nothing has been fetched yet.
   Future> getCachedModels(String providerId);
   Future saveCachedModels(String providerId, List models);
+
+  // ── Refinement (text-polishing) configuration ─────────────────────────────
+  // Mirrors the speech-recognition setters above but in a separate
+  // namespace so the user can use one provider/model for transcription and
+  // a completely different one for post-processing.
+
+  // Provider selected for refinement.
+  Future getSelectedRefinementProviderId();
+  Future saveSelectedRefinementProviderId(String providerId);
+
+  // Model selected for a given refinement provider.
+  Future getSelectedRefinementModelId(String providerId);
+  Future saveSelectedRefinementModelId(String providerId, String modelId);
+
+  // API key used for a given refinement provider.
+  Future getRefinementApiKey(String providerId);
+  Future saveRefinementApiKey(String providerId, String apiKey);
+
+  // Custom endpoint for a given refinement provider.
+  Future getRefinementCustomEndpoint(String providerId);
+  Future saveRefinementCustomEndpoint(String providerId, String endpoint);
+
+  // Cached model list for a given refinement provider.
+  Future> getRefinementCachedModels(String providerId);
+  Future saveRefinementCachedModels(
+      String providerId, List models);
 }
diff --git a/lib/features/model_config/presentation/controllers/model_config_controller.dart b/lib/features/model_config/presentation/controllers/model_config_controller.dart
index c95804c..fe9b8d3 100644
--- a/lib/features/model_config/presentation/controllers/model_config_controller.dart
+++ b/lib/features/model_config/presentation/controllers/model_config_controller.dart
@@ -108,3 +108,90 @@ class DynamicModelsController extends _$DynamicModelsController {
   }
 }
 
+/// Exposes the provider selected for refinement together with that
+/// provider's model id, API key and custom endpoint, as one record.
+@riverpod
+class RefinementProviderController extends _$RefinementProviderController {
+  ModelConfigRepository get _repo => _buildRepository();
+
+  @override
+  Future<({String? providerId, String? modelId, String? apiKey, String? customEndpoint})>
+      build() async {
+    final providerId = await _repo.getSelectedRefinementProviderId();
+    // Without a selected provider the per-provider lookups are skipped and
+    // every field of the record is null.
+    return (
+      providerId: providerId,
+      modelId: providerId == null
+          ? null
+          : await _repo.getSelectedRefinementModelId(providerId),
+      apiKey: providerId == null
+          ? null
+          : await _repo.getRefinementApiKey(providerId),
+      customEndpoint: providerId == null
+          ? null
+          : await _repo.getRefinementCustomEndpoint(providerId),
+    );
+  }
+
+  /// Persists [providerId] as the selection, then invalidates this provider
+  /// so the state is rebuilt from storage.
+  Future selectProvider(String providerId) async {
+    await _repo.saveSelectedRefinementProviderId(providerId);
+    ref.invalidateSelf();
+  }
+
+  /// Persists [modelId] for the currently selected provider. Does nothing
+  /// when no provider is selected.
+  Future selectModel(String modelId) async {
+    final s = await future;
+    if (s.providerId != null) {
+      await _repo.saveSelectedRefinementModelId(s.providerId!, modelId);
+      ref.invalidateSelf();
+    }
+  }
+
+  /// Persists [apiKey] for the currently selected provider. Does nothing
+  /// when no provider is selected.
+  Future saveApiKey(String apiKey) async {
+    final s = await future;
+    if (s.providerId != null) {
+      await _repo.saveRefinementApiKey(s.providerId!, apiKey);
+      ref.invalidateSelf();
+    }
+  }
+
+  /// Persists [endpoint] for the currently selected provider. Does nothing
+  /// when no provider is selected.
+  Future saveCustomEndpoint(String endpoint) async {
+    final s = await future;
+    if (s.providerId != null) {
+      await _repo.saveRefinementCustomEndpoint(s.providerId!, endpoint);
+      ref.invalidateSelf();
+    }
+  }
+}
+
+/// Refinement-specific dynamic model list (separate cache from speech, so the
+/// user can point speech and refinement at different LiteLLM proxies if they
+/// want).
+@Riverpod(keepAlive: true)
+class DynamicRefinementModelsController
+    extends _$DynamicRefinementModelsController {
+  ModelConfigRepository get _repo => _buildRepository();
+  SpeechRecognitionService get _service => getIt();
+
+  /// Starts from whatever is in the refinement cache for [providerId].
+  @override
+  Future> build(String providerId) async {
+    return _repo.getRefinementCachedModels(providerId);
+  }
+
+  /// Fetches the model list from [baseUrl] with [apiKey], stores it in the
+  /// refinement cache for [providerId] and publishes it as the new state.
+  /// A failure is published as an error state instead of being thrown.
+  Future refresh({
+    required String providerId,
+    required String baseUrl,
+    required String apiKey,
+  }) async {
+    state = const AsyncLoading();
+    try {
+      final fetched = await _service.fetchAvailableModels(
+        baseUrl: baseUrl,
+        apiKey: apiKey,
+      );
+      final models =
+          fetched.map((m) => AiModel(id: m.id, name: m.name)).toList();
+      await _repo.saveRefinementCachedModels(providerId, models);
+      state = AsyncData(models);
+    } catch (e, st) {
+      state = AsyncError(e, st);
+    }
+  }
+}
+
diff --git a/lib/features/model_config/presentation/controllers/model_config_controller.g.dart
b/lib/features/model_config/presentation/controllers/model_config_controller.g.dart index 8a75127..c4644a2 100644 --- a/lib/features/model_config/presentation/controllers/model_config_controller.g.dart +++ b/lib/features/model_config/presentation/controllers/model_config_controller.g.dart @@ -270,3 +270,238 @@ abstract class _$DynamicModelsController extends $AsyncNotifier> { element.handleCreate(ref, () => build(_$args)); } } + +@ProviderFor(RefinementProviderController) +final refinementProviderControllerProvider = + RefinementProviderControllerProvider._(); + +final class RefinementProviderControllerProvider + extends + $AsyncNotifierProvider< + RefinementProviderController, + ({ + String? apiKey, + String? customEndpoint, + String? modelId, + String? providerId, + }) + > { + RefinementProviderControllerProvider._() + : super( + from: null, + argument: null, + retry: null, + name: r'refinementProviderControllerProvider', + isAutoDispose: true, + dependencies: null, + $allTransitiveDependencies: null, + ); + + @override + String debugGetCreateSourceHash() => _$refinementProviderControllerHash(); + + @$internal + @override + RefinementProviderController create() => RefinementProviderController(); +} + +String _$refinementProviderControllerHash() => + r'7dbecc6d3e8ae703a3b803f8b4464a0e78fa9153'; + +abstract class _$RefinementProviderController + extends + $AsyncNotifier< + ({ + String? apiKey, + String? customEndpoint, + String? modelId, + String? providerId, + }) + > { + FutureOr< + ({ + String? apiKey, + String? customEndpoint, + String? modelId, + String? providerId, + }) + > + build(); + @$mustCallSuper + @override + void runBuild() { + final ref = + this.ref + as $Ref< + AsyncValue< + ({ + String? apiKey, + String? customEndpoint, + String? modelId, + String? providerId, + }) + >, + ({ + String? apiKey, + String? customEndpoint, + String? modelId, + String? 
providerId, + }) + >; + final element = + ref.element + as $ClassProviderElement< + AnyNotifier< + AsyncValue< + ({ + String? apiKey, + String? customEndpoint, + String? modelId, + String? providerId, + }) + >, + ({ + String? apiKey, + String? customEndpoint, + String? modelId, + String? providerId, + }) + >, + AsyncValue< + ({ + String? apiKey, + String? customEndpoint, + String? modelId, + String? providerId, + }) + >, + Object?, + Object? + >; + element.handleCreate(ref, build); + } +} + +/// Refinement-specific dynamic model list (separate cache from speech, so the +/// user can point speech and refinement at different LiteLLM proxies if they +/// want). + +@ProviderFor(DynamicRefinementModelsController) +final dynamicRefinementModelsControllerProvider = + DynamicRefinementModelsControllerFamily._(); + +/// Refinement-specific dynamic model list (separate cache from speech, so the +/// user can point speech and refinement at different LiteLLM proxies if they +/// want). +final class DynamicRefinementModelsControllerProvider + extends + $AsyncNotifierProvider< + DynamicRefinementModelsController, + List + > { + /// Refinement-specific dynamic model list (separate cache from speech, so the + /// user can point speech and refinement at different LiteLLM proxies if they + /// want). 
+ DynamicRefinementModelsControllerProvider._({ + required DynamicRefinementModelsControllerFamily super.from, + required String super.argument, + }) : super( + retry: null, + name: r'dynamicRefinementModelsControllerProvider', + isAutoDispose: false, + dependencies: null, + $allTransitiveDependencies: null, + ); + + @override + String debugGetCreateSourceHash() => + _$dynamicRefinementModelsControllerHash(); + + @override + String toString() { + return r'dynamicRefinementModelsControllerProvider' + '' + '($argument)'; + } + + @$internal + @override + DynamicRefinementModelsController create() => + DynamicRefinementModelsController(); + + @override + bool operator ==(Object other) { + return other is DynamicRefinementModelsControllerProvider && + other.argument == argument; + } + + @override + int get hashCode { + return argument.hashCode; + } +} + +String _$dynamicRefinementModelsControllerHash() => + r'2eeded948629e6398e9a3d2c9d36d581ef5851b0'; + +/// Refinement-specific dynamic model list (separate cache from speech, so the +/// user can point speech and refinement at different LiteLLM proxies if they +/// want). + +final class DynamicRefinementModelsControllerFamily extends $Family + with + $ClassFamilyOverride< + DynamicRefinementModelsController, + AsyncValue>, + List, + FutureOr>, + String + > { + DynamicRefinementModelsControllerFamily._() + : super( + retry: null, + name: r'dynamicRefinementModelsControllerProvider', + dependencies: null, + $allTransitiveDependencies: null, + isAutoDispose: false, + ); + + /// Refinement-specific dynamic model list (separate cache from speech, so the + /// user can point speech and refinement at different LiteLLM proxies if they + /// want). 
+ + DynamicRefinementModelsControllerProvider call(String providerId) => + DynamicRefinementModelsControllerProvider._( + argument: providerId, + from: this, + ); + + @override + String toString() => r'dynamicRefinementModelsControllerProvider'; +} + +/// Refinement-specific dynamic model list (separate cache from speech, so the +/// user can point speech and refinement at different LiteLLM proxies if they +/// want). + +abstract class _$DynamicRefinementModelsController + extends $AsyncNotifier> { + late final _$args = ref.$arg as String; + String get providerId => _$args; + + FutureOr> build(String providerId); + @$mustCallSuper + @override + void runBuild() { + final ref = this.ref as $Ref>, List>; + final element = + ref.element + as $ClassProviderElement< + AnyNotifier>, List>, + AsyncValue>, + Object?, + Object? + >; + element.handleCreate(ref, () => build(_$args)); + } +} diff --git a/lib/features/model_config/presentation/pages/model_config_page.dart b/lib/features/model_config/presentation/pages/model_config_page.dart index 2c4f3be..b2e5237 100644 --- a/lib/features/model_config/presentation/pages/model_config_page.dart +++ b/lib/features/model_config/presentation/pages/model_config_page.dart @@ -41,6 +41,13 @@ class ModelConfigPage extends ConsumerWidget { isRequired: true, child: _SpeechConfigSection(providers: config.speechRecognition), ), + const SizedBox(height: 24), + _ConfigSection( + title: '文字優化(可選)', + isRequired: false, + child: + _RefinementConfigSection(providers: config.speechRecognition), + ), ], ), ), @@ -231,6 +238,128 @@ class _SpeechConfigSection extends ConsumerWidget { } +class _RefinementConfigSection extends ConsumerWidget { + const _RefinementConfigSection({required this.providers}); + final List providers; + + @override + Widget build(BuildContext context, WidgetRef ref) { + final stateAsync = ref.watch(refinementProviderControllerProvider); + final cs = Theme.of(context).colorScheme; + + return stateAsync.when( + data: (state) { + 
final selectedProvider = providers.firstWhere( + (p) => p.id == state.providerId, + orElse: () => providers.first, + ); + + return Column( + crossAxisAlignment: CrossAxisAlignment.start, + children: [ + Text( + '把語音辨識結果再丟給一個聊天模型做格式化、錯字修正、條列整理。' + '需要在「設定」頁打開「啟用文字優化」才會生效。', + style: TextStyle( + color: cs.onSurface.withAlpha(150), + fontSize: 12, + ), + ), + const SizedBox(height: 16), + const Text('選擇 Provider', style: TextStyle(fontWeight: FontWeight.w600)), + const SizedBox(height: 12), + Wrap( + spacing: 8, + children: providers.map((p) { + final isSelected = p.id == state.providerId; + return ChoiceChip( + label: Text(p.name), + selected: isSelected, + onSelected: (val) { + if (val) { + ref + .read(refinementProviderControllerProvider.notifier) + .selectProvider(p.id); + } + }, + backgroundColor: cs.surface, + selectedColor: cs.primary.withAlpha(50), + labelStyle: TextStyle( + color: isSelected ? cs.primary : cs.onSurface.withAlpha(150), + fontWeight: isSelected ? FontWeight.bold : FontWeight.normal, + ), + side: BorderSide( + color: isSelected ? cs.primary : cs.onSurface.withAlpha(30), + ), + ); + }).toList(), + ), + const SizedBox(height: 24), + _ApiKeyInput( + providerId: state.providerId ?? '', + initialValue: state.apiKey ?? '', + onSave: (val) => ref + .read(refinementProviderControllerProvider.notifier) + .saveApiKey(val), + ), + if (state.providerId == 'litellm') ...[ + const SizedBox(height: 24), + _LiteLLMEndpointInput( + initialValue: state.customEndpoint ?? '', + onSave: (val) => ref + .read(refinementProviderControllerProvider.notifier) + .saveCustomEndpoint(val), + ), + ], + const SizedBox(height: 24), + const Text('選擇模型', style: TextStyle(fontWeight: FontWeight.w600)), + const SizedBox(height: 12), + if (state.providerId == 'litellm') + _LiteLLMModelPicker( + baseUrl: state.customEndpoint ?? '', + apiKey: state.apiKey ?? 
'', + selectedModelId: state.modelId, + isRefinement: true, + onChanged: (val) { + if (val != null) { + ref + .read(refinementProviderControllerProvider.notifier) + .selectModel(val); + } + }, + ) + else + _ModelDropdown( + models: selectedProvider.models, + selectedModelId: state.modelId, + onChanged: (val) { + if (val != null) { + ref + .read(refinementProviderControllerProvider.notifier) + .selectModel(val); + } + }, + ), + if (state.providerId != 'litellm') ...[ + const SizedBox(height: 24), + _AdvancedConfigSection( + providerId: state.providerId ?? '', + customEndpoint: state.customEndpoint ?? '', + onSaveCustomEndpoint: (val) => ref + .read(refinementProviderControllerProvider.notifier) + .saveCustomEndpoint(val), + ), + ], + ], + ); + }, + loading: () => + const SizedBox(height: 100, child: Center(child: CircularProgressIndicator())), + error: (err, _) => Text('錯誤: $err'), + ); + } +} + class _ApiKeyInput extends StatefulWidget { const _ApiKeyInput({ required this.providerId, @@ -587,20 +716,23 @@ class _LiteLLMModelPicker extends ConsumerWidget { required this.apiKey, required this.selectedModelId, required this.onChanged, + this.isRefinement = false, }); final String baseUrl; final String apiKey; final String? selectedModelId; final ValueChanged onChanged; + final bool isRefinement; bool get _canFetch => baseUrl.isNotEmpty && apiKey.isNotEmpty; @override Widget build(BuildContext context, WidgetRef ref) { final cs = Theme.of(context).colorScheme; - final modelsAsync = - ref.watch(dynamicModelsControllerProvider('litellm')); + final modelsAsync = isRefinement + ? ref.watch(dynamicRefinementModelsControllerProvider('litellm')) + : ref.watch(dynamicModelsControllerProvider('litellm')); return Column( crossAxisAlignment: CrossAxisAlignment.start, @@ -622,16 +754,32 @@ class _LiteLLMModelPicker extends ConsumerWidget { : const Icon(Icons.refresh), onPressed: _canFetch && !modelsAsync.isLoading ? 
() async { - await ref - .read(dynamicModelsControllerProvider('litellm').notifier) - .refresh( - providerId: 'litellm', - baseUrl: baseUrl, - apiKey: apiKey, - ); + if (isRefinement) { + await ref + .read(dynamicRefinementModelsControllerProvider( + 'litellm') + .notifier) + .refresh( + providerId: 'litellm', + baseUrl: baseUrl, + apiKey: apiKey, + ); + } else { + await ref + .read(dynamicModelsControllerProvider('litellm') + .notifier) + .refresh( + providerId: 'litellm', + baseUrl: baseUrl, + apiKey: apiKey, + ); + } if (context.mounted) { - final newState = - ref.read(dynamicModelsControllerProvider('litellm')); + final newState = isRefinement + ? ref.read(dynamicRefinementModelsControllerProvider( + 'litellm')) + : ref.read( + dynamicModelsControllerProvider('litellm')); final msg = newState.hasError ? '抓取失敗:${newState.error}' : '已更新 ${newState.value?.length ?? 0} 個模型'; diff --git a/lib/features/prompt/data/repositories/prompt_repository_impl.dart b/lib/features/prompt/data/repositories/prompt_repository_impl.dart index 8a6d90c..6b940ed 100644 --- a/lib/features/prompt/data/repositories/prompt_repository_impl.dart +++ b/lib/features/prompt/data/repositories/prompt_repository_impl.dart @@ -11,58 +11,112 @@ class PromptRepositoryImpl implements PromptRepository { final SharedPreferences _prefs; - Future _getCustomPromptFile() async { + Future _getCustomFile(String fileName) async { final dir = await getApplicationSupportDirectory(); - return File('${dir.path}/SpeechToText_Custom.prompt'); + return File('${dir.path}/$fileName'); } - @override - Future getDefaultSpeechPrompt() async { + Future _loadDefaultFromAsset(String assetPath, String fallback) async { try { - final content = await rootBundle.loadString('prompts/SpeechToText.prompt'); + final content = await rootBundle.loadString(assetPath); return content.trim(); } catch (e) { - print('[PromptRepo] ERROR loading SpeechToText.prompt from assets: $e'); + print('[PromptRepo] ERROR loading $assetPath: $e'); + 
return fallback; } - return '請將語音精確轉換成繁體中文,並依語意加上適當的標點符號。'; } - @override - Future getSpeechPrompt() async { + Future _readCustomOrDefault( + String customFileName, Future Function() defaultLoader) async { try { - final file = await _getCustomPromptFile(); + final file = await _getCustomFile(customFileName); if (await file.exists()) { final content = (await file.readAsString()).trim(); if (content.isNotEmpty) return content; } } catch (e) { - print('[PromptRepo] Error reading custom prompt: $e'); + print('[PromptRepo] Error reading $customFileName: $e'); } - return await getDefaultSpeechPrompt(); + return await defaultLoader(); } - @override - Future saveSpeechPrompt(String prompt) async { + Future _saveCustom( + String customFileName, String prefsKey, String prompt) async { final cleaned = prompt.trim(); try { - final file = await _getCustomPromptFile(); + final file = await _getCustomFile(customFileName); await file.writeAsString(cleaned, flush: true); } catch (e) { - print('[PromptRepo] Error saving custom prompt: $e'); + print('[PromptRepo] Error saving $customFileName: $e'); } - await _prefs.setString(AppConstants.speechPromptKey, cleaned); + await _prefs.setString(prefsKey, cleaned); return cleaned; } - @override - Future resetSpeechPrompt() async { + Future _resetCustom(String customFileName, String prefsKey, + Future Function() defaultLoader) async { try { - final file = await _getCustomPromptFile(); + final file = await _getCustomFile(customFileName); if (await file.exists()) await file.delete(); } catch (e) { - print('[PromptRepo] Error deleting custom prompt: $e'); + print('[PromptRepo] Error deleting $customFileName: $e'); } - await _prefs.remove(AppConstants.speechPromptKey); - return await getDefaultSpeechPrompt(); + await _prefs.remove(prefsKey); + return await defaultLoader(); } + + // ── Speech ──────────────────────────────────────────────────────────────── + + @override + Future getDefaultSpeechPrompt() => _loadDefaultFromAsset( + 
'prompts/SpeechToText.prompt', + '請將語音精確轉換成繁體中文,並依語意加上適當的標點符號。', + ); + + @override + Future getSpeechPrompt() => + _readCustomOrDefault('SpeechToText_Custom.prompt', getDefaultSpeechPrompt); + + @override + Future saveSpeechPrompt(String prompt) => _saveCustom( + 'SpeechToText_Custom.prompt', + AppConstants.speechPromptKey, + prompt, + ); + + @override + Future resetSpeechPrompt() => _resetCustom( + 'SpeechToText_Custom.prompt', + AppConstants.speechPromptKey, + getDefaultSpeechPrompt, + ); + + // ── Refinement ──────────────────────────────────────────────────────────── + + @override + Future getDefaultRefinementPrompt() => _loadDefaultFromAsset( + 'prompts/TextRefinement.prompt', + '優化以下語音轉錄文字:移除「嗯/啊/那個」等填充詞、修正口誤、加上適當標點,' + '保留原意,僅輸出優化後的純文字。', + ); + + @override + Future getRefinementPrompt() => _readCustomOrDefault( + 'TextRefinement_Custom.prompt', + getDefaultRefinementPrompt, + ); + + @override + Future saveRefinementPrompt(String prompt) => _saveCustom( + 'TextRefinement_Custom.prompt', + AppConstants.refinementPromptKey, + prompt, + ); + + @override + Future resetRefinementPrompt() => _resetCustom( + 'TextRefinement_Custom.prompt', + AppConstants.refinementPromptKey, + getDefaultRefinementPrompt, + ); } diff --git a/lib/features/prompt/domain/repositories/prompt_repository.dart b/lib/features/prompt/domain/repositories/prompt_repository.dart index 7262718..c1d655e 100644 --- a/lib/features/prompt/domain/repositories/prompt_repository.dart +++ b/lib/features/prompt/domain/repositories/prompt_repository.dart @@ -1,8 +1,11 @@ abstract class PromptRepository { Future getSpeechPrompt(); Future saveSpeechPrompt(String prompt); - Future getDefaultSpeechPrompt(); - Future resetSpeechPrompt(); + + Future getRefinementPrompt(); + Future saveRefinementPrompt(String prompt); + Future getDefaultRefinementPrompt(); + Future resetRefinementPrompt(); } diff --git a/lib/features/prompt/presentation/controllers/prompt_controller.dart 
b/lib/features/prompt/presentation/controllers/prompt_controller.dart index a624396..8e927c2 100644 --- a/lib/features/prompt/presentation/controllers/prompt_controller.dart +++ b/lib/features/prompt/presentation/controllers/prompt_controller.dart @@ -33,3 +33,26 @@ class SpeechPromptController extends _$SpeechPromptController { } } +@riverpod +class RefinementPromptController extends _$RefinementPromptController { + @override + Future build() async { + final repo = ref.watch(promptRepositoryProvider); + return repo.getRefinementPrompt(); + } + + Future save(String prompt) async { + final repo = ref.read(promptRepositoryProvider); + final newVal = await repo.saveRefinementPrompt(prompt); + ref.invalidateSelf(); + return newVal; + } + + Future resetToDefault() async { + final repo = ref.read(promptRepositoryProvider); + final newVal = await repo.resetRefinementPrompt(); + ref.invalidateSelf(); + return newVal; + } +} + diff --git a/lib/features/prompt/presentation/controllers/prompt_controller.g.dart b/lib/features/prompt/presentation/controllers/prompt_controller.g.dart index 372dfa6..253fc36 100644 --- a/lib/features/prompt/presentation/controllers/prompt_controller.g.dart +++ b/lib/features/prompt/presentation/controllers/prompt_controller.g.dart @@ -99,3 +99,49 @@ abstract class _$SpeechPromptController extends $AsyncNotifier { element.handleCreate(ref, build); } } + +@ProviderFor(RefinementPromptController) +final refinementPromptControllerProvider = + RefinementPromptControllerProvider._(); + +final class RefinementPromptControllerProvider + extends $AsyncNotifierProvider { + RefinementPromptControllerProvider._() + : super( + from: null, + argument: null, + retry: null, + name: r'refinementPromptControllerProvider', + isAutoDispose: true, + dependencies: null, + $allTransitiveDependencies: null, + ); + + @override + String debugGetCreateSourceHash() => _$refinementPromptControllerHash(); + + @$internal + @override + RefinementPromptController create() => 
RefinementPromptController(); +} + +String _$refinementPromptControllerHash() => + r'adee0c32bf7696254a613619da6890b3e173fb42'; + +abstract class _$RefinementPromptController extends $AsyncNotifier { + FutureOr build(); + @$mustCallSuper + @override + void runBuild() { + final ref = this.ref as $Ref, String>; + final element = + ref.element + as $ClassProviderElement< + AnyNotifier, String>, + AsyncValue, + Object?, + Object? + >; + element.handleCreate(ref, build); + } +} diff --git a/lib/features/prompt/presentation/pages/prompt_page.dart b/lib/features/prompt/presentation/pages/prompt_page.dart index 415089b..eb2162a 100644 --- a/lib/features/prompt/presentation/pages/prompt_page.dart +++ b/lib/features/prompt/presentation/pages/prompt_page.dart @@ -5,17 +5,40 @@ import 'package:zero_type/features/prompt/presentation/controllers/prompt_contro import 'package:zero_type/features/prompt/presentation/widgets/prompt_editor.dart'; @RoutePage() -class PromptPage extends ConsumerWidget { +class PromptPage extends ConsumerStatefulWidget { const PromptPage({super.key}); @override - Widget build(BuildContext context, WidgetRef ref) { + ConsumerState createState() => _PromptPageState(); +} + +class _PromptPageState extends ConsumerState + with SingleTickerProviderStateMixin { + late final TabController _tabController; + + @override + void initState() { + super.initState(); + _tabController = TabController(length: 2, vsync: this); + } + + @override + void dispose() { + _tabController.dispose(); + super.dispose(); + } + + @override + Widget build(BuildContext context) { final speechPrompt = ref.watch(speechPromptControllerProvider); + final refinementPrompt = ref.watch(refinementPromptControllerProvider); + final cs = Theme.of(context).colorScheme; return Scaffold( backgroundColor: Colors.transparent, body: Padding( - padding: const EdgeInsets.only(left: 24, right: 24, bottom: 24, top: 30), + padding: + const EdgeInsets.only(left: 24, right: 24, bottom: 24, top: 30), child: 
Column( crossAxisAlignment: CrossAxisAlignment.start, children: [ @@ -29,22 +52,54 @@ class PromptPage extends ConsumerWidget { Text( '自訂發送給 AI 的系統提示詞', style: Theme.of(context).textTheme.bodyMedium?.copyWith( - color: Theme.of(context).colorScheme.onSurface.withAlpha(150), + color: cs.onSurface.withAlpha(150), ), ), - const SizedBox(height: 32), + const SizedBox(height: 16), + TabBar( + controller: _tabController, + isScrollable: true, + tabAlignment: TabAlignment.start, + labelColor: cs.primary, + unselectedLabelColor: cs.onSurface.withAlpha(150), + indicatorColor: cs.primary, + tabs: const [ + Tab(text: '語音辨識'), + Tab(text: '文字優化'), + ], + ), + const SizedBox(height: 16), Expanded( - child: PromptEditor( - title: '語音辨識提示詞', - subtitle: '提供給語音辨識模型的補充指令', - icon: Icons.mic, - value: speechPrompt.value ?? '', - isLoading: speechPrompt.isLoading, - onSave: (text) => - ref.read(speechPromptControllerProvider.notifier).save(text), - onReset: () => ref - .read(speechPromptControllerProvider.notifier) - .resetToDefault(), + child: TabBarView( + controller: _tabController, + children: [ + PromptEditor( + title: '語音辨識提示詞', + subtitle: '提供給語音辨識模型的補充指令', + icon: Icons.mic, + value: speechPrompt.value ?? '', + isLoading: speechPrompt.isLoading, + onSave: (text) => ref + .read(speechPromptControllerProvider.notifier) + .save(text), + onReset: () => ref + .read(speechPromptControllerProvider.notifier) + .resetToDefault(), + ), + PromptEditor( + title: '文字優化提示詞', + subtitle: '轉錄後送進 LLM 做格式化/錯字修正用的指令', + icon: Icons.auto_fix_high, + value: refinementPrompt.value ?? 
'', + isLoading: refinementPrompt.isLoading, + onSave: (text) => ref + .read(refinementPromptControllerProvider.notifier) + .save(text), + onReset: () => ref + .read(refinementPromptControllerProvider.notifier) + .resetToDefault(), + ), + ], ), ), ], @@ -52,5 +107,4 @@ class PromptPage extends ConsumerWidget { ), ); } - } diff --git a/lib/features/settings/presentation/controllers/settings_controller.dart b/lib/features/settings/presentation/controllers/settings_controller.dart index 3988a05..775aa88 100644 --- a/lib/features/settings/presentation/controllers/settings_controller.dart +++ b/lib/features/settings/presentation/controllers/settings_controller.dart @@ -37,6 +37,8 @@ class SettingsController extends _$SettingsController { final stopSound = prefs.getString(AppConstants.stopSoundKey) ?? kDefaultStopSound; final historyRetentionDays = prefs.getInt(AppConstants.historyRetentionDaysKey) ?? 7; final maxRecordingMinutes = prefs.getInt(AppConstants.maxRecordingMinutesKey) ?? 1; + final refinementEnabled = + prefs.getBool(AppConstants.isRefinementEnabledKey) ?? false; print('[SettingsController] Build complete.'); return SettingsState( @@ -49,6 +51,7 @@ class SettingsController extends _$SettingsController { stopSound: stopSound, historyRetentionDays: historyRetentionDays, maxRecordingMinutes: maxRecordingMinutes, + refinementEnabled: refinementEnabled, ); } catch (e, st) { print('[SettingsController] Error building settings state: $e\n$st'); @@ -188,6 +191,15 @@ class SettingsController extends _$SettingsController { } } + Future toggleRefinementEnabled(bool value) async { + await getIt() + .setBool(AppConstants.isRefinementEnabledKey, value); + final currentState = state.value; + if (currentState != null) { + state = AsyncData(currentState.copyWith(refinementEnabled: value)); + } + } + /// Called by SettingsPage whenever it becomes visible. 
Future refreshPermissions() async { // Run checks independently of current state loading status diff --git a/lib/features/settings/presentation/controllers/settings_state.dart b/lib/features/settings/presentation/controllers/settings_state.dart index 37f82a0..bc49ce4 100644 --- a/lib/features/settings/presentation/controllers/settings_state.dart +++ b/lib/features/settings/presentation/controllers/settings_state.dart @@ -17,5 +17,6 @@ abstract class SettingsState with _$SettingsState { @Default(kDefaultStopSound) String stopSound, @Default(7) int historyRetentionDays, @Default(1) int maxRecordingMinutes, + @Default(false) bool refinementEnabled, }) = _SettingsState; } diff --git a/lib/features/settings/presentation/controllers/settings_state.freezed.dart b/lib/features/settings/presentation/controllers/settings_state.freezed.dart index 440485f..8bcf69e 100644 --- a/lib/features/settings/presentation/controllers/settings_state.freezed.dart +++ b/lib/features/settings/presentation/controllers/settings_state.freezed.dart @@ -14,7 +14,7 @@ T _$identity(T value) => value; /// @nodoc mixin _$SettingsState { - bool get launchAtStartup; HotKey get hotkey; bool get isAccessibilityAuthorized; bool get isMicrophoneAuthorized; bool get isRecordingHotkey; bool get soundEnabled; String get startSound; String get stopSound; int get historyRetentionDays; int get maxRecordingMinutes; + bool get launchAtStartup; HotKey get hotkey; bool get isAccessibilityAuthorized; bool get isMicrophoneAuthorized; bool get isRecordingHotkey; bool get soundEnabled; String get startSound; String get stopSound; int get historyRetentionDays; int get maxRecordingMinutes; bool get refinementEnabled; /// Create a copy of SettingsState /// with the given fields replaced by the non-null parameter values. 
@JsonKey(includeFromJson: false, includeToJson: false) @@ -25,16 +25,16 @@ $SettingsStateCopyWith get copyWith => _$SettingsStateCopyWithImp @override bool operator ==(Object other) { - return identical(this, other) || (other.runtimeType == runtimeType&&other is SettingsState&&(identical(other.launchAtStartup, launchAtStartup) || other.launchAtStartup == launchAtStartup)&&(identical(other.hotkey, hotkey) || other.hotkey == hotkey)&&(identical(other.isAccessibilityAuthorized, isAccessibilityAuthorized) || other.isAccessibilityAuthorized == isAccessibilityAuthorized)&&(identical(other.isMicrophoneAuthorized, isMicrophoneAuthorized) || other.isMicrophoneAuthorized == isMicrophoneAuthorized)&&(identical(other.isRecordingHotkey, isRecordingHotkey) || other.isRecordingHotkey == isRecordingHotkey)&&(identical(other.soundEnabled, soundEnabled) || other.soundEnabled == soundEnabled)&&(identical(other.startSound, startSound) || other.startSound == startSound)&&(identical(other.stopSound, stopSound) || other.stopSound == stopSound)&&(identical(other.historyRetentionDays, historyRetentionDays) || other.historyRetentionDays == historyRetentionDays)&&(identical(other.maxRecordingMinutes, maxRecordingMinutes) || other.maxRecordingMinutes == maxRecordingMinutes)); + return identical(this, other) || (other.runtimeType == runtimeType&&other is SettingsState&&(identical(other.launchAtStartup, launchAtStartup) || other.launchAtStartup == launchAtStartup)&&(identical(other.hotkey, hotkey) || other.hotkey == hotkey)&&(identical(other.isAccessibilityAuthorized, isAccessibilityAuthorized) || other.isAccessibilityAuthorized == isAccessibilityAuthorized)&&(identical(other.isMicrophoneAuthorized, isMicrophoneAuthorized) || other.isMicrophoneAuthorized == isMicrophoneAuthorized)&&(identical(other.isRecordingHotkey, isRecordingHotkey) || other.isRecordingHotkey == isRecordingHotkey)&&(identical(other.soundEnabled, soundEnabled) || other.soundEnabled == 
soundEnabled)&&(identical(other.startSound, startSound) || other.startSound == startSound)&&(identical(other.stopSound, stopSound) || other.stopSound == stopSound)&&(identical(other.historyRetentionDays, historyRetentionDays) || other.historyRetentionDays == historyRetentionDays)&&(identical(other.maxRecordingMinutes, maxRecordingMinutes) || other.maxRecordingMinutes == maxRecordingMinutes)&&(identical(other.refinementEnabled, refinementEnabled) || other.refinementEnabled == refinementEnabled)); } @override -int get hashCode => Object.hash(runtimeType,launchAtStartup,hotkey,isAccessibilityAuthorized,isMicrophoneAuthorized,isRecordingHotkey,soundEnabled,startSound,stopSound,historyRetentionDays,maxRecordingMinutes); +int get hashCode => Object.hash(runtimeType,launchAtStartup,hotkey,isAccessibilityAuthorized,isMicrophoneAuthorized,isRecordingHotkey,soundEnabled,startSound,stopSound,historyRetentionDays,maxRecordingMinutes,refinementEnabled); @override String toString() { - return 'SettingsState(launchAtStartup: $launchAtStartup, hotkey: $hotkey, isAccessibilityAuthorized: $isAccessibilityAuthorized, isMicrophoneAuthorized: $isMicrophoneAuthorized, isRecordingHotkey: $isRecordingHotkey, soundEnabled: $soundEnabled, startSound: $startSound, stopSound: $stopSound, historyRetentionDays: $historyRetentionDays, maxRecordingMinutes: $maxRecordingMinutes)'; + return 'SettingsState(launchAtStartup: $launchAtStartup, hotkey: $hotkey, isAccessibilityAuthorized: $isAccessibilityAuthorized, isMicrophoneAuthorized: $isMicrophoneAuthorized, isRecordingHotkey: $isRecordingHotkey, soundEnabled: $soundEnabled, startSound: $startSound, stopSound: $stopSound, historyRetentionDays: $historyRetentionDays, maxRecordingMinutes: $maxRecordingMinutes, refinementEnabled: $refinementEnabled)'; } @@ -45,7 +45,7 @@ abstract mixin class $SettingsStateCopyWith<$Res> { factory $SettingsStateCopyWith(SettingsState value, $Res Function(SettingsState) _then) = _$SettingsStateCopyWithImpl; @useResult 
$Res call({ - bool launchAtStartup, HotKey hotkey, bool isAccessibilityAuthorized, bool isMicrophoneAuthorized, bool isRecordingHotkey, bool soundEnabled, String startSound, String stopSound, int historyRetentionDays, int maxRecordingMinutes + bool launchAtStartup, HotKey hotkey, bool isAccessibilityAuthorized, bool isMicrophoneAuthorized, bool isRecordingHotkey, bool soundEnabled, String startSound, String stopSound, int historyRetentionDays, int maxRecordingMinutes, bool refinementEnabled }); @@ -62,7 +62,7 @@ class _$SettingsStateCopyWithImpl<$Res> /// Create a copy of SettingsState /// with the given fields replaced by the non-null parameter values. -@pragma('vm:prefer-inline') @override $Res call({Object? launchAtStartup = null,Object? hotkey = null,Object? isAccessibilityAuthorized = null,Object? isMicrophoneAuthorized = null,Object? isRecordingHotkey = null,Object? soundEnabled = null,Object? startSound = null,Object? stopSound = null,Object? historyRetentionDays = null,Object? maxRecordingMinutes = null,}) { +@pragma('vm:prefer-inline') @override $Res call({Object? launchAtStartup = null,Object? hotkey = null,Object? isAccessibilityAuthorized = null,Object? isMicrophoneAuthorized = null,Object? isRecordingHotkey = null,Object? soundEnabled = null,Object? startSound = null,Object? stopSound = null,Object? historyRetentionDays = null,Object? maxRecordingMinutes = null,Object? refinementEnabled = null,}) { return _then(_self.copyWith( launchAtStartup: null == launchAtStartup ? _self.launchAtStartup : launchAtStartup // ignore: cast_nullable_to_non_nullable as bool,hotkey: null == hotkey ? _self.hotkey : hotkey // ignore: cast_nullable_to_non_nullable @@ -74,7 +74,8 @@ as bool,startSound: null == startSound ? _self.startSound : startSound // ignore as String,stopSound: null == stopSound ? _self.stopSound : stopSound // ignore: cast_nullable_to_non_nullable as String,historyRetentionDays: null == historyRetentionDays ? 
_self.historyRetentionDays : historyRetentionDays // ignore: cast_nullable_to_non_nullable as int,maxRecordingMinutes: null == maxRecordingMinutes ? _self.maxRecordingMinutes : maxRecordingMinutes // ignore: cast_nullable_to_non_nullable -as int, +as int,refinementEnabled: null == refinementEnabled ? _self.refinementEnabled : refinementEnabled // ignore: cast_nullable_to_non_nullable +as bool, )); } @@ -159,10 +160,10 @@ return $default(_that);case _: /// } /// ``` -@optionalTypeArgs TResult maybeWhen(TResult Function( bool launchAtStartup, HotKey hotkey, bool isAccessibilityAuthorized, bool isMicrophoneAuthorized, bool isRecordingHotkey, bool soundEnabled, String startSound, String stopSound, int historyRetentionDays, int maxRecordingMinutes)? $default,{required TResult orElse(),}) {final _that = this; +@optionalTypeArgs TResult maybeWhen(TResult Function( bool launchAtStartup, HotKey hotkey, bool isAccessibilityAuthorized, bool isMicrophoneAuthorized, bool isRecordingHotkey, bool soundEnabled, String startSound, String stopSound, int historyRetentionDays, int maxRecordingMinutes, bool refinementEnabled)? 
$default,{required TResult orElse(),}) {final _that = this; switch (_that) { case _SettingsState() when $default != null: -return $default(_that.launchAtStartup,_that.hotkey,_that.isAccessibilityAuthorized,_that.isMicrophoneAuthorized,_that.isRecordingHotkey,_that.soundEnabled,_that.startSound,_that.stopSound,_that.historyRetentionDays,_that.maxRecordingMinutes);case _: +return $default(_that.launchAtStartup,_that.hotkey,_that.isAccessibilityAuthorized,_that.isMicrophoneAuthorized,_that.isRecordingHotkey,_that.soundEnabled,_that.startSound,_that.stopSound,_that.historyRetentionDays,_that.maxRecordingMinutes,_that.refinementEnabled);case _: return orElse(); } @@ -180,10 +181,10 @@ return $default(_that.launchAtStartup,_that.hotkey,_that.isAccessibilityAuthoriz /// } /// ``` -@optionalTypeArgs TResult when(TResult Function( bool launchAtStartup, HotKey hotkey, bool isAccessibilityAuthorized, bool isMicrophoneAuthorized, bool isRecordingHotkey, bool soundEnabled, String startSound, String stopSound, int historyRetentionDays, int maxRecordingMinutes) $default,) {final _that = this; +@optionalTypeArgs TResult when(TResult Function( bool launchAtStartup, HotKey hotkey, bool isAccessibilityAuthorized, bool isMicrophoneAuthorized, bool isRecordingHotkey, bool soundEnabled, String startSound, String stopSound, int historyRetentionDays, int maxRecordingMinutes, bool refinementEnabled) $default,) {final _that = this; switch (_that) { case _SettingsState(): -return $default(_that.launchAtStartup,_that.hotkey,_that.isAccessibilityAuthorized,_that.isMicrophoneAuthorized,_that.isRecordingHotkey,_that.soundEnabled,_that.startSound,_that.stopSound,_that.historyRetentionDays,_that.maxRecordingMinutes);case _: +return $default(_that.launchAtStartup,_that.hotkey,_that.isAccessibilityAuthorized,_that.isMicrophoneAuthorized,_that.isRecordingHotkey,_that.soundEnabled,_that.startSound,_that.stopSound,_that.historyRetentionDays,_that.maxRecordingMinutes,_that.refinementEnabled);case _: 
throw StateError('Unexpected subclass'); } @@ -200,10 +201,10 @@ return $default(_that.launchAtStartup,_that.hotkey,_that.isAccessibilityAuthoriz /// } /// ``` -@optionalTypeArgs TResult? whenOrNull(TResult? Function( bool launchAtStartup, HotKey hotkey, bool isAccessibilityAuthorized, bool isMicrophoneAuthorized, bool isRecordingHotkey, bool soundEnabled, String startSound, String stopSound, int historyRetentionDays, int maxRecordingMinutes)? $default,) {final _that = this; +@optionalTypeArgs TResult? whenOrNull(TResult? Function( bool launchAtStartup, HotKey hotkey, bool isAccessibilityAuthorized, bool isMicrophoneAuthorized, bool isRecordingHotkey, bool soundEnabled, String startSound, String stopSound, int historyRetentionDays, int maxRecordingMinutes, bool refinementEnabled)? $default,) {final _that = this; switch (_that) { case _SettingsState() when $default != null: -return $default(_that.launchAtStartup,_that.hotkey,_that.isAccessibilityAuthorized,_that.isMicrophoneAuthorized,_that.isRecordingHotkey,_that.soundEnabled,_that.startSound,_that.stopSound,_that.historyRetentionDays,_that.maxRecordingMinutes);case _: +return $default(_that.launchAtStartup,_that.hotkey,_that.isAccessibilityAuthorized,_that.isMicrophoneAuthorized,_that.isRecordingHotkey,_that.soundEnabled,_that.startSound,_that.stopSound,_that.historyRetentionDays,_that.maxRecordingMinutes,_that.refinementEnabled);case _: return null; } @@ -215,7 +216,7 @@ return $default(_that.launchAtStartup,_that.hotkey,_that.isAccessibilityAuthoriz class _SettingsState implements SettingsState { - const _SettingsState({this.launchAtStartup = false, required this.hotkey, this.isAccessibilityAuthorized = false, this.isMicrophoneAuthorized = false, this.isRecordingHotkey = false, this.soundEnabled = true, this.startSound = kDefaultStartSound, this.stopSound = kDefaultStopSound, this.historyRetentionDays = 7, this.maxRecordingMinutes = 1}); + const _SettingsState({this.launchAtStartup = false, required this.hotkey, 
this.isAccessibilityAuthorized = false, this.isMicrophoneAuthorized = false, this.isRecordingHotkey = false, this.soundEnabled = true, this.startSound = kDefaultStartSound, this.stopSound = kDefaultStopSound, this.historyRetentionDays = 7, this.maxRecordingMinutes = 1, this.refinementEnabled = false}); @override@JsonKey() final bool launchAtStartup; @@ -228,6 +229,7 @@ class _SettingsState implements SettingsState { @override@JsonKey() final String stopSound; @override@JsonKey() final int historyRetentionDays; @override@JsonKey() final int maxRecordingMinutes; +@override@JsonKey() final bool refinementEnabled; /// Create a copy of SettingsState /// with the given fields replaced by the non-null parameter values. @@ -239,16 +241,16 @@ _$SettingsStateCopyWith<_SettingsState> get copyWith => __$SettingsStateCopyWith @override bool operator ==(Object other) { - return identical(this, other) || (other.runtimeType == runtimeType&&other is _SettingsState&&(identical(other.launchAtStartup, launchAtStartup) || other.launchAtStartup == launchAtStartup)&&(identical(other.hotkey, hotkey) || other.hotkey == hotkey)&&(identical(other.isAccessibilityAuthorized, isAccessibilityAuthorized) || other.isAccessibilityAuthorized == isAccessibilityAuthorized)&&(identical(other.isMicrophoneAuthorized, isMicrophoneAuthorized) || other.isMicrophoneAuthorized == isMicrophoneAuthorized)&&(identical(other.isRecordingHotkey, isRecordingHotkey) || other.isRecordingHotkey == isRecordingHotkey)&&(identical(other.soundEnabled, soundEnabled) || other.soundEnabled == soundEnabled)&&(identical(other.startSound, startSound) || other.startSound == startSound)&&(identical(other.stopSound, stopSound) || other.stopSound == stopSound)&&(identical(other.historyRetentionDays, historyRetentionDays) || other.historyRetentionDays == historyRetentionDays)&&(identical(other.maxRecordingMinutes, maxRecordingMinutes) || other.maxRecordingMinutes == maxRecordingMinutes)); + return identical(this, other) || 
(other.runtimeType == runtimeType&&other is _SettingsState&&(identical(other.launchAtStartup, launchAtStartup) || other.launchAtStartup == launchAtStartup)&&(identical(other.hotkey, hotkey) || other.hotkey == hotkey)&&(identical(other.isAccessibilityAuthorized, isAccessibilityAuthorized) || other.isAccessibilityAuthorized == isAccessibilityAuthorized)&&(identical(other.isMicrophoneAuthorized, isMicrophoneAuthorized) || other.isMicrophoneAuthorized == isMicrophoneAuthorized)&&(identical(other.isRecordingHotkey, isRecordingHotkey) || other.isRecordingHotkey == isRecordingHotkey)&&(identical(other.soundEnabled, soundEnabled) || other.soundEnabled == soundEnabled)&&(identical(other.startSound, startSound) || other.startSound == startSound)&&(identical(other.stopSound, stopSound) || other.stopSound == stopSound)&&(identical(other.historyRetentionDays, historyRetentionDays) || other.historyRetentionDays == historyRetentionDays)&&(identical(other.maxRecordingMinutes, maxRecordingMinutes) || other.maxRecordingMinutes == maxRecordingMinutes)&&(identical(other.refinementEnabled, refinementEnabled) || other.refinementEnabled == refinementEnabled)); } @override -int get hashCode => Object.hash(runtimeType,launchAtStartup,hotkey,isAccessibilityAuthorized,isMicrophoneAuthorized,isRecordingHotkey,soundEnabled,startSound,stopSound,historyRetentionDays,maxRecordingMinutes); +int get hashCode => Object.hash(runtimeType,launchAtStartup,hotkey,isAccessibilityAuthorized,isMicrophoneAuthorized,isRecordingHotkey,soundEnabled,startSound,stopSound,historyRetentionDays,maxRecordingMinutes,refinementEnabled); @override String toString() { - return 'SettingsState(launchAtStartup: $launchAtStartup, hotkey: $hotkey, isAccessibilityAuthorized: $isAccessibilityAuthorized, isMicrophoneAuthorized: $isMicrophoneAuthorized, isRecordingHotkey: $isRecordingHotkey, soundEnabled: $soundEnabled, startSound: $startSound, stopSound: $stopSound, historyRetentionDays: $historyRetentionDays, 
maxRecordingMinutes: $maxRecordingMinutes)'; + return 'SettingsState(launchAtStartup: $launchAtStartup, hotkey: $hotkey, isAccessibilityAuthorized: $isAccessibilityAuthorized, isMicrophoneAuthorized: $isMicrophoneAuthorized, isRecordingHotkey: $isRecordingHotkey, soundEnabled: $soundEnabled, startSound: $startSound, stopSound: $stopSound, historyRetentionDays: $historyRetentionDays, maxRecordingMinutes: $maxRecordingMinutes, refinementEnabled: $refinementEnabled)'; } @@ -259,7 +261,7 @@ abstract mixin class _$SettingsStateCopyWith<$Res> implements $SettingsStateCopy factory _$SettingsStateCopyWith(_SettingsState value, $Res Function(_SettingsState) _then) = __$SettingsStateCopyWithImpl; @override @useResult $Res call({ - bool launchAtStartup, HotKey hotkey, bool isAccessibilityAuthorized, bool isMicrophoneAuthorized, bool isRecordingHotkey, bool soundEnabled, String startSound, String stopSound, int historyRetentionDays, int maxRecordingMinutes + bool launchAtStartup, HotKey hotkey, bool isAccessibilityAuthorized, bool isMicrophoneAuthorized, bool isRecordingHotkey, bool soundEnabled, String startSound, String stopSound, int historyRetentionDays, int maxRecordingMinutes, bool refinementEnabled }); @@ -276,7 +278,7 @@ class __$SettingsStateCopyWithImpl<$Res> /// Create a copy of SettingsState /// with the given fields replaced by the non-null parameter values. -@override @pragma('vm:prefer-inline') $Res call({Object? launchAtStartup = null,Object? hotkey = null,Object? isAccessibilityAuthorized = null,Object? isMicrophoneAuthorized = null,Object? isRecordingHotkey = null,Object? soundEnabled = null,Object? startSound = null,Object? stopSound = null,Object? historyRetentionDays = null,Object? maxRecordingMinutes = null,}) { +@override @pragma('vm:prefer-inline') $Res call({Object? launchAtStartup = null,Object? hotkey = null,Object? isAccessibilityAuthorized = null,Object? isMicrophoneAuthorized = null,Object? isRecordingHotkey = null,Object? 
soundEnabled = null,Object? startSound = null,Object? stopSound = null,Object? historyRetentionDays = null,Object? maxRecordingMinutes = null,Object? refinementEnabled = null,}) { return _then(_SettingsState( launchAtStartup: null == launchAtStartup ? _self.launchAtStartup : launchAtStartup // ignore: cast_nullable_to_non_nullable as bool,hotkey: null == hotkey ? _self.hotkey : hotkey // ignore: cast_nullable_to_non_nullable @@ -288,7 +290,8 @@ as bool,startSound: null == startSound ? _self.startSound : startSound // ignore as String,stopSound: null == stopSound ? _self.stopSound : stopSound // ignore: cast_nullable_to_non_nullable as String,historyRetentionDays: null == historyRetentionDays ? _self.historyRetentionDays : historyRetentionDays // ignore: cast_nullable_to_non_nullable as int,maxRecordingMinutes: null == maxRecordingMinutes ? _self.maxRecordingMinutes : maxRecordingMinutes // ignore: cast_nullable_to_non_nullable -as int, +as int,refinementEnabled: null == refinementEnabled ? 
_self.refinementEnabled : refinementEnabled // ignore: cast_nullable_to_non_nullable +as bool, )); } diff --git a/lib/features/settings/presentation/pages/settings_page.dart b/lib/features/settings/presentation/pages/settings_page.dart index 9ee077c..7d5f735 100644 --- a/lib/features/settings/presentation/pages/settings_page.dart +++ b/lib/features/settings/presentation/pages/settings_page.dart @@ -152,6 +152,24 @@ class _SettingsPageState extends ConsumerState with WidgetsBinding error: (_, __) => const SizedBox.shrink(), ), const Divider(height: 1, indent: 56), + // Refinement enabled + settings.when( + data: (data) => _SettingTile( + icon: Icons.auto_fix_high, + title: '啟用文字優化', + subtitle: + '轉錄完成後再丟給聊天模型做格式化/錯字修正(在「模型」頁設定 provider 與模型)', + trailing: Switch( + value: data.refinementEnabled, + onChanged: (val) => ref + .read(settingsControllerProvider.notifier) + .toggleRefinementEnabled(val), + ), + ), + loading: () => const _LoadingTile(), + error: (_, __) => const SizedBox.shrink(), + ), + const Divider(height: 1, indent: 56), // History Retention Days settings.when( data: (data) => _SettingTile( diff --git a/prompts/TextRefinement.prompt b/prompts/TextRefinement.prompt new file mode 100644 index 0000000..19a604f --- /dev/null +++ b/prompts/TextRefinement.prompt @@ -0,0 +1,37 @@ +你是一個文字優化助手。輸入會是「語音轉錄後的原始文字」,你的任務是把它改寫得更乾淨、更易讀,但**忠於原意**。 + +## 優化原則 + +1. **保留語意,不增不減** + - 不要新增原文沒講過的內容 + - 不要刪除原文有但你覺得多餘的句子(除非是 2 中的填充詞) + - 不要把意見改成你的版本 + +2. **去除口語贅詞** + - 移除:「嗯」「啊」「呃」「喔」「那個」「然後」「就是」「基本上」「對對對」等沒有語意的填充詞 + - 但**保留**有實際意義的「然後」「所以」「因為」等連接詞 + +3. **修正口誤與重述** + - 偵測「我說錯了,是…」「不是 X,是 Y」「應該說…才對」這類 self-correction,採用後者,刪掉前者 + - 偵測重複的字詞(「我我我覺得」→「我覺得」) + +4. **修正錯別字與同音字** + - 例如「在做」「再做」、「做為」「作為」、「以及」「以及」依語意選對 + - 英文專有名詞保留原文不翻譯(例如 React、Docker、API) + +5. **加上適當標點** + - 根據語意斷句加逗號、句號、問號 + - 對話/引用內容加引號 + +6. **格式化** + - 偵測序數(第一、第二、首先、然後、最後)→ 自動轉成 `1. 2. 
3.` 條列 + - 偵測「冒號」「破折號」「句號」「逗號」「驚嘆號」「問號」「引號」等口語指示 → 還原為對應符號 + - 偵測「換行」「新段落」→ 換行 + - 偵測「大寫 X」「小寫 X」→ 對應大小寫 + - 偵測「空格」「底線」→ 對應符號 + +## 輸出格式 + +**只輸出優化後的文字本身**,不要加任何前綴(例如「以下是優化後的版本:」)、不要加引號包住、不要加 markdown 標題。 + +如果輸入是空字串或只有填充詞,輸出空字串。 From feefff280dbd663eb5c44882d9d71b5888ea5322 Mon Sep 17 00:00:00 2001 From: alarmz Date: Fri, 1 May 2026 16:26:48 +0800 Subject: [PATCH 08/15] Add Windows build CI and Inno Setup installer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GitHub Actions workflow at `.github/workflows/build-windows.yml`: - Triggers on push to master/main, on `v*` tags, on PRs, and via `workflow_dispatch`. Concurrency group cancels superseded runs. - Runs on `windows-latest`, sets up Flutter 3.41.9 stable with cache, enables windows desktop, fetches deps, runs build_runner codegen, then `flutter build windows --release`. - Reads version from pubspec.yaml and passes it into Inno Setup as `MyAppVersion`. - Compiles the installer (Inno Setup 6 is preinstalled on the GHA windows-latest image; falls back to chocolatey if absent). - Always uploads the installer .exe as a workflow artifact. - On a `v*` tag push, additionally publishes a GitHub Release with the installer attached and auto-generated release notes. Installer at `installer/zerotype.iss`: - Bundles the entire Release output and installs to {autopf}\ZeroType (admin) or LocalAppData\Programs\ZeroType (standard user). - `PrivilegesRequiredOverridesAllowed=dialog commandline` lets the user decline UAC and continue as a standard user. Welcome page (`InfoBeforeFile=install-mode-info.txt`) explains the trade-off in Traditional Chinese + English: admin gets system-wide install plus auto-configured microphone permission and optional launch-at-startup; standard user gets per-user install with no system writes — the user configures mic and startup themselves. 
- Microphone consent: on admin installs, writes HKCU\…\CapabilityAccessManager\ConsentStore\microphone\NonPackaged\ with Value="Allow" so the user doesn't have to dig into Settings → Privacy → Microphone the first time. The exe path is computed at install time via `[Code]` GetMicConsentSubkey() — Windows encodes the path with `#` instead of `\`, so install-location changes are handled correctly. Skipped via `Check: IsAdminInstallMode` in standard-user mode (per the user's "admin = auto, standard = manual" contract). - Launch-at-startup: optional task, also gated behind `Check: IsAdminInstallMode`. Writes to HKCU\…\Run with the install path; cleaned up on uninstall. - Tracks the install with a stable AppId GUID so future versions upgrade in place rather than installing side-by-side. `.gitignore`: ignore `installer/Output/` (the local build output of ISCC; CI generates these fresh per run). Co-Authored-By: Claude Opus 4.7 (1M context) --- .github/workflows/build-windows.yml | 91 +++++++++++++++++++++++ .gitignore | 1 + installer/install-mode-info.txt | 41 +++++++++++ installer/zerotype.iss | 108 ++++++++++++++++++++++++++++ 4 files changed, 241 insertions(+) create mode 100644 .github/workflows/build-windows.yml create mode 100644 installer/install-mode-info.txt create mode 100644 installer/zerotype.iss diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml new file mode 100644 index 0000000..d355dcf --- /dev/null +++ b/.github/workflows/build-windows.yml @@ -0,0 +1,91 @@ +name: Windows build & installer + +on: + push: + branches: [master, main] + tags: ['v*'] + pull_request: + branches: [master, main] + workflow_dispatch: + +# Cancel in-flight runs on the same ref when a new push lands. 
+concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build: + runs-on: windows-latest + timeout-minutes: 30 + + steps: + - name: Checkout + uses: actions/checkout@v4 + + - name: Setup Flutter + uses: subosito/flutter-action@v2 + with: + flutter-version: '3.41.9' + channel: stable + cache: true + + - name: Show Flutter version + run: flutter --version + + - name: Enable Windows desktop + run: flutter config --enable-windows-desktop --no-analytics + + - name: Install dependencies + run: flutter pub get + + - name: Run code generation + run: dart run build_runner build --delete-conflicting-outputs + + - name: Build Windows release + run: flutter build windows --release + + # GitHub-hosted windows-latest runners ship with Inno Setup 6. + # Add it to PATH so we can call ISCC by name. + - name: Add Inno Setup to PATH + shell: pwsh + run: | + $iscc = "C:\Program Files (x86)\Inno Setup 6" + if (-not (Test-Path $iscc)) { + choco install innosetup --no-progress -y + } + Add-Content -Path $env:GITHUB_PATH -Value $iscc + + - name: Read app version from pubspec + id: pubspec + shell: pwsh + run: | + $line = (Get-Content pubspec.yaml | Where-Object { $_ -match '^version:' }) + $version = ($line -replace 'version:\s*','').Trim().Split('+')[0] + if ([string]::IsNullOrWhiteSpace($version)) { $version = '0.0.0' } + Write-Host "Version: $version" + "version=$version" | Out-File -FilePath $env:GITHUB_OUTPUT -Append -Encoding utf8 + + - name: Build installer + shell: pwsh + run: | + iscc.exe ` + "/DMyAppVersion=${{ steps.pubspec.outputs.version }}" ` + installer\zerotype.iss + + - name: Upload installer artifact + uses: actions/upload-artifact@v4 + with: + name: ZeroType-Setup-${{ steps.pubspec.outputs.version }} + path: installer/Output/*.exe + if-no-files-found: error + retention-days: 30 + + # On tag push (vX.Y.Z), publish a GitHub Release with the installer. 
+ - name: Create GitHub Release + if: startsWith(github.ref, 'refs/tags/v') + uses: softprops/action-gh-release@v2 + with: + files: installer/Output/*.exe + draft: false + prerelease: false + generate_release_notes: true diff --git a/.gitignore b/.gitignore index 8aa4e22..5206a34 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,7 @@ .claude/ .idea/ *.dmg +installer/Output/ ### Dart ### # See https://www.dartlang.org/guides/libraries/private-files diff --git a/installer/install-mode-info.txt b/installer/install-mode-info.txt new file mode 100644 index 0000000..fd605a5 --- /dev/null +++ b/installer/install-mode-info.txt @@ -0,0 +1,41 @@ +關於安裝模式 +========================================================== + +ZeroType 支援兩種安裝模式,請依需求選擇下一頁的權限選項: + +[管理員模式](建議) +- 安裝到 C:\Program Files\ZeroType(系統範圍) +- 自動將 ZeroType 加入 Windows 麥克風權限白名單 +- 自動設定 Windows 啟動時自動執行(如果你勾選該選項) +- 第一次按 Alt+Space 就能直接錄音,不需手動到 Settings 開權限 + +[一般使用者模式] +- 安裝到 %LOCALAPPDATA%\Programs\ZeroType(僅目前使用者) +- 不會自動寫入麥克風權限與開機啟動設定 +- 第一次錄音時,請至「設定 → 隱私權與安全性 → 麥克風」 + 手動允許 ZeroType 存取麥克風 +- 開機自動啟動可在 ZeroType 內的「設定」頁開啟 + +如何選擇: +- 點下一頁的「下一步」後,會出現 Windows 的權限詢問 + (UAC,盾牌圖示): + - 按「是」 → 安裝程式以管理員模式執行(自動設定) + - 按「否」 → 安裝程式以一般使用者模式執行(你自己設定) + +About installation modes +========================================================== + +ZeroType supports two install modes — pick when prompted by Windows: + +[Administrator mode] (recommended) +- Installs to C:\Program Files\ZeroType (system-wide) +- Automatically allow-lists ZeroType for microphone access +- Automatically registers Windows startup launch (if checked) +- Alt+Space works on first try with no extra setup + +[Standard user mode] +- Installs to %LOCALAPPDATA%\Programs\ZeroType (this user only) +- Does NOT auto-configure microphone or startup launch +- You'll need to manually approve microphone access in + Settings → Privacy → Microphone the first time +- Launch-at-startup can be enabled inside ZeroType's Settings page diff --git a/installer/zerotype.iss 
b/installer/zerotype.iss new file mode 100644 index 0000000..85213a7 --- /dev/null +++ b/installer/zerotype.iss @@ -0,0 +1,108 @@ +; ZeroType — Inno Setup installer script. +; +; Run from the repository root after `flutter build windows --release`: +; iscc installer\zerotype.iss +; +; CI passes the version via /DMyAppVersion=; for local builds we fall +; back to a placeholder so iscc doesn't fail. +#ifndef MyAppVersion + #define MyAppVersion "0.0.0-local" +#endif + +#define MyAppName "ZeroType" +#define MyAppPublisher "ZeroType" +#define MyAppURL "https://github.com/alarmz/ZeroType" +#define MyAppExeName "zero_type.exe" +#define BuildOutputDir "..\build\windows\x64\runner\Release" + +[Setup] +; A unique AppId. Do NOT regenerate this for new versions — Windows uses it +; to identify upgrades vs side-by-side installs. +AppId={{8C5F4A3D-3F6E-4D2B-B8A1-D9C6E7F2B9A3} +AppName={#MyAppName} +AppVersion={#MyAppVersion} +AppVerName={#MyAppName} {#MyAppVersion} +AppPublisher={#MyAppPublisher} +AppPublisherURL={#MyAppURL} +AppSupportURL={#MyAppURL} +AppUpdatesURL={#MyAppURL} +DefaultDirName={autopf}\{#MyAppName} +DefaultGroupName={#MyAppName} +DisableProgramGroupPage=yes +; Admin is the *recommended* mode (system-wide install + auto-configure +; microphone permission + auto launch-at-startup), but the user may decline +; the UAC prompt and continue as a standard user. In that case we install +; per-user under LocalAppData and skip the auto-config registry writes; +; the welcome page (InfoBeforeFile) explains the trade-off in both +; languages so the choice is informed. 
+PrivilegesRequired=admin +PrivilegesRequiredOverridesAllowed=dialog commandline +InfoBeforeFile=install-mode-info.txt +ArchitecturesAllowed=x64compatible +ArchitecturesInstallIn64BitMode=x64compatible +OutputDir=Output +OutputBaseFilename=ZeroTypeSetup-{#MyAppVersion} +SolidCompression=yes +Compression=lzma2 +WizardStyle=modern +UninstallDisplayName={#MyAppName} {#MyAppVersion} +UninstallDisplayIcon={app}\{#MyAppExeName} +CloseApplications=force +RestartApplications=no +; If a previous version of zero_type.exe is running, kill it before files +; get replaced — otherwise the file copy will fail with "in use". +SetupLogging=yes + +[Languages] +Name: "english"; MessagesFile: "compiler:Default.isl" +Name: "chinesetraditional"; MessagesFile: "compiler:Languages\ChineseTraditional.isl" + +[Tasks] +Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked +; Auto-launch task is only offered in admin mode — in non-admin mode the +; user is told to enable it from inside ZeroType's Settings page instead. +Name: "launchatstartup"; Description: "Launch {#MyAppName} when Windows starts"; GroupDescription: "Startup:"; Check: IsAdminInstallMode + +[Files] +Source: "{#BuildOutputDir}\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs createallsubdirs + +[Icons] +Name: "{group}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}" +Name: "{autodesktop}\{#MyAppName}"; Filename: "{app}\{#MyAppExeName}"; Tasks: desktopicon + +[Registry] +; Pre-allow microphone access for ZeroType so the user doesn't have to dig +; into Settings → Privacy → Microphone the first time they hit Alt+Space. +; +; Only applied in admin install mode — the welcome page promises that +; non-admin installs leave the system untouched, so the user has a clear +; mental model: "admin = auto-configured, standard = I'll set it up myself". +; +; The Windows microphone consent store keys exe paths with backslashes +; replaced by '#'. 
The full subkey is computed at install time by +; GetMicConsentSubkey() in [Code] below. +; +; Note: this only takes effect if the global "Allow desktop apps to access +; your microphone" toggle is on (default for most users). If it's off, the +; user still has to flip it manually — by design, since flipping a global +; privacy toggle silently would be hostile. +Root: HKCU; Subkey: "{code:GetMicConsentSubkey}"; ValueType: string; ValueName: "Value"; ValueData: "Allow"; Flags: uninsdeletekey; Check: IsAdminInstallMode + +; Optional: launch on Windows startup. Stored on HKCU\…\Run instead of using +; the launch_at_startup plugin's own mechanism so it survives uninstall (we +; remove it on uninstall via uninsdeletevalue). Tied to the launchatstartup +; task which itself only appears in admin install mode. +Root: HKCU; Subkey: "SOFTWARE\Microsoft\Windows\CurrentVersion\Run"; ValueType: string; ValueName: "ZeroType"; ValueData: """{app}\{#MyAppExeName}"""; Flags: uninsdeletevalue; Tasks: launchatstartup + +[Run] +Filename: "{app}\{#MyAppExeName}"; Description: "Launch {#MyAppName}"; Flags: postinstall nowait skipifsilent + +[Code] +function GetMicConsentSubkey(Param: String): String; +var + exePath: String; +begin + exePath := ExpandConstant('{app}\{#MyAppExeName}'); + StringChange(exePath, '\', '#'); + Result := 'SOFTWARE\Microsoft\Windows\CurrentVersion\CapabilityAccessManager\ConsentStore\microphone\NonPackaged\' + exePath; +end; From db8a3cdfc94404846008ebc9118e18e532f0540b Mon Sep 17 00:00:00 2001 From: alarmz Date: Fri, 1 May 2026 16:29:52 +0800 Subject: [PATCH 09/15] Record as WAV on Windows for OpenAI/whisper compatibility MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LiteLLM proxy returned a 400 from OpenAI's /v1/chat/completions when we sent an m4a-format input_audio: Invalid value: 'm4a'. Supported values are: 'wav' and 'mp3'. 
OpenAI's chat-completions audio API accepts only wav and mp3, so any non-whisper OpenAI model (gpt-4o-audio, gpt-5.5, …) rejected our recordings even though Gemini-style models had been accepting m4a fine. Switch the Windows recorder to AudioEncoder.wav (PCM16, 16 kHz mono). Trade-off: * Files grow from ~1 MB/min (AAC@128k) to ~1.9 MB/min (PCM16@16k) — fine for local LiteLLM proxies and acceptable for direct cloud uploads. * Format works for *every* backend tested: OpenAI input_audio, Gemini multimodal generateContent, Whisper /v1/audio/transcriptions, and LiteLLM-bridged routes for all of the above. * Sample rate drops back to 16 kHz (Whisper's native rate); the AAC 44.1 kHz workaround was only needed because Windows MF's AAC encoder rejected 16 kHz, and PCM has no such restriction. macOS unchanged (AAC m4a continues to work end-to-end on the existing mac pipeline; switching it would be churn for no benefit). Co-Authored-By: Claude Opus 4.7 (1M context) --- lib/core/services/recording_service.dart | 26 ++++++++++++++++-------- 1 file changed, 17 insertions(+), 9 deletions(-) diff --git a/lib/core/services/recording_service.dart b/lib/core/services/recording_service.dart index 4ddba5a..aa93c96 100644 --- a/lib/core/services/recording_service.dart +++ b/lib/core/services/recording_service.dart @@ -33,19 +33,27 @@ class RecordingService { dir.createSync(recursive: true); } final timestamp = DateTime.now().millisecondsSinceEpoch; - _currentFilePath = '${dir.path}/zerotype_$timestamp.m4a'; - - // Windows Media Foundation's AAC encoder only accepts 44100 or 48000 Hz - // (16000 Hz triggers MF_E_INVALIDMEDIATYPE / 0xC00D36B4 immediately). - // macOS's AVAssetWriter accepts 16000 Hz happily, which feeds Whisper at - // its native rate and avoids server-side resampling. - final sampleRate = Platform.isWindows ? 44100 : 16000; + // Encoder choice differs by platform: + // - Windows: WAV (PCM16). 
AAC was tempting (smaller files) but Windows + // Media Foundation's AAC encoder only accepts 44.1/48 kHz, AND OpenAI's + // chat-completions `input_audio` only accepts wav/mp3. WAV satisfies + // both whisper-style transcription endpoints AND multimodal chat + // endpoints (Gemini, GPT-4o, Claude) on every backend we've tested. + // At 16 kHz mono, file size is ~1.9 MB/min — fine for local proxies + // and direct cloud uploads alike. + // - macOS: AAC m4a as before. AVAssetWriter handles 16 kHz natively and + // the existing pipeline has been validated end-to-end on it. + final isWin = Platform.isWindows; + final ext = isWin ? 'wav' : 'm4a'; + _currentFilePath = '${dir.path}/zerotype_$timestamp.$ext'; + final sampleRate = 16000; + final encoder = isWin ? AudioEncoder.wav : AudioEncoder.aacLc; print( - '[RecordingService] starting at $_currentFilePath @ ${sampleRate}Hz'); + '[RecordingService] starting at $_currentFilePath enc=${encoder.name} @ ${sampleRate}Hz'); await _recorder.start( RecordConfig( - encoder: AudioEncoder.aacLc, + encoder: encoder, bitRate: 128000, sampleRate: sampleRate, ), From 4c7cee8731c620cc3166e5fe841204a248c0c001 Mon Sep 17 00:00:00 2001 From: alarmz Date: Fri, 1 May 2026 16:38:54 +0800 Subject: [PATCH 10/15] Drop ChineseTraditional Inno Setup language to fix CI build MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The GHA windows-latest Inno Setup install ships only the official language files; ChineseTraditional lives in Languages\Unofficial\ and isn't installed by default, so ISCC failed with: Couldn't open include file "compiler:Languages\ChineseTraditional.isl": The system cannot find the file specified. Drop the entry — the wizard chrome will render in English, but the substantive page (InfoBeforeFile=install-mode-info.txt) is still bilingual TC + English so the install-mode choice is unambiguous to Chinese-speaking users. 
If we want the wizard chrome itself in Chinese later, we can vendor the unofficial .isl into installer/ and reference it via a relative path; not bundling it now to avoid baking a third-party file we'd have to keep in sync. --- installer/zerotype.iss | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/installer/zerotype.iss b/installer/zerotype.iss index 85213a7..449d348 100644 --- a/installer/zerotype.iss +++ b/installer/zerotype.iss @@ -54,8 +54,12 @@ RestartApplications=no SetupLogging=yes [Languages] -Name: "english"; MessagesFile: "compiler:Default.isl" -Name: "chinesetraditional"; MessagesFile: "compiler:Languages\ChineseTraditional.isl" +; Wizard chrome stays English. The Chinese-speaking audience reads the +; bilingual InfoBeforeFile page which carries the actual setup choices. +; (Inno Setup ships ChineseTraditional only as an "unofficial" language +; file that's not present on every install — including the default GHA +; windows-latest image.) +Name: "english"; MessagesFile: "compiler:Default.isl" [Tasks] Name: "desktopicon"; Description: "{cm:CreateDesktopIcon}"; GroupDescription: "{cm:AdditionalIcons}"; Flags: unchecked From aee819bd42ca21742735ea3ec5c1425ef9035a70 Mon Sep 17 00:00:00 2001 From: alarmz Date: Fri, 1 May 2026 16:55:08 +0800 Subject: [PATCH 11/15] Bump version to 1.1.0 for Windows release --- pubspec.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pubspec.yaml b/pubspec.yaml index d106488..4937052 100644 --- a/pubspec.yaml +++ b/pubspec.yaml @@ -2,7 +2,7 @@ name: zero_type description: "ZeroType - AI-powered voice transcription & refinement tool" publish_to: 'none' -version: 1.0.0+1 +version: 1.1.0+1 environment: sdk: ^3.11.0 From a067f496edd2f3a89ca2d98e7a5005d300e6724f Mon Sep 17 00:00:00 2001 From: alarmz Date: Fri, 1 May 2026 17:05:11 +0800 Subject: [PATCH 12/15] Surface a clear message when the chosen LiteLLM model can't accept audio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 
Content-Transfer-Encoding: 8bit When the user picked a text-only OpenAI model (gpt-5.5) on the LiteLLM proxy, the OpenAI backend returned the verbose: Invalid 'messages[0]'. Content blocks are expected to be either text or image_url type. …wrapped in HTTP boilerplate by _wrapDioError. The actual cause — 'this model is text-only, you need a multimodal/audio one' — was buried. Detect that specific OpenAI error pattern (image_url + 'Content blocks are expected') in the LiteLLM chat path and rewrite into a Traditional Chinese message that names the user-selected model and lists working alternatives: • gemini-2.5-flash-lite / gemini-3-flash-preview (multimodal) • claude-haiku-4-5 / claude-sonnet-* (multimodal) • gpt-4o-audio-preview / gpt-4o-mini-audio-preview • whisper-1 (transcription endpoint, auto-routed) All four families above were verified end-to-end against the user's LiteLLM proxy at v1.1.0. --- .../services/speech_recognition_service.dart | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/lib/core/services/speech_recognition_service.dart b/lib/core/services/speech_recognition_service.dart index 47366d1..737caf1 100644 --- a/lib/core/services/speech_recognition_service.dart +++ b/lib/core/services/speech_recognition_service.dart @@ -351,6 +351,24 @@ class SpeechRecognitionService { ), ); } on DioException catch (e) { + // Detect the specific "model is text-only, doesn't accept audio" error + // pattern from OpenAI-style backends and rewrite into a clear, + // actionable Chinese message — `_wrapDioError` would otherwise return + // a wall of HTTP boilerplate that buries the actual cause. + final bodyStr = e.response?.data?.toString() ?? 
''; + if (bodyStr.contains('image_url') && + bodyStr.contains('Content blocks are expected')) { + AppLogger.log('LiteLLM-chat', + 'model "$model" does not accept audio input (text+image only)'); + throw Exception( + '此模型「$model」不支援音訊輸入。\n' + '請改選支援 audio 的模型,例如:\n' + ' • gemini-2.5-flash-lite / gemini-3-flash-preview(多模態)\n' + ' • claude-haiku-4-5 / claude-sonnet-*(多模態)\n' + ' • gpt-4o-audio-preview / gpt-4o-mini-audio-preview\n' + ' • whisper-1(純轉錄)', + ); + } throw _wrapDioError('LiteLLM chat POST $url', e); } From 3529b26bff30f0b92f88d1389d11ede564d9094e Mon Sep 17 00:00:00 2001 From: alarmz Date: Fri, 1 May 2026 17:06:53 +0800 Subject: [PATCH 13/15] Grant contents:write to allow tag-driven Release creation The v1.1.0 tag run failed at the 'Create GitHub Release' step with HTTP 403: 'Resource not accessible by integration'. The default GITHUB_TOKEN on a fork only has read access to repository contents; uploading a release requires the elevated permission to be requested explicitly via `permissions: contents: write`. --- .github/workflows/build-windows.yml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.github/workflows/build-windows.yml b/.github/workflows/build-windows.yml index d355dcf..cd4a831 100644 --- a/.github/workflows/build-windows.yml +++ b/.github/workflows/build-windows.yml @@ -8,6 +8,12 @@ on: branches: [master, main] workflow_dispatch: +# `contents: write` is needed for the tag-driven release step (the default +# GITHUB_TOKEN has read-only contents permission and would 403 on POST +# /repos/:owner/:repo/releases). +permissions: + contents: write + # Cancel in-flight runs on the same ref when a new push lands. 
concurrency: group: ${{ github.workflow }}-${{ github.ref }} From 5764a89b70f4cecacb28dc48eecff87af8b08290 Mon Sep 17 00:00:00 2001 From: alarmz Date: Fri, 1 May 2026 17:17:26 +0800 Subject: [PATCH 14/15] Update README for Windows + LiteLLM + refinement (v1.1.0) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Replace placeholder your-username/zerotype URLs with alarmz/ZeroType - Add Windows install section using ZeroTypeSetup-x.y.z.exe with the admin-vs-standard mode explanation that mirrors the installer's bilingual welcome page - Document LiteLLM provider end-to-end: base URL format, the dynamic /v1/models picker, and the model-type → endpoint routing table (whisper-* → /v1/audio/transcriptions, multimodal → chat completions with input_audio, text-only → unsupported) - Mention the new optional text-refinement feature (independent provider/prompt + Settings toggle) - Update default-hotkey lines and paste-simulation lines to mention both ⌘V on macOS and Ctrl+V on Windows - Add a v1.1.0 release-notes block; demote v1.0.2 from 'current' - Note that Windows accessibility permission is NOT required (uses SendInput, no keyboard automation consent needed) --- README.md | 104 +++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 92 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index d9f4e79..1c9f395 100644 --- a/README.md +++ b/README.md @@ -11,15 +11,21 @@ ## ✨ 功能特色 ### 🎙️ 全局快捷鍵錄音 -- 自訂全局快捷鍵(預設 `⌥ Option + Space`),在任何應用程式中觸發錄音 +- 自訂全局快捷鍵(macOS 預設 `⌥ Option + Space`、Windows 預設 `Alt + Space`),在任何應用程式中觸發錄音 - 錄音中顯示浮動音波 Overlay,提供即時視覺回饋 - 按下 `Esc` 或點擊取消按鈕可中止錄音 ### 🧠 AI 驅動的語音辨識 -- 支援 **OpenAI**(`gpt-4o-transcribe`)與 **Google Gemini**(`gemini-*`)兩大語音辨識後端 -- 辨識完成後,結果自動貼至游標所在位置(模擬 `⌘V`) +- 支援 **OpenAI**(`gpt-4o-transcribe`)、**Google Gemini**(`gemini-*`)、以及 **LiteLLM Proxy** 三大後端 +- LiteLLM 模式會自動從 proxy 的 `/v1/models` 抓取可用模型清單,動態選用 Whisper / Gemini / Claude / GPT-4o-audio 等模型 +- 
辨識完成後,結果自動貼至游標所在位置(macOS 模擬 `⌘V`、Windows 模擬 `Ctrl+V`) - 支援自訂 API Endpoint(可使用 OpenAI-compatible 的第三方服務) +### ✨ 文字優化(可選) +- 轉錄完成後,可選擇再丟給聊天模型(GPT / Claude / Gemini)做格式化、錯字修正、條列整理 +- 優化的 provider / model / prompt 完全獨立,可以「便宜模型轉錄、聰明模型優化」 +- 設定頁有獨立 toggle 控制是否啟用,預設關閉 + ### 🇹🇼 針對繁體中文深度優化的提示詞 內建的轉錄提示詞針對台灣使用情境做了以下優化: @@ -48,17 +54,18 @@ ## 🔧 使用前準備 ### 系統需求 -- macOS 11.0+ +- **macOS 11.0+**,或 **Windows 10 / 11 (x64)** - Flutter 3.x(如需自行 build) ### 必要系統授權 -1. **麥克風** — 錄音所需 -2. **輔助使用(Accessibility)** — 模擬鍵盤輸入(`⌘V` 貼上)所需 +1. **麥克風** — 錄音所需(Windows 第一次按 Alt+Space 時會跳系統權限請求) +2. **輔助使用(Accessibility)** — macOS 模擬鍵盤輸入(`⌘V` 貼上)所需。**Windows 不需要此權限**,使用 SendInput API 直接送 Ctrl+V ### API Key -前往以下任一服務申請 API Key: -- [OpenAI](https://platform.openai.com/api-keys)(支援 Transcribe 語音辨識) +前往以下任一服務申請 API Key(或使用你自己的 LiteLLM proxy): +- [OpenAI](https://platform.openai.com/api-keys)(支援 Transcribe / Whisper / GPT-4o-audio) - [Google AI Studio](https://aistudio.google.com/app/apikey)(支援 Gemini 多模態) +- 自架 [LiteLLM Proxy](https://github.com/BerriAI/litellm)(一個 endpoint 串接所有 LLM) --- @@ -66,22 +73,68 @@ ### 方法一:直接下載(推薦) -1. 前往 [Releases](https://github.com/your-username/zerotype/releases) 頁面下載最新的 `.dmg` +#### macOS + +1. 前往 [Releases](https://github.com/alarmz/ZeroType/releases) 頁面下載最新的 `.dmg` 2. 開啟 `.dmg` 並將 **ZeroType.app** 拖入 Applications 資料夾 3. 首次執行時,依照提示授予以下權限: - **麥克風** — 語音輸入所需 - **輔助使用(Accessibility)** — 模擬鍵盤貼上所需 4. 在 App 內的「模型設定」填入你的 API Key,即可開始使用 +#### Windows + +1. 從 [Releases](https://github.com/alarmz/ZeroType/releases/latest) 下載 `ZeroTypeSetup-x.y.z.exe` +2. 雙擊執行安裝程式: + - **第一頁**會中英對照解釋兩種安裝模式(管理員 vs 一般使用者) + - 按下一步時 Windows 跳 UAC: + - 按「**是**」(管理員模式)→ 裝到 Program Files、自動允許麥克風存取、可勾選開機自動啟動 + - 按「**否**」(一般使用者)→ 裝到 `%LOCALAPPDATA%\Programs\ZeroType`、不會自動寫入系統設定(你需要手動到 Windows Settings → Privacy → Microphone 允許) +3. 安裝完成後 ZeroType 會自動啟動 +4. 
在「模型」頁填入 API Key 與選擇模型,按 `Alt+Space` 即可開始 + ### 方法二:從原始碼 Build(進階) ```bash -git clone https://github.com/your-username/zerotype.git -cd zerotype +git clone https://github.com/alarmz/ZeroType.git +cd ZeroType flutter pub get +dart run build_runner build --delete-conflicting-outputs + +# macOS flutter run -d macos + +# Windows +flutter run -d windows +# 或產生 release exe: +flutter build windows --release +# 產出位置:build/windows/x64/runner/Release/zero_type.exe ``` +> **Windows build 需求**:Flutter 3.41+、Visual Studio Build Tools 2022(含 C++ Desktop workload)、Windows 11 SDK、開啟 Developer Mode(symlink 支援)。 + +--- + +## 🔌 LiteLLM Proxy 設定 + +LiteLLM 提供一個 OpenAI-compatible 的代理端點,能統一接 Whisper / Gemini / Claude / OpenAI / Groq 等多家後端。ZeroType 對 LiteLLM 做了原生支援: + +1. **「模型」頁** → Provider 選 **LiteLLM** +2. 填入你的 **Proxy Base URL**(例如 `https://litellm.example.com` 或 `http://192.168.x.x:4000`,**不要含 `/v1`**) +3. 填入 LiteLLM **virtual key** 並按儲存 +4. 按「選擇模型」右側的 🔄 按鈕 → 程式自動從 `/v1/models` 抓取你 proxy 上所有可用模型 +5. 從下拉選單選一個 + +### 模型支援度 + +ZeroType 會根據選擇的模型自動走不同 endpoint: + +| 模型類型 | Endpoint | 範例 | +|---|---|---| +| 名稱含 `whisper` | `/v1/audio/transcriptions` | `whisper-1`、`groq-whisper-large` | +| 多模態(吃 audio)| `/v1/chat/completions` + `input_audio` | `gemini-2.5-flash-lite`、`claude-haiku-4-5`、`gpt-4o-audio-preview` | +| 純文字模型 | ❌ 不支援 | `gpt-4`、`gpt-5.5`、`claude-haiku-4-5-text` 等 + --- ## 🌍 語言支援 & 貢獻 (Localization & Contribution) @@ -93,7 +146,34 @@ flutter run -d macos ## 📜 版本更新紀錄 (Release Notes) -### [v1.0.2] - 當前版本 +### [v1.1.0] - 當前版本(Windows 首發) + +**Windows 全平台支援** 🪟 +- Windows 10 / 11 原生 desktop app,使用 SendInput 模擬 Ctrl+V 貼上 +- 預設熱鍵 `Alt + Space`,UI 自動顯示 Win/Ctrl/Alt/Shift modifier +- 可拖曳的標題列 + min/max/close 視窗按鈕(多螢幕可正常移動) +- WAV 格式錄音(16 kHz mono PCM),相容 OpenAI / Whisper / Gemini / Claude + +**LiteLLM Proxy 支援** 🔌 +- 新增 LiteLLM provider,自動從 `/v1/models` 抓取模型清單並快取 +- 智慧路由:whisper-* 走 `/v1/audio/transcriptions`、其他走 `/v1/chat/completions` 多模態 +- 友善的錯誤訊息,當選到不支援 audio 的模型會直接列出可用替代 + +**文字優化** ✨ 
+- 新增「文字優化」功能,轉錄後再過一層 chat LLM 做格式化/錯字修正 +- 獨立的 provider / model / prompt 設定,可在「模型」頁與「提示詞」頁分別配置 +- 設定頁 toggle 控制,預設關閉 + +**Windows installer + CI** 📦 +- GitHub Actions 自動 build Windows release,每次 push 都產生 installer artifact +- Inno Setup 安裝程式:admin 模式自動寫麥克風白名單與開機啟動,一般模式則裝到 LocalAppData +- Tag-driven release:push `vX.Y.Z` tag 自動發 GitHub Release + +**Logging** 🔎 +- 新增檔案 logger 寫到 `%TEMP%\zero_type.log`(macOS: `/tmp/zero_type.log`) +- 浮動圓條顯示完整錯誤訊息(不只「錯誤」兩個字),含 HTTP status + 回應 body + +### [v1.0.2] - **新增歷史紀錄頁** 🎨 - 提供歷史產生逐字稿的紀錄語音檔,並可提供檢視。 - 新增總轉寫次數與總花費(USD)的持久化累計統計。 From a553842924f6aa0a47e0b89f11d9f72fcc289855 Mon Sep 17 00:00:00 2001 From: alarmz Date: Tue, 5 May 2026 06:17:57 +0800 Subject: [PATCH 15/15] Strip prompt YAML structure that gemini-2.5-flash-lite sometimes echoes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit User reported transcripts arriving with the system prompt's internal sections appended: self_correction: - 【後者為準】無遺漏的修正訊號。 - … language: … dictionary: … Or, alternatively, fully wrapped in the prompt's YAML format with the real answer hidden inside an `output:` block: self_correction: … output: | 我錄了兩次,第二次會出現很多說明… Root cause: the SpeechToText.prompt is structured as YAML — `instructions:`, `examples:` (with `reasoning:` and `output:` per example), `self_correction:`, `language:`, `dictionary:`. Gemini 2.5 Flash *does* respect the convention because its thinking-mode channel separates reasoning from final answer; but gemini-2.5-flash-lite has no such channel, so it imitates the YAML form and emits the structure verbatim alongside its answer. Add `_stripPromptStructureEcho()` in SpeechRecognitionService applied to the chat-completions content. The sanitizer: - Detects top-level YAML keys (`self_correction|reasoning|language| dictionary|examples|instructions|name|description`) at line start. - If an explicit `output:` block exists (Pattern B), extracts and un-indents its YAML literal value. 
- Otherwise (Pattern A — transcript first, structure trailing), cuts everything from the first structural key onward. The log now shows both lengths so the effect is observable: [LiteLLM-chat] success length=72 (raw=487) tokens: in=2825 out=312 …meaning the sanitizer trimmed 415 chars of structural noise. The upstream prompt is left untouched — this is purely a defensive client-side fix, so the upstream maintainer can keep their prompt format without anyone needing to coordinate. --- .../services/speech_recognition_service.dart | 58 ++++++++++++++++++- 1 file changed, 56 insertions(+), 2 deletions(-) diff --git a/lib/core/services/speech_recognition_service.dart b/lib/core/services/speech_recognition_service.dart index 737caf1..91673c1 100644 --- a/lib/core/services/speech_recognition_service.dart +++ b/lib/core/services/speech_recognition_service.dart @@ -385,17 +385,71 @@ class SpeechRecognitionService { final message = (choices.first as Map)['message'] as Map?; final content = message?['content']; - final text = (content is String) ? content.trim() : ''; + final rawText = (content is String) ? content.trim() : ''; + final text = _stripPromptStructureEcho(rawText); final usage = body?['usage'] as Map?; final inputTokens = usage?['prompt_tokens'] as int?; final outputTokens = usage?['completion_tokens'] as int?; AppLogger.log('LiteLLM-chat', - 'success length=${text.length} tokens: in=$inputTokens out=$outputTokens'); + 'success length=${text.length} (raw=${rawText.length}) tokens: in=$inputTokens out=$outputTokens'); return (text: text, inputTokens: inputTokens, outputTokens: outputTokens); } + /// Models without a thinking-mode channel (e.g. gemini-2.5-flash-lite) + /// sometimes echo the prompt's YAML-style headers — `self_correction:`, + /// `reasoning:`, `output:`, `language:`, `dictionary:` — back into the + /// response, mixing the actual transcript with the meta-structure that + /// was supposed to be internal-only. 
+  ///
+  /// Two observed shapes:
+  ///   (A) plain transcript first, then YAML noise:
+  ///         (transcript text)
+  ///         self_correction:
+  ///           - ...
+  ///         language: ...
+  ///   (B) full YAML where the answer is the value of `output:`:
+  ///         self_correction: ...
+  ///         output: |
+  ///           (transcript text)
+  ///
+  /// Strategy: if a structural key appears, prefer the value of an explicit
+  /// `output:` block when one exists; otherwise cut everything from the
+  /// first structural key onward.
+  ///
+  /// Returns [raw] unchanged when it is empty or contains no structural key;
+  /// otherwise returns the extracted transcript with surrounding whitespace
+  /// trimmed (which can be the empty string).
+  static String _stripPromptStructureEcho(String raw) {
+    if (raw.isEmpty) return raw;
+
+    // Top-level YAML keys the prompt happens to use. Match at line start so
+    // we don't accidentally chop transcript text that happens to contain
+    // 'output:' as substance.
+    final structureKey = RegExp(
+      r'^\s*(self_correction|reasoning|language|dictionary|examples|instructions|name|description)\s*:',
+      multiLine: true,
+    );
+    final structMatch = structureKey.firstMatch(raw);
+    if (structMatch == null) return raw;
+
+    // Pattern (B): an explicit `output:` block. Capture either an inline
+    // value (`output: foo`) or a YAML literal block (`output: |\n  foo`).
+    // The value ends at the next `key:` line or at end of input. End of
+    // input is spelled `(?![\s\S])`: Dart's RegExp uses ECMAScript syntax,
+    // which has no `\Z` anchor (`\Z` there is just a literal 'Z'), and `$`
+    // would match at every line end because multiLine is on.
+    final outputBlock = RegExp(
+      r'^\s*output\s*:\s*(?:\|\s*\n([\s\S]+?)|([^\n]+))(?=\n\s*\w+\s*:|(?![\s\S]))',
+      multiLine: true,
+    ).firstMatch(raw);
+    if (outputBlock != null) {
+      final literal = outputBlock.group(1);
+      if (literal != null) {
+        // Un-indent the literal block by the smallest indentation found on
+        // its non-blank lines, so relative indentation inside the
+        // transcript is kept but the YAML nesting is dropped.
+        final lines = literal.split('\n');
+        var indent = -1;
+        for (final line in lines) {
+          if (line.trim().isEmpty) continue;
+          final width = line.length - line.trimLeft().length;
+          if (indent < 0 || width < indent) indent = width;
+        }
+        if (indent < 0) indent = 0;
+        return lines
+            .map((l) => l.length > indent ? l.substring(indent) : l.trimLeft())
+            .join('\n')
+            .trim();
+      }
+      final inline = (outputBlock.group(2) ?? '').trim();
+      // A bare `|` means a literal block was announced but nothing followed;
+      // that is not a transcript, so fall through to pattern (A).
+      if (inline != '|') return inline;
+    }
+
+    // Pattern (A): chop everything from the first structural key onward.
+    return raw.substring(0, structMatch.start).trim();
+  }
+
   static String _stripTrailingSlash(String s) =>
       s.replaceAll(RegExp(r'/+$'), '');