diff --git a/storage-service/ocr/Video2AudioConverter.go b/storage-service/ocr/Video2AudioConverter.go
index 8e2361d..79cb2bd 100644
--- a/storage-service/ocr/Video2AudioConverter.go
+++ b/storage-service/ocr/Video2AudioConverter.go
@@ -7,5 +7,8 @@ import (
 )
 
 type Video2AudioConverter interface {
-	ConvertToMp3(ctx context.Context, video temp.Data) (temp.Data, error)
+	// ExtractAudio returns the audio track of video as separate data.
+	ExtractAudio(ctx context.Context, video temp.Data) (temp.Data, error)
+	// CutAudio returns video with its audio track removed.
+	CutAudio(ctx context.Context, video temp.Data) (temp.Data, error)
 }
diff --git a/storage-service/ocr/ffmpeg/FfmpegVideo2AudioConverter.go b/storage-service/ocr/ffmpeg/FfmpegVideo2AudioConverter.go
index 1f8ee59..07639fb 100644
--- a/storage-service/ocr/ffmpeg/FfmpegVideo2AudioConverter.go
+++ b/storage-service/ocr/ffmpeg/FfmpegVideo2AudioConverter.go
@@ -21,37 +21,104 @@ type Video2AudioConverterImpl struct {
 
 var _ ocr.Video2AudioConverter = (*Video2AudioConverterImpl)(nil)
 
-func (f *Video2AudioConverterImpl) ConvertToMp3(ctx context.Context, video temp.Data) (temp.Data, error) {
-	f.slogger.InfoContext(ctx, "ConvertToMp3: start")
+// CutAudio writes video to a temporary directory, runs ffmpeg with "-an" and
+// "-c:v copy" to produce output.mp4, and returns that output as temp.Data.
+// The temporary directory is removed before the function returns.
+func (f *Video2AudioConverterImpl) CutAudio(ctx context.Context, video temp.Data) (temp.Data, error) {
+	f.slogger.InfoContext(ctx, "CutAudio: start")
+
+	dir, err := os.MkdirTemp("", "cutaudio-*")
+	if err != nil {
+		return nil, fmt.Errorf("CutAudio: create temp dir: %w", err)
+	}
+	defer func(path string) {
+		errRemoveAll := os.RemoveAll(path)
+		if errRemoveAll != nil {
+			f.slogger.WarnContext(ctx, "CutAudio: remove temp dir failed: ", "error", errRemoveAll)
+		}
+	}(dir)
+
+	ffmpegInputPath := filepath.Join(dir, "input")
+	ffmpegInputFile, err := os.OpenFile(ffmpegInputPath, os.O_CREATE|os.O_WRONLY, 0600)
+	if err != nil {
+		return nil, fmt.Errorf("CutAudio: create input file: %w", err)
+	}
+
+	videoInputReader, err := video.Reader()
+	if err != nil {
+		helper.QuietClose(ffmpegInputFile, f.slogger)
+		return nil, fmt.Errorf("CutAudio: get videoInputReader: %w", err)
+	}
+
+	_, err = io.Copy(ffmpegInputFile, videoInputReader)
+	helper.QuietClose(ffmpegInputFile, f.slogger)
+	helper.QuietClose(videoInputReader, f.slogger)
+	if err != nil {
+		return nil, fmt.Errorf("CutAudio: write input: %w", err)
+	}
+
+	outputPath := filepath.Join(dir, "output.mp4")
+	cmd := buildCmd(ctx, f.cfg,
+		"-i", ffmpegInputPath,
+		"-an",
+		"-c:v", "copy",
+		outputPath,
+	)
+	f.slogger.InfoContext(ctx, "CutAudio: running ffmpeg", "cmd", cmd.String())
+	out, err := cmd.CombinedOutput()
+	f.slogger.DebugContext(ctx, "CutAudio: ffmpeg output", "output", string(out))
+	if err != nil {
+		return nil, fmt.Errorf("CutAudio: ffmpeg failed: %w\n%s", err, out)
+	}
+
+	outputFile, err := os.Open(outputPath)
+	if err != nil {
+		return nil, fmt.Errorf("CutAudio: open output: %w", err)
+	}
+	defer helper.QuietClose(outputFile, f.slogger)
+
+	data, err := temp.DataTemp(outputFile)
+	if err != nil {
+		return nil, fmt.Errorf("CutAudio: read output: %w", err)
+	}
+
+	f.slogger.InfoContext(ctx, "CutAudio: done")
+	return data, nil
+}
+
+// ExtractAudio writes video to a temporary directory, runs ffmpeg to produce
+// output.mp3, and returns that output as temp.Data.
+func (f *Video2AudioConverterImpl) ExtractAudio(ctx context.Context, video temp.Data) (temp.Data, error) {
+	f.slogger.InfoContext(ctx, "ExtractAudio: start")
 
 	dir, err := os.MkdirTemp("", "video2audio-*")
 	if err != nil {
-		return nil, fmt.Errorf("ConvertToMp3: create temp dir: %w", err)
+		return nil, fmt.Errorf("ExtractAudio: create temp dir: %w", err)
 	}
 	defer func(path string) {
 		errRemoveAll := os.RemoveAll(path)
 		if errRemoveAll != nil {
-			f.slogger.WarnContext(ctx, "ConvertToMp3: remove temp dir failed: ", "error", errRemoveAll)
+			f.slogger.WarnContext(ctx, "ExtractAudio: remove temp dir failed: ", "error", errRemoveAll)
 		}
 	}(dir)
 
 	ffmpegInputPath := filepath.Join(dir, "input")
 	ffmpegInputFile, err := os.OpenFile(ffmpegInputPath, os.O_CREATE|os.O_WRONLY, 0600)
 	if err != nil {
-		return nil, fmt.Errorf("ConvertToMp3: create input file: %w", err)
+		return nil, fmt.Errorf("ExtractAudio: create input file: %w", err)
 	}
 
 	videoInputReader, err := video.Reader()
 	if err != nil {
 		helper.QuietClose(ffmpegInputFile, f.slogger)
-		return nil, fmt.Errorf("ConvertToMp3: get videoInputReader: %w", err)
+		return nil, fmt.Errorf("ExtractAudio: get videoInputReader: %w", err)
 	}
 
 	_, err = io.Copy(ffmpegInputFile, videoInputReader)
 	helper.QuietClose(ffmpegInputFile, f.slogger)
 	helper.QuietClose(videoInputReader, f.slogger)
 	if err != nil {
-		return nil, fmt.Errorf("ConvertToMp3: write input: %w", err)
+		return nil, fmt.Errorf("ExtractAudio: write input: %w", err)
 	}
 
 	outputPath := filepath.Join(dir, "output.mp3")
@@ -62,25 +129,25 @@ func (f *Video2AudioConverterImpl) ConvertToMp3(ctx context.Context, video temp.
 		"-q:a", "2",
 		outputPath,
 	)
-	f.slogger.InfoContext(ctx, "ConvertToMp3: running ffmpeg", "cmd", cmd.String())
+	f.slogger.InfoContext(ctx, "ExtractAudio: running ffmpeg", "cmd", cmd.String())
 	out, err := cmd.CombinedOutput()
-	f.slogger.DebugContext(ctx, "ConvertToMp3: ffmpeg output", "output", string(out))
+	f.slogger.DebugContext(ctx, "ExtractAudio: ffmpeg output", "output", string(out))
 	if err != nil {
-		return nil, fmt.Errorf("ConvertToMp3: ffmpeg failed: %w\n%s", err, out)
+		return nil, fmt.Errorf("ExtractAudio: ffmpeg failed: %w\n%s", err, out)
 	}
 
 	outputFile, err := os.Open(outputPath)
 	if err != nil {
-		return nil, fmt.Errorf("ConvertToMp3: open output: %w", err)
+		return nil, fmt.Errorf("ExtractAudio: open output: %w", err)
 	}
 	defer helper.QuietClose(outputFile, f.slogger)
 
 	data, err := temp.DataTemp(outputFile)
 	if err != nil {
-		return nil, fmt.Errorf("ConvertToMp3: read output: %w", err)
+		return nil, fmt.Errorf("ExtractAudio: read output: %w", err)
 	}
 
-	f.slogger.InfoContext(ctx, "ConvertToMp3: done")
+	f.slogger.InfoContext(ctx, "ExtractAudio: done")
 	return data, nil
 }
 
diff --git a/storage-service/service/EP52VidLlmExtract.go b/storage-service/service/EP52VidLlmExtract.go
index a9142b8..7ce5084 100644
--- a/storage-service/service/EP52VidLlmExtract.go
+++ b/storage-service/service/EP52VidLlmExtract.go
@@ -9,6 +9,7 @@ import (
 	"sync"
 
 	"github.com/weoses/memelo/common/helper"
+	"github.com/weoses/memelo/common/temp"
 	"github.com/weoses/memelo/storage-service/conf"
 	"github.com/weoses/memelo/storage-service/entity"
 	"github.com/weoses/memelo/storage-service/ocr"
@@ -140,7 +141,24 @@ func (s *VidLlmExtractPipelineStep) processSlice(
 			"index", slice.SliceNumber,
 			"startTime", slice.SliceStartTime,
 			"endTime", slice.SliceEndTime)
-		r, err := s.extractor.ProcessVideo(ctxInProcess, slice.Slice)
+
+		// When audio is handled separately, give the extractor the result of
+		// CutAudio instead of the original slice.
+		var videoData temp.Data = slice.Slice
+		if s.separateAudio {
+			cut, err := s.video2audio.CutAudio(ctxInProcess, slice.Slice)
+			if err != nil {
+				appendErr(err)
+				cancel()
+				return
+			}
+			// Register the close only after the error check: on failure
+			// there is no result to release.
+			defer helper.QuietClose(cut, s.slogger)
+			videoData = cut
+		}
+
+		r, err := s.extractor.ProcessVideo(ctxInProcess, videoData)
 		if err != nil {
 			appendErr(err)
 			cancel()
@@ -162,7 +180,7 @@ func (s *VidLlmExtractPipelineStep) processSlice(
 			"startTime", slice.SliceStartTime,
 			"endTime", slice.SliceEndTime)
 
-		audioData, err := s.video2audio.ConvertToMp3(ctxInProcess, slice.Slice)
+		audioData, err := s.video2audio.ExtractAudio(ctxInProcess, slice.Slice)
 		defer helper.QuietClose(audioData, s.slogger)
 		if err != nil {
 			appendErr(err)
diff --git a/storage-service/tests/functional_test/init_llm_mock_test.go b/storage-service/tests/functional_test/init_llm_mock_test.go
index 6f0a88c..99da875 100644
--- a/storage-service/tests/functional_test/init_llm_mock_test.go
+++ b/storage-service/tests/functional_test/init_llm_mock_test.go
@@ -168,7 +168,11 @@ func (m *MockEmbeddingExtractor) GetTextEmbedding(_ context.Context, _ string) (
 
 type MockVideo2AudioConverter struct{}
 
-func (m *MockVideo2AudioConverter) ConvertToMp3(_ context.Context, _ temp.Data) (temp.Data, error) {
+func (m *MockVideo2AudioConverter) ExtractAudio(_ context.Context, _ temp.Data) (temp.Data, error) {
+	return temp.DataTemp(bytes.NewReader([]byte{}))
+}
+
+func (m *MockVideo2AudioConverter) CutAudio(_ context.Context, _ temp.Data) (temp.Data, error) {
 	return temp.DataTemp(bytes.NewReader([]byte{}))
 }
 