Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion storage-service/ocr/Video2AudioConverter.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ import (
)

// Video2AudioConverter groups the audio-related transformations that can be
// applied to a video held in a temp.Data.
type Video2AudioConverter interface {
// NOTE(review): in this diff view ConvertToMp3 appears to be the pre-rename
// line that ExtractAudio replaces — confirm against the merged file.
ConvertToMp3(ctx context.Context, video temp.Data) (temp.Data, error)
// ExtractAudio takes a video and returns its audio as a new temp.Data
// (the ffmpeg-based implementation in this change writes an output.mp3).
ExtractAudio(ctx context.Context, video temp.Data) (temp.Data, error)
// CutAudio takes a video and returns a new temp.Data holding the video
// without audio (the ffmpeg-based implementation passes "-an" and copies
// the video stream into an output.mp4).
CutAudio(ctx context.Context, video temp.Data) (temp.Data, error)
}
88 changes: 75 additions & 13 deletions storage-service/ocr/ffmpeg/FfmpegVideo2AudioConverter.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,37 +21,99 @@ type Video2AudioConverterImpl struct {

// Compile-time assertion that *Video2AudioConverterImpl implements
// ocr.Video2AudioConverter; the build fails if a method is missing.
var _ ocr.Video2AudioConverter = (*Video2AudioConverterImpl)(nil)

func (f *Video2AudioConverterImpl) ConvertToMp3(ctx context.Context, video temp.Data) (temp.Data, error) {
f.slogger.InfoContext(ctx, "ConvertToMp3: start")
// CutAudio returns a copy of the given video with its audio removed.
//
// The video is spooled into a file inside a private temporary directory,
// ffmpeg (as configured by buildCmd and f.cfg) is run over it with "-an" and
// "-c:v copy", and the resulting output.mp4 is loaded into a new temp.Data.
// The temporary directory is removed before the function returns.
//
// On failure it returns a nil temp.Data and an error prefixed with
// "CutAudio:"; an ffmpeg failure additionally carries ffmpeg's combined
// stdout/stderr output in the error text.
func (f *Video2AudioConverterImpl) CutAudio(ctx context.Context, video temp.Data) (temp.Data, error) {
	f.slogger.InfoContext(ctx, "CutAudio: start")

	dir, err := os.MkdirTemp("", "cutaudio-*")
	if err != nil {
		return nil, fmt.Errorf("CutAudio: create temp dir: %w", err)
	}
	// Cleanup is best-effort: a failure to remove the directory is only logged.
	defer func(path string) {
		errRemoveAll := os.RemoveAll(path)
		if errRemoveAll != nil {
			f.slogger.WarnContext(ctx, "CutAudio: remove temp dir failed: ", "error", errRemoveAll)
		}
	}(dir)

	ffmpegInputPath := filepath.Join(dir, "input")
	ffmpegInputFile, err := os.OpenFile(ffmpegInputPath, os.O_CREATE|os.O_WRONLY, 0600)
	if err != nil {
		return nil, fmt.Errorf("CutAudio: create input file: %w", err)
	}

	videoInputReader, err := video.Reader()
	if err != nil {
		helper.QuietClose(ffmpegInputFile, f.slogger)
		return nil, fmt.Errorf("CutAudio: get videoInputReader: %w", err)
	}

	_, err = io.Copy(ffmpegInputFile, videoInputReader)
	// Close the freshly written file explicitly instead of quietly: a close
	// error on a written file can mean the data never reached disk, and
	// ffmpeg must not be run against a possibly truncated input.
	errClose := ffmpegInputFile.Close()
	helper.QuietClose(videoInputReader, f.slogger)
	if err != nil {
		if errClose != nil {
			// The copy error is the one reported; keep the close error visible in logs.
			f.slogger.WarnContext(ctx, "CutAudio: close input file failed", "error", errClose)
		}
		return nil, fmt.Errorf("CutAudio: write input: %w", err)
	}
	if errClose != nil {
		return nil, fmt.Errorf("CutAudio: close input file: %w", errClose)
	}

	outputPath := filepath.Join(dir, "output.mp4")
	cmd := buildCmd(ctx, f.cfg,
		"-i", ffmpegInputPath,
		"-an",          // drop all audio streams
		"-c:v", "copy", // copy the video stream instead of re-encoding it
		outputPath,
	)
	f.slogger.InfoContext(ctx, "CutAudio: running ffmpeg", "cmd", cmd.String())
	out, err := cmd.CombinedOutput()
	f.slogger.DebugContext(ctx, "CutAudio: ffmpeg output", "output", string(out))
	if err != nil {
		return nil, fmt.Errorf("CutAudio: ffmpeg failed: %w\n%s", err, out)
	}

	outputFile, err := os.Open(outputPath)
	if err != nil {
		return nil, fmt.Errorf("CutAudio: open output: %w", err)
	}
	defer helper.QuietClose(outputFile, f.slogger)

	// The result is handed to temp.DataTemp before the deferred RemoveAll
	// deletes outputPath together with the rest of the temporary directory.
	data, err := temp.DataTemp(outputFile)
	if err != nil {
		return nil, fmt.Errorf("CutAudio: read output: %w", err)
	}

	f.slogger.InfoContext(ctx, "CutAudio: done")
	return data, nil
}

func (f *Video2AudioConverterImpl) ExtractAudio(ctx context.Context, video temp.Data) (temp.Data, error) {
f.slogger.InfoContext(ctx, "ExtractAudio: start")

dir, err := os.MkdirTemp("", "video2audio-*")
if err != nil {
return nil, fmt.Errorf("ConvertToMp3: create temp dir: %w", err)
return nil, fmt.Errorf("ExtractAudio: create temp dir: %w", err)
}
defer func(path string) {
errRemoveAll := os.RemoveAll(path)
if errRemoveAll != nil {
f.slogger.WarnContext(ctx, "ConvertToMp3: remove temp dir failed: ", "error", errRemoveAll)
f.slogger.WarnContext(ctx, "ExtractAudio: remove temp dir failed: ", "error", errRemoveAll)
}
}(dir)

ffmpegInputPath := filepath.Join(dir, "input")
ffmpegInputFile, err := os.OpenFile(ffmpegInputPath, os.O_CREATE|os.O_WRONLY, 0600)
if err != nil {
return nil, fmt.Errorf("ConvertToMp3: create input file: %w", err)
return nil, fmt.Errorf("ExtractAudio: create input file: %w", err)
}

videoInputReader, err := video.Reader()
if err != nil {
helper.QuietClose(ffmpegInputFile, f.slogger)
return nil, fmt.Errorf("ConvertToMp3: get videoInputReader: %w", err)
return nil, fmt.Errorf("ExtractAudio: get videoInputReader: %w", err)
}

_, err = io.Copy(ffmpegInputFile, videoInputReader)
helper.QuietClose(ffmpegInputFile, f.slogger)
helper.QuietClose(videoInputReader, f.slogger)
if err != nil {
return nil, fmt.Errorf("ConvertToMp3: write input: %w", err)
return nil, fmt.Errorf("ExtractAudio: write input: %w", err)
}

outputPath := filepath.Join(dir, "output.mp3")
Expand All @@ -62,25 +124,25 @@ func (f *Video2AudioConverterImpl) ConvertToMp3(ctx context.Context, video temp.
"-q:a", "2",
outputPath,
)
f.slogger.InfoContext(ctx, "ConvertToMp3: running ffmpeg", "cmd", cmd.String())
f.slogger.InfoContext(ctx, "ExtractAudio: running ffmpeg", "cmd", cmd.String())
out, err := cmd.CombinedOutput()
f.slogger.DebugContext(ctx, "ConvertToMp3: ffmpeg output", "output", string(out))
f.slogger.DebugContext(ctx, "ExtractAudio: ffmpeg output", "output", string(out))
if err != nil {
return nil, fmt.Errorf("ConvertToMp3: ffmpeg failed: %w\n%s", err, out)
return nil, fmt.Errorf("ExtractAudio: ffmpeg failed: %w\n%s", err, out)
}

outputFile, err := os.Open(outputPath)
if err != nil {
return nil, fmt.Errorf("ConvertToMp3: open output: %w", err)
return nil, fmt.Errorf("ExtractAudio: open output: %w", err)
}
defer helper.QuietClose(outputFile, f.slogger)

data, err := temp.DataTemp(outputFile)
if err != nil {
return nil, fmt.Errorf("ConvertToMp3: read output: %w", err)
return nil, fmt.Errorf("ExtractAudio: read output: %w", err)
}

f.slogger.InfoContext(ctx, "ConvertToMp3: done")
f.slogger.InfoContext(ctx, "ExtractAudio: done")
return data, nil
}

Expand Down
18 changes: 16 additions & 2 deletions storage-service/service/EP52VidLlmExtract.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (
"sync"

"github.com/weoses/memelo/common/helper"
"github.com/weoses/memelo/common/temp"
"github.com/weoses/memelo/storage-service/conf"
"github.com/weoses/memelo/storage-service/entity"
"github.com/weoses/memelo/storage-service/ocr"
Expand Down Expand Up @@ -140,7 +141,20 @@ func (s *VidLlmExtractPipelineStep) processSlice(
"index", slice.SliceNumber,
"startTime", slice.SliceStartTime,
"endTime", slice.SliceEndTime)
r, err := s.extractor.ProcessVideo(ctxInProcess, slice.Slice)

var videoData temp.Data = slice.Slice
if s.separateAudio {
cut, err := s.video2audio.CutAudio(ctxInProcess, slice.Slice)
defer helper.QuietClose(cut, s.slogger)
if err != nil {
appendErr(err)
cancel()
return
}
videoData = cut
}

r, err := s.extractor.ProcessVideo(ctxInProcess, videoData)
if err != nil {
appendErr(err)
cancel()
Expand All @@ -162,7 +176,7 @@ func (s *VidLlmExtractPipelineStep) processSlice(
"startTime", slice.SliceStartTime,
"endTime", slice.SliceEndTime)

audioData, err := s.video2audio.ConvertToMp3(ctxInProcess, slice.Slice)
audioData, err := s.video2audio.ExtractAudio(ctxInProcess, slice.Slice)
defer helper.QuietClose(audioData, s.slogger)
if err != nil {
appendErr(err)
Expand Down
6 changes: 5 additions & 1 deletion storage-service/tests/functional_test/init_llm_mock_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -168,7 +168,11 @@ func (m *MockEmbeddingExtractor) GetTextEmbedding(_ context.Context, _ string) (

// MockVideo2AudioConverter is a stateless test double; its ExtractAudio and
// CutAudio methods ignore their input and return temp data built from an
// empty byte stream.
type MockVideo2AudioConverter struct{}

func (m *MockVideo2AudioConverter) ConvertToMp3(_ context.Context, _ temp.Data) (temp.Data, error) {
// ExtractAudio ignores both arguments and returns whatever temp.DataTemp
// produces for an empty byte stream.
func (m *MockVideo2AudioConverter) ExtractAudio(_ context.Context, _ temp.Data) (temp.Data, error) {
	emptyAudio := bytes.NewReader([]byte{})
	return temp.DataTemp(emptyAudio)
}

// CutAudio ignores both arguments and returns whatever temp.DataTemp
// produces for an empty byte stream.
func (m *MockVideo2AudioConverter) CutAudio(_ context.Context, _ temp.Data) (temp.Data, error) {
	emptyVideo := bytes.NewReader([]byte{})
	return temp.DataTemp(emptyVideo)
}

Expand Down
Loading