Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
25 changes: 25 additions & 0 deletions server/mcp_server_mediakit/src/base/api_info.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,41 @@
# Lookup table from tool name to the HTTP endpoint that serves it.
# Each entry holds the URL path and the HTTP method for one tool.
# NOTE(review): paths under /tools-sync/ presumably denote synchronous
# endpoints, the others task-based ones -- confirm against the API docs.
api_info = {
    "add_image_to_video": {"path": "/api/v1/tools/add-image-to-video", "method": "POST"},
    "add_subtitle_to_video": {"path": "/api/v1/tools/add-subtitle-to-video", "method": "POST"},
    "adjust_audio_speed": {"path": "/api/v1/tools/adjust-audio-speed", "method": "POST"},
    "adjust_video_speed": {"path": "/api/v1/tools/adjust-video-speed", "method": "POST"},
    "adjust_video_volume": {"path": "/api/v1/tools/adjust-video-volume", "method": "POST"},
    "analyze_video_highlights": {"path": "/api/v1/tools/analyze-video-highlights", "method": "POST"},
    "analyze_video_storyline": {"path": "/api/v1/tools/analyze-video-storyline", "method": "POST"},
    "apply_video_filter": {"path": "/api/v1/tools/apply-video-filter", "method": "POST"},
    "asr_subtitles": {"path": "/api/v1/tools/asr-subtitles", "method": "POST"},
    "concat_audio": {"path": "/api/v1/tools/concat-audio", "method": "POST"},
    "concat_video": {"path": "/api/v1/tools/concat-video", "method": "POST"},
    "enhance_image": {"path": "/api/v1/tools-sync/enhance-image", "method": "POST"},
    "enhance_video": {"path": "/api/v1/tools/enhance-video", "method": "POST"},
    "enhance_video_generative": {"path": "/api/v1/tools/enhance-video-generative", "method": "POST"},
    "erase_image": {"path": "/api/v1/tools-sync/erase-image", "method": "POST"},
    "erase_video_subtitle": {"path": "/api/v1/tools/erase-video-subtitle", "method": "POST"},
    "erase_video_subtitle_pro": {"path": "/api/v1/tools/erase-video-subtitle-pro", "method": "POST"},
    "evaluate_image_quality": {"path": "/api/v1/tools-sync/evaluate-image-quality", "method": "POST"},
    "extract_audio": {"path": "/api/v1/tools/extract-audio", "method": "POST"},
    "fade_audio": {"path": "/api/v1/tools/fade-audio", "method": "POST"},
    "fade_video_audio": {"path": "/api/v1/tools/fade-video-audio", "method": "POST"},
    "flip_video": {"path": "/api/v1/tools/flip-video", "method": "POST"},
    "generate_highlights_microdrama": {"path": "/api/v1/tools/generate-highlights-microdrama", "method": "POST"},
    "generate_highlights_minigame": {"path": "/api/v1/tools/generate-highlights-minigame", "method": "POST"},
    "image_ocr": {"path": "/api/v1/tools-sync/image-ocr", "method": "POST"},
    "image_to_video": {"path": "/api/v1/tools/image-to-video", "method": "POST"},
    "matte_greenscreen_video": {"path": "/api/v1/tools/matte-greenscreen-video", "method": "POST"},
    "matte_portrait_video": {"path": "/api/v1/tools/matte-portrait-video", "method": "POST"},
    "mix_audio": {"path": "/api/v1/tools/mix-audio", "method": "POST"},
    "mux_audio_video": {"path": "/api/v1/tools/mux-audio-video", "method": "POST"},
    "probe_audio_metadata": {"path": "/api/v1/tools/probe-audio-metadata", "method": "POST"},
    "probe_video_metadata": {"path": "/api/v1/tools/probe-video-metadata", "method": "POST"},
    # The only GET entry; its path carries a {task_id} placeholder.
    "query_task": {"path": "/api/v1/tasks/{task_id}", "method": "GET"},
    "remove_image_background": {"path": "/api/v1/tools-sync/remove-image-background", "method": "POST"},
    "segment_scenes": {"path": "/api/v1/tools/segment-scenes", "method": "POST"},
    "separate_voice": {"path": "/api/v1/tools/separate-voice", "method": "POST"},
    "trim_audio": {"path": "/api/v1/tools/trim-audio", "method": "POST"},
    "trim_video": {"path": "/api/v1/tools/trim-video", "method": "POST"},
    "video_ocr": {"path": "/api/v1/tools/video-ocr", "method": "POST"},
}
68 changes: 68 additions & 0 deletions server/mcp_server_mediakit/src/mediakit/mcp_tools/audio.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
from __future__ import annotations

from typing import Any, Dict, List, Optional
from typing_extensions import NotRequired, Required, TypedDict

try:
    from pydantic import Field
except Exception:  # pragma: no cover
    def Field(*args, **kwargs):
        """Stand-in for ``pydantic.Field`` used when the import above fails.

        Returns the first positional argument when one is given, otherwise
        the ``default`` keyword argument (``None`` when absent). All other
        arguments, such as ``description``, are ignored.
        """
        return args[0] if args else kwargs.get("default")

try:
    from mcp.server.fastmcp.server import Context
    from mcp.server.session import ServerSession
except Exception:  # pragma: no cover
    # Empty placeholders so annotations that reference these names still
    # resolve when the imports above fail.
    class Context:  # type: ignore
        """Placeholder used when ``mcp.server.fastmcp.server`` cannot be imported."""

    class ServerSession:  # type: ignore
        """Placeholder used when ``mcp.server.session`` cannot be imported."""

from base.client import MediKitClient
from ..utils.response import async_task_response, error_response

# Tool names handed to ``mcp.register_domain_tools("audio", ...)`` at the end
# of ``register_tools``.
TOOL_NAMES = [
    "probe_audio_metadata",
    "separate_voice",
]


def register_tools(mcp, client: MediKitClient) -> None:
    """Register the audio tools ``separate_voice`` and ``probe_audio_metadata`` on *mcp*.

    Args:
        mcp: Server object providing the ``tool`` decorator. If it also has a
            ``register_domain_tools`` attribute, the tool names are registered
            under the ``"audio"`` domain.
        client: Backend client; each tool forwards its arguments to
            ``client.call`` under the matching ``api_name``.
    """

    @mcp.tool(name="separate_voice", description="将音频中的人声与背景音精准分离,输出为两个独立的音轨文件。\n支持格式:主流音视频格式(如mp4、mov、mp3、m4a、wav等)。\n输入:video_url和audio_url二选一。\n输出格式:AAC。 使用 task_id, 调用 query_task 方法获取结果")
    async def separate_voice(
        video_url: Optional[str] = Field(None, description="输入视频 Url(需公网可访问),与audio_url二选一,都存在时优先取video_url"),
        audio_url: Optional[str] = Field(None, description="输入音频 Url(需公网可访问),与video_url二选一,不能都为空"),
        callback_args: Optional[str] = Field(None, description="可选,回调参数"),
        client_token: Optional[str] = Field(None, description="可选,用于幂等,默认幂等,用户可根据需求进行调整"),
        *,
        ctx: Context,
    ) -> dict:
        """Submit a task that separates vocals from background sound.

        Per the tool description, the output is two independent AAC tracks
        and the result is fetched by passing the task id to ``query_task``.

        Args:
            video_url: Publicly reachable input video URL. Per the parameter
                description, preferred when both URLs are supplied.
            audio_url: Publicly reachable input audio URL.
            callback_args: Optional callback argument forwarded to the backend.
            client_token: Optional idempotency token forwarded to the backend.
            ctx: Request context; not used by this function body.

        Returns:
            The value of ``async_task_response`` on success, or of
            ``error_response`` when both URLs are missing or the call raises.
        """
        # Fail fast: the parameter contract says the two URLs cannot both be
        # empty, so a request without either one is rejected locally.
        if not video_url and not audio_url:
            return error_response("video_url and audio_url cannot both be empty; provide one of them")
        try:
            result = client.call(
                api_name="separate_voice",
                video_url=video_url,
                audio_url=audio_url,
                callback_args=callback_args,
                client_token=client_token,
            )
            return async_task_response(result)
        except Exception as exc:
            # Tool boundary: report any failure as an error payload.
            return error_response(str(exc))

    @mcp.tool(name="probe_audio_metadata", description="获取指定音频的详细元信息,输出容器层信息(format_meta)与音频流元信息(audio_stream_meta)。\n字段分类参考 ffprobe,并对 VOD 原始返回做精简与统一。\n使用限制:支持公网 HTTP/HTTPS URL。 使用 task_id, 调用 query_task 方法获取结果")
    async def probe_audio_metadata(
        audio_url: str = Field(..., description="待探测的音频公网 HTTP/HTTPS URL。"),
        callback_args: Optional[str] = Field(None, description="可选,回调参数"),
        client_token: Optional[str] = Field(None, description="可选,用于幂等,默认幂等,用户可根据需求进行调整"),
        *,
        ctx: Context,
    ) -> dict:
        """Submit a task that probes the metadata of an audio file.

        Per the tool description, the result contains container-level
        information (``format_meta``) and audio-stream information
        (``audio_stream_meta``), fetched via ``query_task`` with the task id.

        Args:
            audio_url: Public HTTP/HTTPS URL of the audio to probe.
            callback_args: Optional callback argument forwarded to the backend.
            client_token: Optional idempotency token forwarded to the backend.
            ctx: Request context; not used by this function body.

        Returns:
            The value of ``async_task_response`` on success, or of
            ``error_response`` when the call raises.
        """
        try:
            result = client.call(
                api_name="probe_audio_metadata",
                audio_url=audio_url,
                callback_args=callback_args,
                client_token=client_token,
            )
            return async_task_response(result)
        except Exception as exc:
            # Tool boundary: report any failure as an error payload.
            return error_response(str(exc))

    # Domain grouping is optional: only servers exposing the hook get it.
    if hasattr(mcp, "register_domain_tools"):
        mcp.register_domain_tools("audio", TOOL_NAMES)
103 changes: 102 additions & 1 deletion server/mcp_server_mediakit/src/mediakit/mcp_tools/editing.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ class ImageToVideoImagesItem(TypedDict):
animation_in: NotRequired[float]
animation_out: NotRequired[float]

TOOL_NAMES = ['add_image_to_video', 'add_subtitle_to_video', 'adjust_video_speed', 'concat_audio', 'concat_video', 'extract_audio', 'flip_video', 'image_to_video', 'mux_audio_video', 'trim_audio', 'trim_video']
# Names of the editing tools, one per line.
TOOL_NAMES = [
    "add_image_to_video",
    "add_subtitle_to_video",
    "adjust_audio_speed",
    "adjust_video_speed",
    "adjust_video_volume",
    "apply_video_filter",
    "concat_audio",
    "concat_video",
    "extract_audio",
    "fade_audio",
    "fade_video_audio",
    "flip_video",
    "image_to_video",
    "mix_audio",
    "mux_audio_video",
    "trim_audio",
    "trim_video",
]


def register_tools(mcp, client: MediKitClient) -> None:
Expand Down Expand Up @@ -233,5 +233,106 @@ async def trim_video(
except Exception as exc:
return error_response(str(exc))

@mcp.tool(
    name="adjust_audio_speed",
    description="调整音频的播放倍速,实现快放或慢放效果。 使用 task_id, 调用 query_task 方法获取结果",
)
async def adjust_audio_speed(
    audio_url: str = Field(..., description="输入音频。支持http://xxx或https://xxx格式 Url,支持 mp3、m4a、wav 等格式"),
    speed: Optional[float] = Field(1, description="调整速度的倍数,Float类型,取值范围为0.1~4。0.1=放慢至原速的 0.1 倍,1=原速,4=加速至原速的 4 倍。"),
    callback_args: Optional[str] = Field(None, description="可选,回调参数"),
    client_token: Optional[str] = Field(None, description="可选,用于幂等,默认幂等,用户可根据需求进行调整"),
    *,
    ctx: Context,
) -> dict:
    """Submit a task that changes the playback speed of an audio file.

    Args:
        audio_url: HTTP/HTTPS URL of the input audio.
        speed: Speed multiplier; the parameter description gives the range
            0.1 to 4, where 1 keeps the original speed.
        callback_args: Optional callback argument forwarded to the backend.
        client_token: Optional idempotency token forwarded to the backend.
        ctx: Request context; not used by this function body.

    Returns:
        The value of ``async_task_response`` on success, or of
        ``error_response`` when the call raises.
    """
    request_args = {
        "audio_url": audio_url,
        "speed": speed,
        "callback_args": callback_args,
        "client_token": client_token,
    }
    try:
        return async_task_response(
            client.call(api_name="adjust_audio_speed", **request_args)
        )
    except Exception as err:
        # Tool boundary: report any failure as an error payload.
        return error_response(str(err))

@mcp.tool(
    name="adjust_video_volume",
    description="调整视频音量大小,支持静音;输出 mp4,分辨率与原片一致。 使用 task_id, 调用 query_task 方法获取结果",
)
async def adjust_video_volume(
    video_url: str = Field(..., description="输入视频。支持http://xxx或https://xxx格式 URL,支持 mp4、mov、flv、ts、avi、wmv、mkv 等格式,最高 4K"),
    volume: Optional[float] = Field(1, description="音量倍数。Float 类型,取值范围 0~4。0=静音,1=原音量,4=放大 4 倍。"),
    callback_args: Optional[str] = Field(None, description="可选,回调参数"),
    client_token: Optional[str] = Field(None, description="可选,用于幂等,默认幂等,用户可根据需求进行调整"),
    *,
    ctx: Context,
) -> dict:
    """Submit a task that changes the volume of a video, including muting.

    Per the tool description, the output is an mp4 at the source resolution.

    Args:
        video_url: HTTP/HTTPS URL of the input video.
        volume: Volume multiplier; the parameter description gives the range
            0 to 4, where 0 mutes and 1 keeps the original volume.
        callback_args: Optional callback argument forwarded to the backend.
        client_token: Optional idempotency token forwarded to the backend.
        ctx: Request context; not used by this function body.

    Returns:
        The value of ``async_task_response`` on success, or of
        ``error_response`` when the call raises.
    """
    request_args = {
        "video_url": video_url,
        "volume": volume,
        "callback_args": callback_args,
        "client_token": client_token,
    }
    try:
        return async_task_response(
            client.call(api_name="adjust_video_volume", **request_args)
        )
    except Exception as err:
        # Tool boundary: report any failure as an error payload.
        return error_response(str(err))

@mcp.tool(
    name="apply_video_filter",
    description="为视频添加指定滤镜效果,输出mp4,分辨率与原片一致。 使用 task_id, 调用 query_task 方法获取结果",
)
async def apply_video_filter(
    video_url: str = Field(..., description="输入视频。支持http://xxx或https://xxx格式 URL,支持 mp4、mov、flv、ts、avi、wmv、mkv 等格式,最高 4K"),
    filter_style: Optional[str] = Field("spring", description="滤镜风格。根据用户想要的视频画面效果选择:\n- spring:春日滤镜\n- sunset:晚霞滤镜\n- vivid:鲜亮滤镜\n- fair_skin:白皙滤镜\n- food:食物滤镜\n"),
    callback_args: Optional[str] = Field(None, description="可选,回调参数"),
    client_token: Optional[str] = Field(None, description="可选,用于幂等,默认幂等,用户可根据需求进行调整"),
    *,
    ctx: Context,
) -> dict:
    """Submit a task that applies a named filter to a video.

    Per the tool description, the output is an mp4 at the source resolution.

    Args:
        video_url: HTTP/HTTPS URL of the input video.
        filter_style: Filter name; the parameter description lists
            ``spring``, ``sunset``, ``vivid``, ``fair_skin`` and ``food``.
        callback_args: Optional callback argument forwarded to the backend.
        client_token: Optional idempotency token forwarded to the backend.
        ctx: Request context; not used by this function body.

    Returns:
        The value of ``async_task_response`` on success, or of
        ``error_response`` when the call raises.
    """
    request_args = {
        "video_url": video_url,
        "filter_style": filter_style,
        "callback_args": callback_args,
        "client_token": client_token,
    }
    try:
        return async_task_response(
            client.call(api_name="apply_video_filter", **request_args)
        )
    except Exception as err:
        # Tool boundary: report any failure as an error payload.
        return error_response(str(err))

@mcp.tool(
    name="fade_audio",
    description="对输入音频实现淡入淡出效果,输出 mp3。 使用 task_id, 调用 query_task 方法获取结果",
)
async def fade_audio(
    audio_url: str = Field(..., description="输入音频。支持http://xxx或https://xxx格式 URL,支持 mp3、m4a、wav、flac 等格式"),
    fade_in_duration: Optional[float] = Field(1, description="声音淡入时长。单位:秒,可传小数(最多3位小数)。0 表示不淡入。"),
    fade_out_duration: Optional[float] = Field(1, description="声音淡出时长。单位:秒,可传小数(最多3位小数)。0 表示不淡出。"),
    callback_args: Optional[str] = Field(None, description="可选,回调参数"),
    client_token: Optional[str] = Field(None, description="可选,用于幂等,默认幂等,用户可根据需求进行调整"),
    *,
    ctx: Context,
) -> dict:
    """Submit a task that adds fade-in and fade-out to an audio file.

    Per the tool description, the output is an mp3.

    Args:
        audio_url: HTTP/HTTPS URL of the input audio.
        fade_in_duration: Fade-in length in seconds; per the parameter
            description, 0 disables the fade-in.
        fade_out_duration: Fade-out length in seconds; per the parameter
            description, 0 disables the fade-out.
        callback_args: Optional callback argument forwarded to the backend.
        client_token: Optional idempotency token forwarded to the backend.
        ctx: Request context; not used by this function body.

    Returns:
        The value of ``async_task_response`` on success, or of
        ``error_response`` when the call raises.
    """
    request_args = {
        "audio_url": audio_url,
        "fade_in_duration": fade_in_duration,
        "fade_out_duration": fade_out_duration,
        "callback_args": callback_args,
        "client_token": client_token,
    }
    try:
        return async_task_response(
            client.call(api_name="fade_audio", **request_args)
        )
    except Exception as err:
        # Tool boundary: report any failure as an error payload.
        return error_response(str(err))

@mcp.tool(
    name="fade_video_audio",
    description="对输入视频的声轨实现淡入淡出效果。\n输出 mp4,分辨率与原片一致。 使用 task_id, 调用 query_task 方法获取结果",
)
async def fade_video_audio(
    video_url: str = Field(..., description="输入视频。支持http://xxx或https://xxx格式 URL,支持 mp4、mov、flv、ts、avi、wmv、mkv 等格式,最高 4K"),
    fade_in_duration: Optional[float] = Field(1, description="声音淡入时长。单位:秒,可传小数(最多3位小数)。0 表示不淡入。"),
    fade_out_duration: Optional[float] = Field(1, description="声音淡出时长。单位:秒,可传小数(最多3位小数)。0 表示不淡出。"),
    callback_args: Optional[str] = Field(None, description="可选,回调参数"),
    client_token: Optional[str] = Field(None, description="可选,用于幂等,默认幂等,用户可根据需求进行调整"),
    *,
    ctx: Context,
) -> dict:
    """Submit a task that adds fade-in and fade-out to a video's audio track.

    Per the tool description, the output is an mp4 at the source resolution.

    Args:
        video_url: HTTP/HTTPS URL of the input video.
        fade_in_duration: Fade-in length in seconds; per the parameter
            description, 0 disables the fade-in.
        fade_out_duration: Fade-out length in seconds; per the parameter
            description, 0 disables the fade-out.
        callback_args: Optional callback argument forwarded to the backend.
        client_token: Optional idempotency token forwarded to the backend.
        ctx: Request context; not used by this function body.

    Returns:
        The value of ``async_task_response`` on success, or of
        ``error_response`` when the call raises.
    """
    request_args = {
        "video_url": video_url,
        "fade_in_duration": fade_in_duration,
        "fade_out_duration": fade_out_duration,
        "callback_args": callback_args,
        "client_token": client_token,
    }
    try:
        return async_task_response(
            client.call(api_name="fade_video_audio", **request_args)
        )
    except Exception as err:
        # Tool boundary: report any failure as an error payload.
        return error_response(str(err))

@mcp.tool(name="mix_audio", description="将多个音频文件(如背景音乐、音效、人声)进行混音,生成一个新的音频文件。\n处理耗时:处理耗时与音频时长正相关。音频时长越长,处理耗时越长。平均 RTF(处理耗时/原片时长)为 1。\n输出音频的时长以最长的音频为准。\n输出音频格式:mp3 使用 task_id, 调用 query_task 方法获取结果")
async def mix_audio(
    audio_urls: List[str] = Field(..., description="待混合的音频列表,Array<string>类型。最少传入1个,最多传入100个。\n子项说明:待混合的输入音频。支持http://xxx或https://xxx格式 URL,支持 mp3、wav、flac 等格式"),
    callback_args: Optional[str] = Field(None, description="可选,回调参数"),
    client_token: Optional[str] = Field(None, description="可选,用于幂等,默认幂等,用户可根据需求进行调整"),
    *,
    ctx: Context,
) -> dict:
    """Submit a task that mixes several audio files into one new audio file.

    Per the tool description, the output is an mp3 as long as the longest
    input, and the result is fetched via ``query_task`` with the task id.

    Args:
        audio_urls: HTTP/HTTPS URLs of the audio files to mix; the parameter
            description gives a minimum of 1 and a maximum of 100 items.
        callback_args: Optional callback argument forwarded to the backend.
        client_token: Optional idempotency token forwarded to the backend.
        ctx: Request context; not used by this function body.

    Returns:
        The value of ``async_task_response`` on success, or of
        ``error_response`` when the list is empty or the call raises.
    """
    # Fail fast: the documented minimum is one input, so an empty list is
    # rejected locally instead of being sent to the backend.
    if not audio_urls:
        return error_response("audio_urls must contain at least one audio URL")
    try:
        result = client.call(
            api_name="mix_audio",
            audio_urls=audio_urls,
            callback_args=callback_args,
            client_token=client_token,
        )
        return async_task_response(result)
    except Exception as exc:
        # Tool boundary: report any failure as an error payload.
        return error_response(str(exc))

# Domain grouping is optional: servers without the hook are left untouched.
try:
    register_domain = mcp.register_domain_tools
except AttributeError:
    pass
else:
    register_domain("editing", TOOL_NAMES)
Loading
Loading