Skip to content

Commit a23bdcd

Browse files
committed
Merge branch 'vllm' - vLLM inference engine + streaming WebSocket service
Features: - AutoModelVLLM: generic vLLM inference for all LLM-based ASR models - serve_realtime_ws.py: streaming WebSocket ASR with VAD+SPK+hotwords - DynamicStreamingVAD: dynamic silence threshold (default on for all models) - Fun-ASR-Nano batch_size fix for VAD pipeline - Comprehensive vLLM documentation (docs/vllm_guide.md)
2 parents 47e1c64 + d008cbf commit a23bdcd

16 files changed

Lines changed: 3929 additions & 10 deletions

File tree

docs/vllm_guide.md

Lines changed: 602 additions & 0 deletions
Large diffs are not rendered by default.
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
<!DOCTYPE html>
2+
<html lang="zh-CN">
3+
<head>
4+
<meta charset="UTF-8">
5+
<title>Fun-ASR-Nano · Streaming ASR with Speaker Diarization</title>
6+
<link href="https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700;800;900&display=swap" rel="stylesheet">
7+
<style>
8+
* { margin:0; padding:0; box-sizing:border-box; }
9+
html, body { height:100%; }
10+
body { font-family:'Inter',system-ui,sans-serif; background:#080810; color:#e0e0e0; display:flex; justify-content:center; height:100vh; overflow:hidden; }
11+
12+
/* === Hero Section === */
13+
.hero { padding:24px 28px 16px; flex-shrink:0; }
14+
.hero-top { display:flex; align-items:center; gap:16px; margin-bottom:14px; }
15+
.logo { font-size:30px; font-weight:900; letter-spacing:-1px; }
16+
.logo .g { background:linear-gradient(135deg,#64ffda,#00bfa5); -webkit-background-clip:text; -webkit-text-fill-color:transparent; }
17+
.logo .w { color:#fff; }
18+
.logo .s { color:rgba(255,255,255,0.3); font-weight:400; font-size:15px; margin-left:8px; }
19+
.hero-links { margin-left:auto; display:flex; gap:10px; }
20+
.hero-links a { font-size:12px; color:#aaa; text-decoration:none; padding:6px 14px; border-radius:8px; background:rgba(255,255,255,0.04); border:1px solid rgba(255,255,255,0.08); transition:all 0.2s; font-weight:500; }
21+
.hero-links a:hover { color:#64ffda; border-color:rgba(100,255,218,0.3); }
22+
23+
.features { display:flex; gap:10px; flex-wrap:wrap; margin-bottom:14px; }
24+
.feat { font-size:11px; padding:5px 12px; border-radius:20px; font-weight:500; }
25+
.feat-green { color:#64ffda; background:rgba(100,255,218,0.08); border:1px solid rgba(100,255,218,0.2); }
26+
.feat-yellow { color:#fbbf24; background:rgba(251,191,36,0.08); border:1px solid rgba(251,191,36,0.2); }
27+
.feat-purple { color:#a78bfa; background:rgba(167,139,250,0.08); border:1px solid rgba(167,139,250,0.2); }
28+
.feat-blue { color:#38bdf8; background:rgba(56,189,248,0.08); border:1px solid rgba(56,189,248,0.2); }
29+
30+
.hero-desc { font-size:13px; color:rgba(255,255,255,0.45); line-height:1.8; }
31+
.hero-desc b { color:rgba(255,255,255,0.75); font-weight:600; }
32+
.hero-desc a { color:#64ffda; text-decoration:none; font-weight:600; }
33+
.hero-desc a:hover { text-decoration:underline; }
34+
35+
/* === Main Panel === */
36+
.main { flex:1; display:flex; flex-direction:column; padding:0 28px 20px; min-height:0; }
37+
.panel { flex:1; display:flex; flex-direction:column; background:rgba(16,16,24,0.95); border:1px solid rgba(255,255,255,0.05); border-radius:16px; box-shadow:0 10px 40px rgba(0,0,0,0.4); min-height:0; }
38+
39+
.controls { padding:14px 20px; display:flex; align-items:center; gap:12px; border-bottom:1px solid rgba(255,255,255,0.04); flex-shrink:0; }
40+
.controls input[type=text] { background:rgba(255,255,255,0.05); border:1px solid rgba(255,255,255,0.1); border-radius:8px; padding:8px 12px; color:#ccc; font-size:12px; width:180px; outline:none; }
41+
.controls input[type=text]:focus { border-color:rgba(100,255,218,0.4); }
42+
.controls input[type=file] { display:none; }
43+
.controls label { font-size:11px; color:#888; display:flex; align-items:center; gap:5px; cursor:pointer; }
44+
.controls input[type=checkbox] { accent-color:#64ffda; }
45+
.btn { padding:8px 18px; border:none; border-radius:10px; font-size:12px; cursor:pointer; font-weight:600; transition:all 0.12s; }
46+
.btn:hover { transform:scale(1.04); }
47+
.btn:active { transform:scale(0.97); }
48+
.btn-mic { background:linear-gradient(135deg,#64ffda,#00bfa5); color:#080810; }
49+
.btn-file { background:linear-gradient(135deg,#fbbf24,#f59e0b); color:#1a1a2e; }
50+
.btn-hw { background:rgba(167,139,250,0.12); color:#a78bfa; border:1px solid rgba(167,139,250,0.3); }
51+
.btn-stop { background:linear-gradient(135deg,#ef4444,#dc2626); color:#fff; }
52+
.sta { margin-left:auto; display:flex; align-items:center; gap:6px; }
53+
.dot { width:7px; height:7px; border-radius:50%; background:#222; }
54+
.dot.on { background:#64ffda; box-shadow:0 0 10px rgba(100,255,218,0.6); animation:g 1.5s infinite; }
55+
@keyframes g { 0%,100%{opacity:1} 50%{opacity:0.3} }
56+
.sta span { font-size:10px; color:#555; }
57+
.hw-info { font-size:10px; color:#a78bfa; margin-left:2px; }
58+
59+
.result { flex:1; overflow-y:auto; padding:18px 22px; min-height:0; }
60+
.result::-webkit-scrollbar { width:4px; }
61+
.result::-webkit-scrollbar-thumb { background:rgba(255,255,255,0.06); border-radius:2px; }
62+
.line { padding:6px 0; border-bottom:1px solid rgba(255,255,255,0.02); display:flex; align-items:baseline; gap:0; word-break:break-all; }
63+
.time { color:rgba(100,255,218,0.5); font-size:11px; font-family:'SF Mono','Menlo',monospace; width:100px; min-width:100px; flex-shrink:0; padding-top:2px; }
64+
.spk { font-size:10px; font-weight:600; padding:2px 8px; border-radius:5px; flex-shrink:0; margin-right:10px; }
65+
.text { color:rgba(255,255,255,0.88); font-size:15px; line-height:1.7; flex:1; }
66+
.partial .text { color:rgba(255,255,255,0.25); font-style:italic; }
67+
.ph { color:rgba(255,255,255,0.1); text-align:center; padding:50px 20px; font-size:14px; line-height:2; }
68+
</style>
69+
</head>
70+
<body><div style="width:920px;height:100%;display:flex;flex-direction:column">
71+
72+
<div class="hero">
73+
<div class="hero-top">
74+
<div class="logo"><span class="g">Fun-ASR-Nano</span><span class="s">vLLM Engine</span></div>
75+
<div class="hero-links">
76+
<a href="https://github.com/modelscope/FunASR" target="_blank">GitHub</a>
77+
<a href="https://modelscope.cn/models/FunAudioLLM/Fun-ASR-Nano-2512" target="_blank">ModelScope</a>
78+
<a href="https://huggingface.co/FunAudioLLM/Fun-ASR-Nano-2512" target="_blank">HuggingFace</a>
79+
</div>
80+
</div>
81+
<div class="features">
82+
<span class="feat feat-green">Streaming ASR</span>
83+
<span class="feat feat-yellow">Speaker Diarization <span style="font-size:9px;opacity:0.7">(Beta)</span></span>
84+
<span class="feat feat-purple">Hotword Customization</span>
85+
<span class="feat feat-blue">31 Languages · 7 Dialects</span>
86+
</div>
87+
<p class="hero-desc">
88+
基于 <b>FunASR</b> 与 <b>vLLM 推理引擎</b>,实现流式语音识别服务。支持实时 VAD 分句、说话人分离 <span style="color:#f59e0b;font-size:11px">(Beta)</span>、<b>热词定制化</b>(加载人名、地名等实体词列表,提升专有名词识别准确率)、31种语言及中文方言。所有代码与模型已全部开源。<br>
89+
<span style="font-size:12px">Streaming ASR with vLLM engine, real-time VAD, speaker diarization <span style="color:#f59e0b">(Beta)</span>, <b>hotword customization</b> (names, places, entities), 31 languages &amp; Chinese dialects. Fully open-sourced.</span>
90+
&nbsp;·&nbsp; <a href="https://www.funasr.com" target="_blank">www.funasr.com</a>
91+
</p>
92+
</div>
93+
94+
<div class="main">
95+
<div class="panel">
96+
<div class="controls">
97+
<input type="text" id="serverUrl" value="ws://localhost:10095">
98+
<label><input type="checkbox" id="showSpk" checked> Speaker <span style="font-size:9px;color:#f59e0b;background:rgba(245,158,11,0.08);padding:1px 5px;border-radius:4px">Beta</span></label>
99+
<button class="btn btn-mic" id="btnMic" onclick="startMic()">Mic</button>
100+
<input type="file" id="fileInput" accept="audio/*,.wav,.mp3,.flac,.mp4,.m4a">
101+
<button class="btn btn-file" id="btnFile" onclick="document.getElementById('fileInput').click()">Audio File</button>
102+
<input type="file" id="hotwordFile" accept=".txt" style="display:none">
103+
<button class="btn btn-hw" id="btnHw" onclick="document.getElementById('hotwordFile').click()">Hotwords</button>
104+
<span class="hw-info" id="hwInfo"></span>
105+
<select id="langSelect" style="background:rgba(255,255,255,0.05);border:1px solid rgba(255,255,255,0.1);border-radius:8px;padding:6px 10px;color:#ccc;font-size:11px;outline:none">
106+
<option value="">Auto</option>
107+
<option value="中文">中文</option>
108+
<option value="English">English</option>
109+
<option value="日本語">日本語</option>
110+
<option value="한국어">한국어</option>
111+
<option value="Deutsch">Deutsch</option>
112+
<option value="Français">Français</option>
113+
<option value="Español">Español</option>
114+
<option value="Русский">Русский</option>
115+
<option value="العربية">العربية</option>
116+
<option value="Português">Português</option>
117+
<option value="Italiano">Italiano</option>
118+
</select>
119+
<button class="btn btn-stop" id="btnStop" onclick="stopAll()" style="display:none">Stop</button>
120+
<div class="sta"><div class="dot" id="dot"></div><span id="status">Ready</span></div>
121+
</div>
122+
<div class="result" id="resultBox">
123+
<div class="ph">Click <b>Mic</b> for real-time recognition or <b>Audio File</b> to transcribe a file<br>Load a <b>Hotwords</b> file (.txt, one word per line) to boost recognition of names, places &amp; entities</div>
124+
</div>
125+
</div>
126+
</div>
127+
128+
<input type="text" id="hotwords" style="display:none">
129+
<script>
130+
// Shared session state for the demo page; stopAll() clears all of it again.
let ws = null;            // WebSocket of the current session, or null
let mediaStream = null;   // microphone MediaStream while the mic is in use
let audioContext = null;  // AudioContext used for microphone capture
let processor = null;     // ScriptProcessorNode that forwards PCM frames
let isRecording = false;  // true while audio is being streamed to the server
131+
// Colour palette for speaker badges; render() picks C[spk % C.length].
const C = [
  '#64ffda',
  '#f472b6',
  '#fbbf24',
  '#34d399',
  '#a78bfa',
  '#fb923c',
  '#67e8f9',
  '#f87171',
  '#38bdf8',
  '#c084fc',
];
132+
133+
// Load a hotword list (.txt, one entry per line) picked through the hidden
// #hotwordFile input. The non-empty, trimmed lines are stored comma-joined in
// the hidden #hotwords field and the count is shown in #hwInfo and the status.
document.getElementById('hotwordFile').addEventListener('change', function (e) {
  const input = e.target;
  const f = input.files[0];
  if (!f) return;
  const r = new FileReader();
  r.onload = ev => {
    const words = ev.target.result.split('\n').map(l => l.trim()).filter(l => l);
    document.getElementById('hotwords').value = words.join(',');
    document.getElementById('hwInfo').textContent = words.length + ' words loaded';
    setS('Hotwords: ' + words.length + ' loaded');
  };
  // Report a failed read instead of silently keeping the previous status.
  r.onerror = () => setS('Hotwords: read failed');
  r.readAsText(f);
  // Clear the input so that choosing the same (possibly edited) file again
  // still fires a "change" event; the File object held in `f` stays readable.
  input.value = '';
});
142+
// Kick off file transcription as soon as an audio file has been picked.
document.getElementById('fileInput').addEventListener('change', (event) => {
  const picker = event.currentTarget;
  if (picker.files.length === 0) return;
  startFile();
});
143+
144+
/**
 * Update the status label and the activity indicator dot.
 * @param {string} m - Text written into the #status element.
 * @param {boolean} [on] - Truthy gives the dot the "dot on" class; otherwise it is reset to "dot".
 */
function setS(m, on) {
  const statusEl = document.getElementById('status');
  const dotEl = document.getElementById('dot');
  statusEl.textContent = m;
  if (on) {
    dotEl.className = 'dot on';
  } else {
    dotEl.className = 'dot';
  }
}
145+
/** Put the toolbar into "session running" mode: hide Mic, Audio File and Hotwords, show Stop. */
function sStop() {
  for (const id of ['btnMic', 'btnFile', 'btnHw']) {
    document.getElementById(id).style.display = 'none';
  }
  document.getElementById('btnStop').style.display = 'inline';
}
146+
/** Put the toolbar back into idle mode: show Mic, Audio File and Hotwords, hide Stop. */
function sStart() {
  const visibility = {
    btnMic: 'inline',
    btnFile: 'inline',
    btnHw: 'inline',
    btnStop: 'none',
  };
  for (const [id, display] of Object.entries(visibility)) {
    document.getElementById(id).style.display = display;
  }
}
147+
/** Escape the HTML-significant characters so text received from the server cannot inject markup. */
function escapeHtml(value) {
  const map = { '&': '&amp;', '<': '&lt;', '>': '&gt;', '"': '&quot;', "'": '&#39;' };
  return String(value).replace(/[&<>"']/g, ch => map[ch]);
}

/**
 * Redraw the transcript in #resultBox and scroll it to the bottom.
 * @param {Array<Object>} ss - Finished sentences; each may carry start/end (or start_ms/end_ms), spk and text.
 * @param {string} [p] - Partial (in-progress) text, shown as a trailing italic line when truthy.
 * @param {number} [ps] - Start of the partial text in milliseconds.
 * @param {*} [d] - Unused; kept so existing callers keep working.
 * @param {*} [f] - Unused; kept so existing callers keep working.
 */
function render(ss, p, ps, d, f) {
  const b = document.getElementById('resultBox');
  const sk = document.getElementById('showSpk').checked;
  let h = '';
  // Tolerate a null/absent sentence list instead of throwing on forEach.
  (ss || []).forEach(s => {
    const st = s.start !== undefined ? s.start : (s.start_ms || 0);
    const en = s.end !== undefined ? s.end : (s.end_ms || 0);
    const sp = s.spk !== undefined ? s.spk : -1;
    let sh = '';
    if (sk && sp >= 0) {
      const c = C[sp % C.length];
      // The two-digit suffixes are alpha values appended to the #rrggbb colour.
      sh = '<span class="spk" style="color:' + c + ';background:' + c + '12;border:1px solid ' + c + '30">SPK' + escapeHtml(sp) + '</span>';
    }
    h += '<div class="line"><span class="time">' + (st / 1000).toFixed(1) + ' - ' + (en / 1000).toFixed(1) + 's</span>' + sh +
      '<span class="text">' + escapeHtml(s.text) + '</span></div>';
  });
  if (p) {
    h += '<div class="line partial"><span class="time">' + (ps / 1000).toFixed(1) + 's ...</span><span class="text">' + escapeHtml(p) + '</span></div>';
  }
  if (!h) h = '<div class="ph">Listening...</div>';
  b.innerHTML = h;
  b.scrollTop = b.scrollHeight;
}
154+
/**
 * Open a WebSocket session to the URL in #serverUrl and store it in the global `ws`.
 * Once open it sends 'START', then 'HOTWORDS:<list>' if #hotwords is non-empty,
 * then 'LANGUAGE:<value>' if a language is selected, and finally calls cb.
 * Incoming JSON messages that carry a `sentences` field are passed to render().
 * @param {Function} cb - Invoked with no arguments after the opening messages were sent.
 */
function con(cb) {
  let sock;
  try {
    sock = new WebSocket(document.getElementById('serverUrl').value);
  } catch (err) {
    // The WebSocket constructor throws synchronously on a malformed URL.
    setS('Error');
    return;
  }
  ws = sock;
  // A socket is stale once a newer session has replaced it in `ws`; its late
  // events must not repaint or stop the newer session.
  const isStale = () => ws !== null && ws !== sock;
  sock.onopen = () => {
    sock.send('START');
    const hw = document.getElementById('hotwords').value.trim();
    if (hw) sock.send('HOTWORDS:' + hw);
    const lang = document.getElementById('langSelect').value;
    if (lang) sock.send('LANGUAGE:' + lang);
    cb();
  };
  sock.onmessage = e => {
    if (isStale()) return;
    let d;
    try {
      d = JSON.parse(e.data);
    } catch (err) {
      // One malformed or non-text frame must not break the message handler.
      console.warn('Ignoring non-JSON message from server', err);
      return;
    }
    if (d && d.sentences !== undefined) {
      render(d.sentences, d.partial, d.partial_start_ms, d.duration_ms, d.is_final);
    }
  };
  sock.onerror = () => setS('Error');
  sock.onclose = () => {
    if (!isStale() && isRecording) stopAll();
  };
}
155+
/**
 * Start live recognition from the microphone.
 * Connects via con(); once connected it asks for microphone access, captures
 * audio through a ScriptProcessorNode and sends each 4096-sample frame to the
 * server as 16-bit PCM. If getUserMedia fails, the status becomes
 * 'Mic denied', the socket is closed and the start buttons are restored.
 */
function startMic() {
  const TARGET_RATE = 16000;

  const onConnected = async () => {
    setS('Recording', true);
    sStop();
    document.getElementById('resultBox').innerHTML = '<div class="ph">Listening...</div>';

    try {
      mediaStream = await navigator.mediaDevices.getUserMedia({
        audio: { sampleRate: TARGET_RATE, channelCount: 1, echoCancellation: true },
      });
    } catch (e) {
      setS('Mic denied');
      ws.close();
      sStart();
      return;
    }

    audioContext = new AudioContext({ sampleRate: TARGET_RATE });
    const micSource = audioContext.createMediaStreamSource(mediaStream);
    processor = audioContext.createScriptProcessor(4096, 1, 1);

    processor.onaudioprocess = (event) => {
      if (!isRecording) return;
      const samples = event.inputBuffer.getChannelData(0);
      // Scale float samples by 32768, round, and clamp to the signed 16-bit range.
      const pcm = Int16Array.from(samples, (v) => Math.max(-32768, Math.min(32767, Math.round(v * 32768))));
      if (ws && ws.readyState === 1) ws.send(pcm.buffer);
    };

    micSource.connect(processor);
    processor.connect(audioContext.destination);
    isRecording = true;
  };

  con(onConnected);
}
156+
/**
 * Transcribe the audio file currently selected in #fileInput.
 * The file is decoded with a 16 kHz AudioContext, channel 0 is converted to
 * 16-bit PCM, and after con() connects the samples are sent in 4096-sample
 * chunks with a 50 ms pause between chunks. stopAll() is called when the whole
 * file has been sent.
 */
function startFile() {
  const fi = document.getElementById('fileInput');
  if (!fi.files.length) {
    setS('Pick a file');
    return;
  }
  const file = fi.files[0];
  // Reset the input so picking the same file again fires "change" again;
  // the File object held in `file` stays readable.
  fi.value = '';
  setS('Decoding...');

  // The file name is user-controlled, so insert it as text, not as HTML.
  const box = document.getElementById('resultBox');
  const placeholder = document.createElement('div');
  placeholder.className = 'ph';
  placeholder.textContent = 'Decoding ' + file.name + '...';
  box.innerHTML = '';
  box.appendChild(placeholder);

  const r = new FileReader();
  r.onerror = () => setS('Read failed');
  r.onload = async e => {
    const a = new AudioContext({ sampleRate: 16000 });
    let buf;
    try {
      buf = await a.decodeAudioData(e.target.result);
    } catch (err) {
      // Release the context on failure too; it was leaked before.
      a.close();
      setS('Decode failed');
      return;
    }
    const p = buf.getChannelData(0);
    const i = new Int16Array(p.length);
    for (let j = 0; j < p.length; j++) {
      // Scale float samples by 32768, round, and clamp to the signed 16-bit range.
      i[j] = Math.max(-32768, Math.min(32767, Math.round(p[j] * 32768)));
    }
    a.close();
    con(async () => {
      setS('Streaming ' + (p.length / 16000 | 0) + 's', true);
      sStop();
      isRecording = true;
      for (let j = 0; j < i.length && isRecording; j += 4096) {
        if (ws && ws.readyState === 1) ws.send(i.slice(j, j + 4096).buffer);
        await new Promise(resolve => setTimeout(resolve, 50));
      }
      // Still recording here means the loop reached the end of the file.
      if (isRecording) stopAll();
    });
  };
  r.readAsArrayBuffer(file);
}
157+
/**
 * End the current session: send 'STOP' on an open socket, release the audio
 * graph and microphone tracks, restore the start buttons and set the status to
 * 'Done'. The socket is closed 2 s later rather than immediately.
 */
function stopAll() {
  isRecording = false;
  // Capture the socket being stopped: if a new session starts within the 2 s
  // delay, the global `ws` already points at the new socket and must not be
  // closed by this timer.
  const sock = ws;
  if (sock && sock.readyState === 1) sock.send('STOP');
  if (processor) {
    processor.disconnect();
    processor = null;
  }
  if (audioContext) {
    audioContext.close();
    audioContext = null;
  }
  if (mediaStream) {
    mediaStream.getTracks().forEach(t => t.stop());
    mediaStream = null;
  }
  if (sock) {
    setTimeout(() => {
      sock.close();
      // Only clear the global if it still refers to the socket we closed.
      if (ws === sock) ws = null;
    }, 2000);
  }
  sStart();
  setS('Done');
}
158+
</script>
159+
</div></body>
160+
</html>

0 commit comments

Comments
 (0)