# infer.yaml
---
# ============================== DATA ================================
# Audio input settings. These keys must be nested under `data:`; written at
# column 0 they parse as unrelated top-level keys and `data` itself is null.
data:
  sr: 16000  # presumably the sample rate in Hz — confirm against the loader
  chunk_sec: 30.0  # presumably the chunk length in seconds — confirm
# ============================== MODEL ================================
# Constructor arguments for the class named by `_target_`. Every key below
# must be nested under `model:`; at column 0 they would become top-level keys
# and `model` would be null.
model:
  # NOTE(review): `_target_` looks like a Hydra-style instantiation key —
  # confirm which loader consumes this file.
  _target_: livi.apps.audio_encoder.models.LiviAudioEncoder
  checkpoint_dir: livi/apps/audio_encoder/checkpoints
  device: cuda:0

  # Whisper encoder
  dim_whisper: 1280  # presumably must match the named model's hidden size
  whisper_model_name: "openai/whisper-large-v3-turbo"
  compile: false

  # Attention Pooling
  num_heads: 1
  mlp_ratio: 2.0
  qkv_bias: false
  qk_scale: null
  drop: 0.0
  attn_drop: 0.0
  # Written with an explicit mantissa dot: YAML 1.1 loaders (e.g. PyYAML)
  # read a bare `1e-4` as the string "1e-4" instead of a float.
  init_scale: 1.0e-4

  # Projection head (audio)
  dim_embed: 768
  dim_hiddens: [3072, 2048, 2048, 1536]