yuzaa committed
Commit c8c7670 · Parent: 0893f10

update readme

Files changed (2):
  1. README.md +6 -21
  2. assets/demo.wav +3 -0
README.md CHANGED
@@ -1013,11 +1013,12 @@ def get_video_chunk_content(video_path, flatten=True):
     return contents
 
 video_path="/path/to/video"
-sys_msg = model.get_sys_prompt(mode='omni', language='en')
 # if use voice clone prompt, please set ref_audio
-# ref_audio_path = '/path/to/ref_audio'
-# ref_audio, _ = librosa.load(ref_audio_path, sr=16000, mono=True)
-# sys_msg = model.get_sys_prompt(ref_audio=ref_audio, mode='omni', language='en')
+ref_audio_path = 'assets/demo.wav'
+ref_audio, _ = librosa.load(ref_audio_path, sr=16000, mono=True)
+sys_msg = model.get_sys_prompt(ref_audio=ref_audio, mode='omni', language='en')
+# or use default prompt
+# sys_msg = model.get_sys_prompt(mode='omni', language='en')
 
 contents = get_video_chunk_content(video_path)
 msg = {"role":"user", "content": contents}
@@ -1122,7 +1123,7 @@ res = model.chat(
 <details> <summary>Click to view the Python code for enabling MiniCPM-o 2.6 to interact with you in a specified voice.</summary>
 
 ```python
-ref_audio, _ = librosa.load('./assert/voice_01.wav', sr=16000, mono=True) # load the reference audio
+ref_audio, _ = librosa.load('assets/demo.wav', sr=16000, mono=True) # load the reference audio
 
 # Audio RolePlay: # With this mode, model will role-play the character based on the audio prompt.
 sys_prompt = model.get_sys_prompt(ref_audio=ref_audio, mode='audio_roleplay', language='en')
@@ -1135,14 +1136,10 @@ user_question = {'role': 'user', 'content': [librosa.load('xxx.wav', sr=16000, m
 ```python
 msgs = [sys_prompt, user_question]
 res = model.chat(
-    image=None,
     msgs=msgs,
-    context=None,
     tokenizer=tokenizer,
     sampling=True,
     max_new_tokens=128,
-    stream=False,
-    stream_input=True,
     use_tts_template=True,
     generate_audio=True,
     temperature=0.3,
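This and the three following hunks make the same edit: the `image=None`, `context=None`, `stream=False`, and `stream_input=True` keyword arguments are removed from every `model.chat(...)` call. After the commit, each call reduces to the shape sketched below; the hunks end at `temperature=0.3`, so any arguments past that point are unchanged and omitted here, and the closing parenthesis is added only for readability:

```python
res = model.chat(
    msgs=msgs,
    tokenizer=tokenizer,
    sampling=True,
    max_new_tokens=128,
    use_tts_template=True,
    generate_audio=True,
    temperature=0.3,
)
```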
@@ -1154,14 +1151,10 @@ history = msgs.append({'role': 'assistant', 'content': res})
 user_question = {'role': 'user', 'content': [librosa.load('xxx.wav', sr=16000, mono=True)[0]]}
 msgs = history.append(user_question)
 res = model.chat(
-    image=None,
     msgs=msgs,
-    context=None,
     tokenizer=tokenizer,
     sampling=True,
     max_new_tokens=128,
-    stream=False,
-    stream_input=True,
     use_tts_template=True,
     generate_audio=True,
     temperature=0.3,
@@ -1193,14 +1186,10 @@ audio_input, _ = librosa.load('xxx.wav', sr=16000, mono=True)
 msgs = [{'role': 'user', 'content': [task_prompt,audio_input]}]
 
 res = model.chat(
-    image=None,
     msgs=msgs,
-    context=None,
     tokenizer=tokenizer,
     sampling=True,
     max_new_tokens=128,
-    stream=False,
-    stream_input=True,
     use_tts_template=True,
     generate_audio=True,
     temperature=0.3,
@@ -1230,14 +1219,10 @@ msgs = [{'role': 'user', 'content': [task_prompt]}] # you can try to use the sam
 
 msgs = [sys_prompt, user_question]
 res = model.chat(
-    image=None,
     msgs=msgs,
-    context=None,
     tokenizer=tokenizer,
     sampling=True,
     max_new_tokens=128,
-    stream=False,
-    stream_input=True,
     use_tts_template=True,
     generate_audio=True,
     temperature=0.3,
 
assets/demo.wav ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0b347d8ed0b2314c0d175fbdac79f6f3f91a6402bd7492ac5c860646a2ba309
+size 1454196
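The three added lines are a Git LFS pointer, not the audio itself; the actual WAV (1,454,196 bytes per the `size` field) is fetched by LFS on checkout. A minimal sanity check, assuming `librosa` is installed and the repository root is the working directory, that the real file is present before using it as the reference audio:

```python
import os

import librosa

path = "assets/demo.wav"

# An LFS pointer file is only ~130 bytes; the diff records the real WAV as 1,454,196 bytes.
if os.path.getsize(path) < 1024:
    raise RuntimeError("Only the LFS pointer is present; run `git lfs pull` to fetch the audio.")

# Same loading call the README examples use for reference audio.
ref_audio, sr = librosa.load(path, sr=16000, mono=True)
print(f"Loaded {len(ref_audio) / sr:.1f}s of reference audio at {sr} Hz")
```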