update readme

Files changed:
- README.md (+6, -21)
- assets/demo.wav (+3, -0)

README.md (CHANGED)
@@ -1013,11 +1013,12 @@ def get_video_chunk_content(video_path, flatten=True):
     return contents
 
 video_path="/path/to/video"
-sys_msg = model.get_sys_prompt(mode='omni', language='en')
 # if use voice clone prompt, please set ref_audio
-
-
-
+ref_audio_path = 'assets/demo.wav'
+ref_audio, _ = librosa.load(ref_audio_path, sr=16000, mono=True)
+sys_msg = model.get_sys_prompt(ref_audio=ref_audio, mode='omni', language='en')
+# or use default prompt
+# sys_msg = model.get_sys_prompt(mode='omni', language='en')
 
 contents = get_video_chunk_content(video_path)
 msg = {"role":"user", "content": contents}
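For context, here is how the new reference-audio lines slot into the omni example around them. A minimal sketch, assuming `model`, `tokenizer`, and `get_video_chunk_content` are already set up as earlier in the README; the `model.chat` parameters mirror the ones kept by the hunks below:

```python
import librosa

# Build a voice-clone system prompt from the newly added demo clip;
# the model's TTS output will imitate this reference voice.
ref_audio, _ = librosa.load('assets/demo.wav', sr=16000, mono=True)
sys_msg = model.get_sys_prompt(ref_audio=ref_audio, mode='omni', language='en')

# Interleave video frames and audio chunks into one user turn.
contents = get_video_chunk_content("/path/to/video")
msgs = [sys_msg, {"role": "user", "content": contents}]

res = model.chat(
    msgs=msgs,
    tokenizer=tokenizer,
    sampling=True,
    max_new_tokens=128,
    use_tts_template=True,
    generate_audio=True,
    temperature=0.3,
)
```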
@@ -1122,7 +1123,7 @@ res = model.chat(
 <details> <summary>Click to view the Python code for enabling MiniCPM-o 2.6 to interact with you in a specified voice.</summary>
 
 ```python
-ref_audio, _ = librosa.load('
+ref_audio, _ = librosa.load('assets/demo.wav', sr=16000, mono=True) # load the reference audio
 
 # Audio RolePlay: # With this mode, model will role-play the character based on the audio prompt.
 sys_prompt = model.get_sys_prompt(ref_audio=ref_audio, mode='audio_roleplay', language='en')
@@ -1135,14 +1136,10 @@ user_question = {'role': 'user', 'content': [librosa.load('xxx.wav', sr=16000, m
 ```python
 msgs = [sys_prompt, user_question]
 res = model.chat(
-    image=None,
     msgs=msgs,
-    context=None,
     tokenizer=tokenizer,
     sampling=True,
     max_new_tokens=128,
-    stream=False,
-    stream_input=True,
     use_tts_template=True,
     generate_audio=True,
     temperature=0.3,
@@ -1154,14 +1151,10 @@ history = msgs.append({'role': 'assistant', 'content': res})
 user_question = {'role': 'user', 'content': [librosa.load('xxx.wav', sr=16000, mono=True)[0]]}
 msgs = history.append(user_question)
 res = model.chat(
-    image=None,
     msgs=msgs,
-    context=None,
     tokenizer=tokenizer,
     sampling=True,
     max_new_tokens=128,
-    stream=False,
-    stream_input=True,
     use_tts_template=True,
     generate_audio=True,
     temperature=0.3,
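The same four-argument cleanup (`image`, `context`, `stream`, `stream_input`) repeats in the two hunks that follow. One caveat in the context lines around this hunk: `history = msgs.append(...)` and `msgs = history.append(user_question)` assign the return value of `list.append`, which is always `None` in Python. A runnable multi-turn version (a sketch, not part of this commit) keeps one list and appends in place:

```python
# Turn 1: system prompt plus the first audio question.
msgs = [sys_prompt, user_question]
res = model.chat(
    msgs=msgs,
    tokenizer=tokenizer,
    sampling=True,
    max_new_tokens=128,
    use_tts_template=True,
    generate_audio=True,
    temperature=0.3,
)

# Turn 2: list.append mutates in place and returns None, so append
# to the same list instead of reassigning it.
msgs.append({'role': 'assistant', 'content': res})
msgs.append({'role': 'user', 'content': [librosa.load('xxx.wav', sr=16000, mono=True)[0]]})
res = model.chat(
    msgs=msgs,
    tokenizer=tokenizer,
    sampling=True,
    max_new_tokens=128,
    use_tts_template=True,
    generate_audio=True,
    temperature=0.3,
)
```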
@@ -1193,14 +1186,10 @@ audio_input, _ = librosa.load('xxx.wav', sr=16000, mono=True)
 msgs = [{'role': 'user', 'content': [task_prompt,audio_input]}]
 
 res = model.chat(
-    image=None,
     msgs=msgs,
-    context=None,
     tokenizer=tokenizer,
     sampling=True,
     max_new_tokens=128,
-    stream=False,
-    stream_input=True,
     use_tts_template=True,
     generate_audio=True,
     temperature=0.3,
@@ -1230,14 +1219,10 @@ msgs = [{'role': 'user', 'content': [task_prompt]}] # you can try to use the sam
 
 msgs = [sys_prompt, user_question]
 res = model.chat(
-    image=None,
     msgs=msgs,
-    context=None,
     tokenizer=tokenizer,
     sampling=True,
     max_new_tokens=128,
-    stream=False,
-    stream_input=True,
     use_tts_template=True,
     generate_audio=True,
     temperature=0.3,
assets/demo.wav (ADDED)

@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d0b347d8ed0b2314c0d175fbdac79f6f3f91a6402bd7492ac5c860646a2ba309
+size 1454196
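The added file is a Git LFS pointer, so a plain clone without LFS support leaves this short text stub at `assets/demo.wav` rather than the 1,454,196-byte WAV the examples need; `git lfs pull` fetches the real file. A small sanity check before running the snippets above:

```python
import os

# If the file is still an LFS pointer (a short text stub) rather than
# the ~1.45 MB WAV recorded in the pointer's `size` field, the audio
# has not been fetched yet.
if os.path.getsize('assets/demo.wav') < 10_000:
    raise RuntimeError("assets/demo.wav looks like a Git LFS pointer; run `git lfs pull` first.")
```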