YAML Metadata Warning: empty or missing yaml metadata in repo card (https://huggingface.co/docs/hub/model-cards#model-card-metadata)

Installation

# diffusers is installed from the GitHub repository rather than from a PyPI release.
pip install git+https://github.com/huggingface/diffusers.git peft transformers torch sentencepiece opencv-python

Example

Landscape Example

from diffusers import MochiPipeline
from diffusers.utils import export_to_video
from IPython import display
import torch

# Load the base Mochi checkpoint and apply the game-mix LoRA on top of it.
# NOTE(review): the STG example in this card loads the bf16 variant with
# torch.bfloat16; float16 is kept here unchanged -- confirm which is intended.
pipe = MochiPipeline.from_pretrained("genmo/mochi-1-preview", torch_dtype=torch.float16)
pipe.load_lora_weights("svjack/mochi_game_mix_early_lora")

# Memory savers. Sequential CPU offload is the most memory-frugal (and the
# slowest) offload strategy; if more VRAM is available, replace it with
# `pipe.enable_model_cpu_offload()`. Enable only ONE of the two strategies.
pipe.enable_sequential_cpu_offload()
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()

prompt = "The video presents a tranquil scene of a small, isolated island with a rocky outcrop. The island is covered in lush greenery and dotted with vibrant pink cherry blossom trees in full bloom. A traditional-style building with a pagoda-like roof stands prominently on the highest point of the island, suggesting a cultural or historical significance. The sky above is a gradient of soft pastel colors, transitioning from light blue to pink, indicating either dawn or dusk. The water surrounding the island is calm, reflecting the colors of the sky and the island's features. There are no visible people or moving objects, giving the scene a serene and untouched quality."
seed = 50
output_path = "Island_scene_mix.mp4"

# Seeded CPU generator handed to the pipeline call below.
generator = torch.Generator("cpu").manual_seed(seed)
pipeline_args = {
    "prompt": prompt,
    "guidance_scale": 6.0,
    "num_inference_steps": 64,
    "height": 480,
    "width": 848,
    "max_sequence_length": 1024,
    "output_type": "np",
    "num_frames": 19,
    "generator": generator,
}

# `.frames` holds one entry per prompt; a single prompt was given, so take index 0.
video = pipe(**pipeline_args).frames[0]
export_to_video(video, output_path)

# Notebook-only: clear the cell output, then show the rendered clip inline.
display.clear_output(wait=True)
display.Video(output_path)
  • Without lora

  • With lora

Character Example

# Character prompt; reuses the `pipe` object created in the landscape example.
prompt = "The video opens with a close-up of a woman in a white and purple outfit, holding a glowing purple butterfly. She has dark hair and walks gracefully through a traditional Japanese-style village at night."
i = 50
# The seed is also embedded in the output filename.
output_path = "char_scene_mix_{}.mp4".format(i)

generator = torch.Generator("cpu").manual_seed(i)
pipeline_args = dict(
    prompt=prompt,
    guidance_scale=6.0,
    num_inference_steps=64,
    height=480,
    width=848,
    max_sequence_length=1024,
    output_type="np",
    num_frames=19,
    generator=generator,
)

# Run the pipeline and keep the frames of the first (only) result.
result = pipe(**pipeline_args)
video = result.frames[0]
export_to_video(video, output_path)

# Notebook-only: clear the cell output, then show the rendered clip inline.
from IPython import display
display.clear_output(wait=True)
display.Video(output_path)
  • Without lora

  • With lora

STG Example (use STG to make better videos)

Additional Installation

# Fetch the STGuidance sources and switch into the Mochi pipeline directory.
# The Python example below imports `pipeline_stg_mochi`, so it presumably has
# to be run from this directory -- TODO confirm.
git clone https://github.com/svjack/STGuidance
cd STGuidance/diffusers/mochi

Landscape Example

import torch
from pipeline_stg_mochi import MochiSTGPipeline
from diffusers.utils import export_to_video
from IPython import display
import os

# Load the STG-enabled Mochi pipeline (bf16 weights) and apply the game-mix LoRA.
pipe = MochiSTGPipeline.from_pretrained("genmo/mochi-1-preview", variant="bf16", torch_dtype=torch.bfloat16)
pipe.load_lora_weights("svjack/mochi_game_mix_early_lora")

# Memory savers. Sequential CPU offload is the most memory-frugal (and the
# slowest) offload strategy; if more VRAM is available, replace it with
# `pipe.enable_model_cpu_offload()`. Enable only ONE of the two strategies.
pipe.enable_sequential_cpu_offload()
pipe.vae.enable_slicing()
pipe.vae.enable_tiling()
# Do not additionally call `pipe.to("cuda")` while CPU offload is enabled:
# the offload mechanism is responsible for device placement.

#--------Option--------#
prompt = "The video presents a tranquil scene of a small, isolated island with a rocky outcrop. The island is covered in lush greenery and dotted with vibrant pink cherry blossom trees in full bloom. A traditional-style building with a pagoda-like roof stands prominently on the highest point of the island, suggesting a cultural or historical significance. The sky above is a gradient of soft pastel colors, transitioning from light blue to pink, indicating either dawn or dusk. The water surrounding the island is calm, reflecting the colors of the sky and the island's features. There are no visible people or moving objects, giving the scene a serene and untouched quality."
stg_mode = "STG-R"
stg_applied_layers_idx = [35]
stg_scale = 0.8  # 0.0 for CFG (default)
do_rescaling = True  # False (default)
#----------------------#

output_path = "Island_scene_mix_stg.mp4"

# Generate video frames; `.frames` holds one entry per prompt, so take index 0.
frames = pipe(
    prompt,
    num_frames=84,
    stg_mode=stg_mode,
    stg_applied_layers_idx=stg_applied_layers_idx,
    stg_scale=stg_scale,
    do_rescaling=do_rescaling,
).frames[0]

export_to_video(frames, output_path)

# Notebook-only: clear the cell output, then show the rendered clip inline.
display.clear_output(wait=True)
display.Video(output_path)
  • With lora and STG

Downloads last month

-

Downloads are not tracked for this model. How to track
Inference API
Unable to determine this model's library. Check the docs .