CogVideoX 视频生成
在 Clore.ai 的 GPU 上使用 Zhipu AI 的 CogVideoX 扩散变换器,从文本或图像生成 6 秒视频。
最后更新于
这有帮助吗?
这有帮助吗?
# Set up the environment
pip install torch torchvision --index-url https://download.pytorch.org/whl/cu124
# The extras specifier is quoted so shells that glob on [] (e.g. zsh) pass it to pip verbatim
pip install diffusers transformers accelerate sentencepiece "imageio[ffmpeg]"
# Verify that PyTorch can see the GPU
python -c "import torch; print(torch.cuda.get_device_name(0))"

import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video

# Text-to-video: load the CogVideoX-5b checkpoint in bfloat16.
pipe = CogVideoXPipeline.from_pretrained(
    "THUDM/CogVideoX-5b",
    torch_dtype=torch.bfloat16,
)
# No explicit pipe.to("cuda") here: model CPU offload handles device placement
# itself, and moving the whole pipeline to the GPU first only raises the
# transient VRAM requirement.
pipe.enable_model_cpu_offload()  # saves roughly 4 GB of peak VRAM
pipe.vae.enable_tiling()  # required for 720x480 decoding on 24 GB cards

prompt = (
    "一只金毛犬在夕阳下的向日葵田中奔跑,"
    "电影感灯光,慢动作,4K 质量"
)

video_frames = pipe(
    prompt=prompt,
    num_frames=49,
    guidance_scale=6.0,
    num_inference_steps=50,
    # Fixed seed so repeated runs use the same random stream.
    generator=torch.Generator("cuda").manual_seed(42),
).frames[0]

export_to_video(video_frames, "retriever_sunset.mp4", fps=8)
print("已保存 retriever_sunset.mp4")

import torch
from PIL import Image
from diffusers import CogVideoXImageToVideoPipeline
from diffusers.utils import export_to_video

# Image-to-video: load the CogVideoX-5b-I2V checkpoint in bfloat16.
pipe = CogVideoXImageToVideoPipeline.from_pretrained(
    "THUDM/CogVideoX-5b-I2V",
    torch_dtype=torch.bfloat16,
)
# No explicit pipe.to("cuda") here: model CPU offload handles device placement
# itself, and moving the whole pipeline to the GPU first only raises the
# transient VRAM requirement.
pipe.enable_model_cpu_offload()
pipe.vae.enable_tiling()

# PNG files may carry an alpha channel or a palette; normalise to 3-channel RGB
# before resizing to the 720x480 resolution used for this example.
image = Image.open("reference.png").convert("RGB").resize((720, 480))

video_frames = pipe(
    prompt="相机缓慢绕着主体旋转,微风轻拂",
    image=image,
    num_frames=49,
    guidance_scale=6.0,
    num_inference_steps=50,
).frames[0]

export_to_video(video_frames, "animated.mp4", fps=8)

from diffusers import CogVideoXPipeline
import torch

# Load the CogVideoX-2b checkpoint in float16 and place the pipeline on the GPU.
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=torch.float16)
pipe.to("cuda")
pipe.vae.enable_tiling()

# Generation settings; fewer inference steps -> faster generation.
generation_args = dict(
    prompt="开花的樱花树延时摄影",
    num_frames=49,
    guidance_scale=6.0,
    num_inference_steps=30,
)
result = pipe(**generation_args)
frames = result.frames[0]