Spaces:

orangewong
/

wan_t2v_prompt_enhancer

Runtime error

App Files Files Community

wan_t2v_prompt_enhancer / app.py

orangewong

Update app.py

ca4f1e5 verified 9 months ago

raw

history blame contribute delete

3.68 kB


	import gradio as gr
	from transformers import AutoModelForCausalLM, AutoTokenizer
	import torch
	import os
	import spaces

	# 读取增强指令（从 wan_instruction.md 文件中提取）
	def extract_instruction(md_path):
	"""
	从 wan_instruction.md 中提取用于AI视频提示词增强的指令部分。
	这里假设指令部分以 '## Prompt Recipe' 开始，直到 '## Prompt Bank' 结束。
	"""
	if not os.path.exists(md_path):
	return ""
	with open(md_path, "r", encoding="utf-8") as f:
	lines = f.readlines()
	start, end = None, None
	for idx, line in enumerate(lines):
	if "## Prompt Recipe" in line:
	start = idx
	if "## Prompt Bank" in line:
	end = idx
	break
	if start is not None and end is not None:
	# 只取中间部分
	instruction = "".join(lines[start:end])
	else:
	# 如果找不到，取前2000字符兜底
	instruction = "".join(lines[:2000])
	# 额外补充输出要求
	instruction += (
	"\n\n请你作为AI视频提示词增强助手，"
	"根据上述最佳实践，扩展和丰富用户输入的简短视频提示词，使其更详细、更具表现力、更符合AI视频生成的需求。"
	"输出仅为提示词，内容丰富、细致、流畅，适合直接用于AI视频生成。不要输出中文解释。"
	"如用户输入已包含部分要素，可自动补全缺失部分。"
	"输出语言和用户输入的语言保持一致。"
	"\n\n用户输入："
	)
	return instruction

	# 路径可根据实际部署情况调整
	WAN_INSTRUCTION_MD = os.path.join(os.path.dirname(__file__), "wan_instruction.md")
	WAN_INSTRUCTION = extract_instruction(WAN_INSTRUCTION_MD)

	# 加载 deepseek-r1 14b 模型和分词器
	MODEL_NAME = "deepseek-ai/DeepSeek-R1-Distill-Qwen-14B"
	tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)
	model = AutoModelForCausalLM.from_pretrained(
	MODEL_NAME,
	torch_dtype=torch.float16,
	device_map="auto",
	trust_remote_code=True
	)
	@spaces.GPU()
	def enhance_prompt(user_prompt):
	# 构建完整的输入
	full_prompt = WAN_INSTRUCTION + user_prompt.strip() + "\n"
	input_ids = tokenizer(full_prompt, return_tensors="pt").input_ids.to(model.device)
	with torch.no_grad():
	output = model.generate(
	input_ids,
	max_new_tokens=256,
	do_sample=True,
	temperature=0.7,
	top_p=0.95,
	eos_token_id=tokenizer.eos_token_id,
	pad_token_id=tokenizer.eos_token_id
	)
	result = tokenizer.decode(output[0][input_ids.shape[1]:], skip_special_tokens=True)
	return result.strip()

	with gr.Blocks(title="AI视频提示词增强器") as demo:
	gr.Markdown(
	"""
	# AI视频提示词增强器
	输入简短的视频提示词，自动智能扩写为详细、专业、适合AI视频生成的提示词。
	- 基于 deepseek-r1 14b 大模型
	- 增强规则自动读取自 wan_instruction.md(从[官方文档](https://alidocs.dingtalk.com/i/nodes/EpGBa2Lm8aZxe5myC99MelA2WgN7R35y?utm_scene=team_space)提取)
	"""
	)
	with gr.Row():
	with gr.Column():
	user_input = gr.Textbox(label="输入你的视频提示词", lines=4, placeholder="如：A girl dancing on the stage")
	submit_btn = gr.Button("一键增强")
	with gr.Column():
	enhanced_output = gr.Textbox(label="增强后的提示词", lines=10)
	submit_btn.click(enhance_prompt, inputs=user_input, outputs=enhanced_output)

	if __name__ == "__main__":
	demo.launch()