import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer, pipeline
from langchain import PromptTemplate, LLMChain
from langchain.llms import HuggingFacePipeline
model_path = r'model\llama_model_4bit'

# Report the available device; the quantized loads below use device_map='auto'.
if torch.cuda.is_available():
    print(torch.cuda.device_count())
    device = torch.device("cuda:0")
    print(device)
else:
    device = torch.device("cpu")
    print('No GPU available')
tokenizer = AutoTokenizer.from_pretrained(model_path, use_fast=False)

# Choose the quantization scheme from the checkpoint directory's name suffix.
if model_path.endswith("4bit"):
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        load_in_4bit=True,
        torch_dtype=torch.float16,
        device_map='auto'
    )
elif model_path.endswith("8bit"):
    model = AutoModelForCausalLM.from_pretrained(
        model_path,
        load_in_8bit=True,
        torch_dtype=torch.float16,
        device_map='auto'
    )
else:
    # Unquantized checkpoint: load in half precision and move it to the GPU.
    model = AutoModelForCausalLM.from_pretrained(model_path).half().cuda()
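# Note: newer transformers releases deprecate the bare load_in_4bit /
# load_in_8bit flags in favour of an explicit BitsAndBytesConfig. An
# equivalent 4-bit load (a sketch, assuming bitsandbytes is installed)
# would look like:
#
#   from transformers import BitsAndBytesConfig
#   quant_config = BitsAndBytesConfig(load_in_4bit=True,
#                                     bnb_4bit_compute_dtype=torch.float16)
#   model = AutoModelForCausalLM.from_pretrained(
#       model_path, quantization_config=quant_config, device_map='auto')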
# Wrap the model and tokenizer in a standard text-generation pipeline.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_length=512,
    top_p=1,
    repetition_penalty=1.15
)
llama_model = HuggingFacePipeline(pipeline=pipe)
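# The TextStreamer imported above can print tokens as they are generated,
# which is handy when testing the raw model interactively. A minimal sketch
# using the standard transformers generate/streamer API (the prompt text is
# just an example):
#
#   streamer = TextStreamer(tokenizer, skip_prompt=True)
#   inputs = tokenizer("Hello, how are you?", return_tensors="pt").to(model.device)
#   model.generate(**inputs, streamer=streamer, max_new_tokens=64)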
template = '''
#context#
You are a helpful, respectful and honest assistant. You answer human questions as helpfully as possible, while being safe.
Please ensure that your responses are socially unbiased and positive in nature.
#question#
Human: What is a good name for a company that makes {product}?
'''
prompt = PromptTemplate(
    input_variables=["product"],
    template=template
)
chain = LLMChain(llm=llama_model, prompt=prompt)
print(chain.run("running shoes"))
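# On newer LangChain releases (0.1+), LLMChain and .run() are deprecated;
# the same chain can be written with the runnable-composition syntax:
#
#   chain = prompt | llama_model
#   print(chain.invoke({"product": "running shoes"}))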