使用Ollama
推荐
应用
查看可支持的模型:https://ollama.com/library
- 查看运行中的模型
ollama ps
- 停止模型
方法1: kill -9 端口号
方法2: 关闭ollama的窗口
- API调用模型
- 启动模型
- 单次调用模型
from ollama import Client
import time
def llm(model, message):
client = Client(host='http://localhost:11434')
response = client.chat(model=model, messages=[
{
'role': 'user',
'content': message,
},
])
return response['message']['content']
start = time.time()
res = llm('qwen2', '您好,请介绍一下自己')
end = time.time()
print(res)
print(f'time: {end-start} s')
- 多次调用模型
import json
import requests
model = "qwen2"
def chat(messages):
r = requests.post(
"http://0.0.0.0:11434/api/chat",
json={"model": model, "messages": messages, "stream": True},
)
r.raise_for_status()
output = ""
for line in r.iter_lines():
body = json.loads(line)
if "error" in body:
raise Exception(body["error"])
if body.get("done") is False:
message = body.get("message", "")
content = message.get("content", "")
output += content
print(content, end="", flush=True)
if body.get("done", False):
message["content"] = output
return message
def main():
messages = []
while True:
user_input = input("Enter a prompt: ")
if not user_input:
exit()
print()
messages.append({"role": "user", "content": user_input})
message = chat(messages)
messages.append(message)
print("\n\n")
if __name__ == "__main__":
main()
- 对接langchain
- 安装langchain
pip install langchain
pip install -U langchain-community
- langchain启动模型
from langchain.llms import Ollama
ollama = Ollama(base_url='http://localhost:11434',model="qwen2")
print(ollama("你好,请介绍自己"))
- 文档问答【RAG】
- 加载文档
LangChain 的 WebBaseLoader 来从任何网页加载文本。
from langchain.document_loaders import WebBaseLoader
loader = WebBaseLoader("https://www.gutenberg.org/files/1727/1727-h/1727-h.htm")
data = loader.load()
- 选择向量库和嵌入模型
向量库:pip install chromadb
嵌入模型:ollama run nomic-embed-text
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import Ollama
# 链接模型
ollama = Ollama(base_url='http://localhost:11434',model="qwen2")
# 加载文档
loader = WebBaseLoader("https://www.ruanyifeng.com/blog/2024/07/weekly-issue-308.html")
data = loader.load()
# print(data)
# 文档分割
text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)
# 文档存到向量库
oembed = OllamaEmbeddings(base_url="http://localhost:11434", model="nomic-embed-text")
vectorstore = Chroma.from_documents(documents=all_splits, embedding=oembed)
- 通过相似度搜索匹配
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import Ollama
# 链接模型
ollama = Ollama(base_url='http://localhost:11434',model="qwen2")
# 加载文档
loader = WebBaseLoader("https://www.ruanyifeng.com/blog/2024/07/weekly-issue-308.html")
data = loader.load()
# print(data)
# 文档分割
text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)
# 文档存到向量库
oembed = OllamaEmbeddings(base_url="http://localhost:11434", model="nomic-embed-text")
vectorstore = Chroma.from_documents(documents=all_splits, embedding=oembed)
# 相似度匹配
question="请问找不到工作,还能读博么?"
docs = vectorstore.similarity_search(question)
print(f'匹配长度:{len(docs)}')
print(f'匹配内容:{docs}')
- 将问题和文档的相关部分拼接在一起,叫做链,发送给模型
from langchain.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import OllamaEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain.llms import Ollama
# 链接模型
ollama = Ollama(base_url='http://localhost:11434',model="qwen2")
# 加载文档
loader = WebBaseLoader("https://www.ruanyifeng.com/blog/2024/07/weekly-issue-308.html")
data = loader.load()
# print(data)
# 文档分割
text_splitter=RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=0)
all_splits = text_splitter.split_documents(data)
# 文档存到向量库
oembed = OllamaEmbeddings(base_url="http://localhost:11434", model="nomic-embed-text")
vectorstore = Chroma.from_documents(documents=all_splits, embedding=oembed)
# 相似度匹配
question="请问找不到工作,还能读博么?"
docs = vectorstore.similarity_search(question)
print(f'匹配长度:{len(docs)}')
print(f'匹配内容:{docs}')
# 将搜索结果与问题拼接
qachain=RetrievalQA.from_chain_type(ollama, retriever=vectorstore.as_retriever())
res = qachain.invoke({"query": question})
print(res['result'])