使用moondream模型识图和目标检测
跟官方示例相同的操作内容
根据 https://github.com/vikhyat/moondream/tree/main 的样例程序,从 https://hf-mirror.com/vikhyatk/moondream2/blob/onnx/moondream-2b-int8.mf.gz 下载 moondream-2b-int8.mf.gz,并解压得到 moondream-2b-int8.mf
import moondream as md
from PIL import Image,ImageDraw,ImageFont
import sys
import os
# Show the command-line usage once at start-up (default values in parentheses).
print(f'python {__file__} img_name(test.jpg) wd(C:/Users/tellw/test)')
# Color names mapped to their hex codes.
cnames = {'aliceblue': '#F0F8FF','antiquewhite': '#FAEBD7','aqua': '#00FFFF','aquamarine': '#7FFFD4','azure': '#F0FFFF','beige': '#F5F5DC','bisque': '#FFE4C4','black': '#000000','blanchedalmond': '#FFEBCD','blue': '#0000FF','blueviolet': '#8A2BE2','brown': '#A52A2A','burlywood': '#DEB887','cadetblue': '#5F9EA0','chartreuse': '#7FFF00','chocolate': '#D2691E','coral': '#FF7F50','cornflowerblue': '#6495ED','cornsilk': '#FFF8DC','crimson': '#DC143C','cyan': '#00FFFF','darkblue': '#00008B','darkcyan': '#008B8B','darkgoldenrod': '#B8860B','darkgray': '#A9A9A9','darkgreen': '#006400','darkkhaki': '#BDB76B','darkmagenta': '#8B008B','darkolivegreen': '#556B2F','darkorange': '#FF8C00','darkorchid': '#9932CC','darkred': '#8B0000','darksalmon': '#E9967A','darkseagreen': '#8FBC8F','darkslateblue': '#483D8B','darkslategray': '#2F4F4F','darkturquoise': '#00CED1','darkviolet': '#9400D3','deeppink': '#FF1493','deepskyblue': '#00BFFF','dimgray': '#696969','dodgerblue': '#1E90FF','firebrick': '#B22222','floralwhite': '#FFFAF0','forestgreen': '#228B22','fuchsia': '#FF00FF','gainsboro': '#DCDCDC','ghostwhite': '#F8F8FF','gold': '#FFD700','goldenrod': '#DAA520','gray': '#808080','green': '#008000','greenyellow': '#ADFF2F','honeydew': '#F0FFF0','hotpink': '#FF69B4','indianred': '#CD5C5C','indigo': '#4B0082','ivory': '#FFFFF0','khaki': '#F0E68C','lavender': '#E6E6FA','lavenderblush': '#FFF0F5','lawngreen': '#7CFC00','lemonchiffon': '#FFFACD','lightblue': '#ADD8E6','lightcoral': '#F08080','lightcyan': '#E0FFFF','lightgoldenrodyellow': '#FAFAD2','lightgreen': '#90EE90','lightgray': '#D3D3D3','lightpink': '#FFB6C1','lightsalmon': '#FFA07A','lightseagreen': '#20B2AA','lightskyblue': '#87CEFA','lightslategray': '#778899','lightsteelblue': '#B0C4DE','lightyellow': '#FFFFE0','lime': '#00FF00','limegreen': '#32CD32','linen': '#FAF0E6','magenta': '#FF00FF','maroon': '#800000','mediumaquamarine': '#66CDAA','mediumblue': '#0000CD','mediumorchid': '#BA55D3','mediumpurple': '#9370DB','mediumseagreen': 
'#3CB371','mediumslateblue': '#7B68EE','mediumspringgreen': '#00FA9A','mediumturquoise': '#48D1CC','mediumvioletred': '#C71585','midnightblue': '#191970','mintcream': '#F5FFFA','mistyrose': '#FFE4E1','moccasin': '#FFE4B5','navajowhite': '#FFDEAD','navy': '#000080','oldlace': '#FDF5E6','olive': '#808000','olivedrab': '#6B8E23','orange': '#FFA500','orangered': '#FF4500','orchid': '#DA70D6','palegoldenrod': '#EEE8AA','palegreen': '#98FB98','paleturquoise': '#AFEEEE','palevioletred': '#DB7093','papayawhip': '#FFEFD5','peachpuff': '#FFDAB9','peru': '#CD853F','pink': '#FFC0CB','plum': '#DDA0DD','powderblue': '#B0E0E6','purple': '#800080','red': '#FF0000','rosybrown': '#BC8F8F','royalblue': '#4169E1','saddlebrown': '#8B4513','salmon': '#FA8072','sandybrown': '#FAA460','seagreen': '#2E8B57','seashell': '#FFF5EE','sienna': '#A0522D','silver': '#C0C0C0','skyblue': '#87CEEB','slateblue': '#6A5ACD','slategray': '#708090','snow': '#FFFAFA','springgreen': '#00FF7F','steelblue': '#4682B4','tan': '#D2B48C','teal': '#008080','thistle': '#D8BFD8','tomato': '#FF6347','turquoise': '#40E0D0','violet': '#EE82EE','wheat': '#F5DEB3','white': '#FFFFFF','whitesmoke': '#F5F5F5','yellow': '#FFFF00','yellowgreen': '#9ACD32'}
# Alphabetically ordered list of the color names, indexed by color_i when drawing.
color_table = sorted(cnames)
# Initialize with local model path. Can also read .mf.gz files, but we recommend decompressing
# up-front to avoid decompression overhead every time the model is initialized.
# The model path is relative and is loaded before any chdir below, so it is
# resolved against the directory the script was started from.
print('loading moondream model')
model = md.vl(model="moondream-2b-int8.mf")
print('starting configurations')

# Optional first argument: image file name.
if len(sys.argv) >= 2:
    img_name = sys.argv[1]
else:
    img_name = 'test.jpg'

# Optional second argument: working directory to switch into before the image
# is opened.
if len(sys.argv) >= 3:
    wd = sys.argv[2]
    os.chdir(wd)
else:
    # NOTE(review): this default is only recorded; no chdir happens in this
    # branch, so the image is looked up in the current directory. Confirm
    # whether switching to the default directory was intended.
    wd = 'C:/Users/tellw/test'

# Load and process image
image = Image.open(img_name)
# Detection results are multiplied by these pixel dimensions before drawing.
width, height = image.size
# Drawing context that annotates the loaded image in place.
idr = ImageDraw.ImageDraw(image)
font = ImageFont.truetype('MochiyPopOne-Regular.ttf', 30)
# Encode once; every query in the menu loop reuses this encoding.
encoded_image = model.encode_image(image)
# Index into color_table of the color used for the next annotation.
color_i = 0
# Menu text printed before every prompt.
menu = '''
operations:
1) generate caption
2) generate short caption
3) ask questions
4) detect objects
5) point objects
6) select another image
h) help
q) quit
'''


def _save_annotated(img, source_name, label):
    """Save ``img`` as '<source_name without its extension>_<label>.jpg'.

    Images whose mode is not L, RGB or CMYK (for example RGBA or P images
    opened from a PNG) are converted to RGB first, because saving them
    directly as JPEG raises an error.
    """
    writable = img if img.mode in ('L', 'RGB', 'CMYK') else img.convert('RGB')
    writable.save(f'{source_name.rsplit(".",1)[0]}_{label}.jpg')


# Interactive loop: read one menu choice per iteration until 'q' is entered.
while True:
    print(menu)
    op = input('>>>')
    if op == '1':
        # Generate caption
        caption = model.caption(encoded_image)["caption"]
        print("Caption:", caption)
    elif op == '2':
        # Generate short caption
        caption = model.caption(encoded_image, 'short')["caption"]
        print("Short caption:", caption)
    elif op == '3':
        question = input('question:')
        # Ask questions
        answer = model.query(encoded_image, question)["answer"]
        print("Answer:", answer)
    elif op == '4':
        ob = input('object name:')
        bbox = model.detect(encoded_image, ob)
        print('bbox:', bbox)
        if bbox['objects']:
            for obj in bbox['objects']:
                # Box corners are multiplied by the image size to get pixels.
                idr.rectangle(
                    (
                        (int(obj['x_min'] * width), int(obj['y_min'] * height)),
                        (int(obj['x_max'] * width), int(obj['y_max'] * height)),
                    ),
                    fill=None,
                    outline=color_table[color_i],
                    width=8,
                )
                # Label just inside the top-left corner of the box.
                idr.text(
                    (int(obj['x_min'] * width + 5), int(obj['y_min'] * height + 5)),
                    ob,
                    font=font,
                    fill='black',
                )
            _save_annotated(image, img_name, ob)
            # Advance to the next color; wrap at the table length rather
            # than a hard-coded count.
            color_i = (color_i + 1) % len(color_table)
        else:
            print('does not detect', ob)
    elif op == '5':
        ob = input('object name:')
        point = model.point(encoded_image, ob)
        print('point:', point)
        if point['points']:
            for p in point['points']:
                # Filled circle spanning 5 px either side of the point.
                idr.ellipse(
                    (
                        (int(p['x'] * width - 5), int(p['y'] * height - 5)),
                        (int(p['x'] * width + 5), int(p['y'] * height + 5)),
                    ),
                    fill=color_table[color_i],
                    outline=color_table[color_i],
                    width=5,
                )
                idr.text(
                    (int(p['x'] * width + 5), int(p['y'] * height + 5)),
                    ob,
                    font=font,
                    fill='black',
                )
            _save_annotated(image, img_name, ob)
            color_i = (color_i + 1) % len(color_table)
        else:
            print('does not point', ob)
    elif op == '6':
        new_name = input('img_name:')
        new_wd = input('working directory:')
        previous_dir = os.getcwd()
        try:
            # An empty answer keeps the current working directory.
            if new_wd:
                os.chdir(new_wd)
            new_image = Image.open(new_name)
        except OSError as exc:
            # Keep the session alive and the previous image active; undo a
            # directory change that may already have happened.
            os.chdir(previous_dir)
            print('cannot load image:', exc)
            continue
        # Commit the new state only after the image opened successfully.
        img_name = new_name
        if new_wd:
            wd = new_wd
        image = new_image
        width, height = image.size
        idr = ImageDraw.ImageDraw(image)
        encoded_image = model.encode_image(image)
        color_i = 0
    elif op == 'h':
        print(f'python {__file__} img_name(test.jpg) wd(C:/Users/tellw/test)')
    elif op == 'q':
        break
    else:
        print('illegal operations')
使用 gguf 格式模型(通过 llama-server 部署)的方式,效果很差
模型下载地址:https://hf-mirror.com/vikhyatk/moondream2/blob/main/moondream2-text-model-f16.gguf
服务端
import os
import subprocess
# Switch to the directory that holds llama-server.exe. The path below is a
# placeholder and must be replaced with the real directory.
os.chdir('llama-server.exe\'s dir')
# NOTE(review): presumably this hides all CUDA devices so the server runs on
# CPU, while -ngl below still asks for layer offload; confirm which is intended.
os.environ['CUDA_VISIBLE_DEVICES'] = '-1'
# Pass the arguments as a list (no shell) so each option reaches the server as
# its own argument. The previous single shell string had an unbalanced double
# quote after the -a value.
subprocess.run([
    'llama-server.exe',
    '--model', 'C:/Users/tellw/Downloads/moondream2-text-model-f16_vikhyatk_moondream2.gguf',
    '-c', '2048',
    '-ngl', '200',
    '-a', 'moondream2-text-model-f16_vikhyatk_moondream2.gguf',
    '--host', '127.0.0.1',
    '--port', '8080',
    '-np', '1',
    '--metrics',
    '--slots',
    '-fa',
    '--no-mmap',
])
客户端
import openai
import base64
# Local image that is sent to the server together with the question.
pic_name = 'C:/Users/tellw/test/test.jpg'
# Client for the OpenAI-compatible endpoint at 127.0.0.1:8080.
client = openai.OpenAI(base_url='http://127.0.0.1:8080/v1', api_key='1')
# Read the image bytes and base64-encode them so they can be embedded in a
# data URL inside the request.
with open(pic_name, 'rb') as f:
    base64_image = base64.b64encode(f.read()).decode('utf8')
# One user message carrying the image (as a data URL) followed by the prompt.
messages = [{'role':'user','content':[{'type':'image_url','image_url':{'url':f'data:image/jpeg;base64,{base64_image}'}},{'type':'text','text':'What is the girl doing?'}]}]
# Example prompts are listed below. Only the "describe" prompts gave acceptable
# results; for the other prompts the model just repeated itself.
# Describe this image and its style in a very detailed manner
# Describe this image
# What is the girl doing?
# What color is the girl's hair?
# What is this?
# What is behind the stand?
# The server prints the model name when it starts; use that name as `model`.
completion = client.chat.completions.create(
    model='moondream2-text-model-f16_vikhyatk_moondream2',
    messages=messages,
    frequency_penalty=0.2,
)
print(completion.choices[0].message.content)
参考链接:https://hf-mirror.com/vikhyatk/moondream2
https://docs.moondream.ai/openai-compatibility#working-with-local-images
创建于2501301915,修改于2501301915
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 分享一个免费、快速、无限量使用的满血 DeepSeek R1 模型,支持深度思考和联网搜索!
· 基于 Docker 搭建 FRP 内网穿透开源项目(很简单哒)
· ollama系列01:轻松3步本地部署deepseek,普通电脑可用
· 25岁的心里话
· 按钮权限的设计及实现