tfserving部署

  1. 进入 root 用户下
      1. 安装 docker：yum -y install docker
      2. 启动 docker 服务：systemctl start docker
      3. 查看运行中的容器：sudo docker ps，如图所示，能正常输出即说明 docker 启动成功
      4. 拉取镜像：sudo docker pull tensorflow/serving:2.11.0（如图）
      5. 查看镜像：sudo docker images

 

docker的文件路径:

配置文件和模型文件

 

 

2. docker的启动脚本 run.sh

1
2
3
4
5
6
7
8
9
#!/bin/bash
#
# run.sh - start four TensorFlow Serving containers in the background:
# two for the "prerank" models and two for the "relevance" models.
# Every container gets its own pair of host ports, its own CPU set and its
# own log file. Inside the container, 8500 is the gRPC port and 8501 is the
# REST port of tensorflow/serving.

readonly IMAGE="tensorflow/serving:2.11.0"
readonly DATA_ROOT="/home/public_sudo/data"

#######################################
# Start one TensorFlow Serving container as a background job.
# Globals:
#   IMAGE, DATA_ROOT (read)
# Arguments:
#   $1 - container name (docker --name)
#   $2 - host port published to container port 8500
#   $3 - host port published to container port 8501
#   $4 - CPU list for --cpuset-cpus, e.g. "64-71"
#   $5 - model group directory under DATA_ROOT, e.g. "prerank"
#   $6 - log file name inside ${DATA_ROOT}/$5/logs
# Outputs:
#   The docker client's stdout goes to ${DATA_ROOT}/$5/logs/$6.
# Returns:
#   Non-zero if the log directory cannot be created; otherwise the status
#   of launching the background job.
#######################################
start_serving() {
  local name=$1
  local grpc_port=$2
  local rest_port=$3
  local cpuset=$4
  local data_dir="${DATA_ROOT}/$5"
  local log_file="${data_dir}/logs/$6"

  # The redirection below fails if the log directory does not exist yet.
  mkdir -p -- "${data_dir}/logs" || return

  # NOTE(review): --per_process_gpu_memory_fraction is presumably a no-op on
  # the CPU image; it is kept as in the original commands - confirm.
  # TF_CPP_MIN_VLOG_LEVEL=4 turns on very verbose logging.
  sudo docker run -t --rm \
    -p "${grpc_port}:8500" -p "${rest_port}:8501" \
    --name "${name}" \
    --cpuset-cpus="${cpuset}" --memory="16g" \
    -v "${data_dir}:/models" \
    -v "${data_dir}/config:/config" \
    -e TF_CPP_MIN_VLOG_LEVEL=4 \
    "${IMAGE}" \
    --per_process_gpu_memory_fraction=0.8 \
    --enable_batching=true \
    --model_config_file_poll_wait_seconds=60 \
    --model_config_file=/config/model.config \
    > "${log_file}" &
}

start_serving prerank_8016   8016 8017 "64-71" prerank   log8016.log
start_serving prerank_8018   8018 8019 "72-79" prerank   log8018.log
start_serving relevance_8020 8020 8021 "80-87" relevance 8020.log
start_serving relevance_8022 8022 8023 "88-95" relevance 8022.log

3. tfserving 的模型配置文件（model.config）格式

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
# TensorFlow Serving model config (protobuf text format), passed to the
# server with --model_config_file=/config/model.config.
# Each config entry declares one servable model; base_path is the path as
# seen inside the container, where the host model directory is mounted
# at /models.
model_config_list {
  config {
    name: "relevance_v5"
    base_path: "/models/relevance_v5"
    model_platform: "tensorflow"
  }
  config {
    name: "relevance_v4"
    base_path: "/models/relevance_v4"
    model_platform: "tensorflow"
  }
  config {
    name: "relevance_v3"
    base_path: "/models/relevance_v3"
    model_platform: "tensorflow"
  }
  config {
    name: "relevance_v1"
    base_path: "/models/relevance_v1"
    model_platform: "tensorflow"
  }
  config {
    name: "relevance_v2"
    base_path: "/models/relevance_v2"
    model_platform: "tensorflow"
  }
}

4. 用 python 测试 tfserving 的 REST 接口

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
import json
import requests
 
# Sample feature vector sent as the "relevance_input" of every test request.
# v1, v5 and v6 send the whole list; v2, v3 and v4 send only the first 60
# values (input[:60]).
# NOTE(review): this name shadows the built-in input(); it is kept because
# the test functions below reference it by this name.
input = [1 ,1 ,1 ,1 ,1 ,1 ,0 ,1 ,0.999 ,0 ,3 ,1 ,0.3333333333333333 ,2 ,0.6666666666666666 ,1 ,0.3333333333333333 ,0
         ,0.0 ,0 ,0 ,0 ,1 ,0 ,1 ,0 ,0 ,1 ,0 ,1 ,0 ,0.8652462791972874 ,0.020301 ,0.626570119698537 ,0.0 ,1 ,1 ,1.0 ,0
         ,0.0 ,1 ,1.0 ,0 ,0.0 ,1 ,1 ,0 ,0 ,0 ,1 ,0 ,0 ,0 ,0 ,0 ,1 ,0.8342278053174881 ,0.01272 ,1.0 ,0.0
         ,0.08677808000000002 ,-0.5144478466666667 ,0.09813555641 ,0.4995743833333333 ,0.6875768999999999 ,0.29120327
         ,-0.28226265 ,-0.4784384366666667 ,-0.58632582 ,0.04208295999999998 ,-0.32798395333333336 ,-0.02741264333333332
         ,0.049279349666666666 ,-0.16968005666666663 ,-0.33240925466666665 ,0.4360717 ,-0.2929261233333333
         ,-0.2981464466666667 ,-0.07636033 ,-0.274444911 ,-0.3359526518666667 ,0.50867007 ,0.39146848666666667
         ,-0.7227929266666666 ,-0.191814214 ,0.043227369999999994 ,-0.8780203333333333 ,0.10589143999999999 ,0.15558687
         ,-0.4661960733333333 ,-0.37050828333333335 ,0.04813903993333333 ,-0.142883988375 ,-0.734637185625
         ,0.15876262870312502 ,0.7311148107500001 ,0.4145796666875001 ,-0.009326533625000001 ,-0.3222952091875
         ,-0.5682045862499999 ,-0.8492591543749999 ,-0.057231256250000015 ,-0.2482288354999999 ,0.24096290889374997
         ,-0.030687868800000004 ,-0.496588725 ,-0.29752982512500004 ,0.283379688875 ,-0.5157605628124999
         ,-0.29660309737499996 ,0.23857122374999998 ,-0.2940595146875 ,-0.312397124 ,0.4648682387500001 ,0.3255289983125
         ,-0.599876252 ,0.06666920775 ,0.2970209875 ,-0.48590265374999997 ,-0.20199137175 ,0.18776840000000006
         ,-0.37830229231250007 ,-0.31018528725000005 ,0.02904392187500001]
 
 
# Host ports on localhost that the two relevance containers publish for the
# TensorFlow Serving REST API.
_REST_PORTS = (8021, 8023)

# Upper bound in seconds for one HTTP request, so a dead or overloaded server
# cannot hang the test script forever.
_REQUEST_TIMEOUT_SECONDS = 30


def _predict_on_both_ports(model_name, features):
    """Send the same two-instance predict request to every port in _REST_PORTS.

    Args:
        model_name: name of the served model, e.g. "relevance_v1".
        features: list of numbers sent as the "relevance_input" of each of
            the two instances in the request.

    Returns:
        A list with one entry per port, in _REST_PORTS order; each entry is
        the "predictions" value of that server's JSON reply.

    Raises:
        RuntimeError: if a server answers with a non-200 HTTP status. The
            message carries the response body, where TensorFlow Serving
            puts its error text.
    """
    payload = json.dumps({
        "signature_name": "serving_default",
        "instances": [{"relevance_input": features}] * 2,
    })
    headers = {"content-type": "application/json"}

    results = []
    for port in _REST_PORTS:
        url = 'http://localhost:%d/v1/models/%s:predict' % (port, model_name)
        response = requests.post(url, data=payload, headers=headers,
                                 timeout=_REQUEST_TIMEOUT_SECONDS)
        if response.status_code != 200:
            # Fail with the server's own message instead of a bare
            # KeyError on the missing "predictions" key.
            raise RuntimeError("%s returned HTTP %s: %s"
                               % (url, response.status_code, response.text))
        results.append(json.loads(response.text)["predictions"])
    return results


def v1():
    """Query relevance_v1 on both servers with the full feature vector and print the predictions."""
    predictions, predictions1 = _predict_on_both_ports("relevance_v1", input)
    print("v1:", predictions, predictions1)


def v2():
    """Query relevance_v2 on both servers with the first 60 features and print the predictions."""
    predictions, predictions1 = _predict_on_both_ports("relevance_v2", input[:60])
    print("v2:", predictions, predictions1)


def v3():
    """Query relevance_v3 on both servers with the first 60 features and print the predictions."""
    predictions, predictions1 = _predict_on_both_ports("relevance_v3", input[:60])
    print("v3:", predictions, predictions1)


def v4():
    """Query relevance_v4 on both servers with the first 60 features and print the predictions."""
    predictions, predictions1 = _predict_on_both_ports("relevance_v4", input[:60])
    print("v4:", predictions, predictions1)


def v5():
    """Query relevance_v5 on both servers with the full feature vector and print the predictions."""
    predictions, predictions1 = _predict_on_both_ports("relevance_v5", input)
    print("v5:", predictions, predictions1)


def v6():
    """Query relevance_v6 on both servers with the full feature vector and print the predictions.

    NOTE(review): the sample model config in this document lists only
    relevance_v1..v5, so this presumably needs a relevance_v6 entry to be
    deployed first - confirm.
    """
    predictions, predictions1 = _predict_on_both_ports("relevance_v6", input)
    print("v6:", predictions, predictions1)
 
if __name__ == '__main__':
    # Run the smoke test for each model version, in order.
    # v6 is defined above but is not part of this run.
    for run_check in (v1, v2, v3, v4, v5):
        run_check()

 

posted @   15375357604  阅读(11)  评论(0编辑  收藏  举报
相关博文:
阅读排行:
· 【自荐】一款简洁、开源的在线白板工具 Drawnix
· 没有Manus邀请码?试试免邀请码的MGX或者开源的OpenManus吧
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· C#/.NET/.NET Core优秀项目和框架2025年2月简报
· DeepSeek在M芯片Mac上本地化部署
点击右上角即可分享
微信分享提示