OPEA Deployment

Model download (from the ModelScope mirror)

https://www.modelscope.cn/models/zhipuai/chatglm3-6b

RKE2 deployment K8s

# --- RKE2: make the cluster kubeconfig usable for the current user ---
export KUBECONFIG=/etc/rancher/rke2/rke2.yaml
mkdir -p ~/.kube/
sudo cp /etc/rancher/rke2/rke2.yaml ~/.kube/config
sudo chown "$USER:$USER" ~/.kube/config

# Install kubectl, verifying the binary checksum as recommended by the
# official docs: https://kubernetes.io/docs/tasks/tools/install-kubectl-linux/
KUBECTL_VER=$(curl -L -s https://dl.k8s.io/release/stable.txt)
curl -LO "https://dl.k8s.io/release/${KUBECTL_VER}/bin/linux/amd64/kubectl"
curl -LO "https://dl.k8s.io/release/${KUBECTL_VER}/bin/linux/amd64/kubectl.sha256"
echo "$(cat kubectl.sha256)  kubectl" | sha256sum --check
sudo install -o root -g root -m 0755 kubectl /usr/local/bin/kubectl
# sudo chown root:root /usr/local/bin/kubectl
source <(kubectl completion bash)

# Host IP on the default-route interface; used to build the UI URL below.
IPADDR=$(ip route get 1 | head -n 1 | grep -o 'src\s[.0-9a-z]\+' | awk '{print $2}')
uiport=$(kubectl -n opea-chatqna get svc -l "app.kubernetes.io/name"=chatqna-ui -o jsonpath='{.items[0].spec.ports[0].nodePort}')
echo "login windows and open a browser with http://$IPADDR:$uiport"

check ConfigMap

# Inspect the ConfigMaps generated by the ChatQnA helm charts.
ns=opea-chatqna

kubectl -n "$ns" get cm -l app.kubernetes.io/name=data-prep
kubectl -n "$ns" get cm -l app.kubernetes.io/name=data-prep -o yaml
kubectl -n "$ns" get cm -l app.kubernetes.io/name=llm-uservice -o yaml
kubectl -n "$ns" get cm -l app.kubernetes.io/name=retriever-usvc -o yaml

# For tei / teirerank only the .data payload is of interest.
kubectl -n "$ns" get cm -l app.kubernetes.io/name=tei -o json | jq '.items[0].data'
kubectl -n "$ns" get cm -l app.kubernetes.io/name=teirerank -o json | jq '.items[0].data'
# kubectl -n "$ns" get cm -l app.kubernetes.io/name=tgi
 
# SECURITY: never hard-code a HuggingFace token in notes/scripts. The token
# previously committed on this line is leaked and must be revoked at
# https://huggingface.co/settings/tokens . Export HF_TOKEN before running.
TOKEN="${HF_TOKEN:-}"
[ -n "$TOKEN" ] || echo "WARNING: HF_TOKEN is empty; export HF_TOKEN=hf_xxx first" >&2

# Build a JSON-Patch document adding one key under .data of a ConfigMap.
#   $1 - key name, $2 - value
patch_payload() {
  printf '[{"op":"add","path":"/data/%s","value":"%s"}]' "$1" "$2"
}

# Inject the token into every chart that pulls models from HuggingFace
# (previously three copy-pasted stanzas).
for name in retriever-usvc tei teirerank; do
  label="app.kubernetes.io/name=${name}"
  cm=$(kubectl -n opea-chatqna get cm -l "$label" -o name)
  kubectl -n opea-chatqna patch "$cm" --type='json' \
    -p "$(patch_payload HUGGINGFACEHUB_API_TOKEN "$TOKEN")"
  kubectl -n opea-chatqna get cm -l "$label" -o yaml
done
 
DATAP="/root"
# No trailing slash: huggingface_hub joins HF_ENDPOINT with absolute paths,
# so "https://hf-mirror.com/" would yield double slashes in request URLs.
MIRROR=https://hf-mirror.com
label=app.kubernetes.io/name=tgi
# REF: https://github.com/opea-project/GenAIInfra/blob/main/helm-charts/common/tgi/templates/configmap.yaml#L14C18-L14C36
cm=$(kubectl -n opea-chatqna get cm -l "$label" -o name)

# Add one key/value to the tgi ConfigMap .data via JSON-Patch.
#   $1 - key name, $2 - value
patch_data() {
  kubectl -n opea-chatqna patch "$cm" --type='json' \
    -p '[{"op":"add","path":"/data/'"$1"'","value":"'"$2"'"}]'
}

# Keys marked "(no effect)" were experiments the tgi image ignores
# (original notes marked them with "X"); HF_TOKEN and HF_ENDPOINT work.
patch_data global.modelUseHostPath "$DATAP"          # data path (no effect)
patch_data modelUseHostPath "$DATAP"                 # data path (no effect)
patch_data HUGGINGFACEHUB_API_TOKEN "$TOKEN"         # token (no effect)
patch_data global.HUGGINGFACEHUB_API_TOKEN "$TOKEN"  # token (no effect)
patch_data HF_TOKEN "$TOKEN"                         # token (works)
patch_data HF_ENDPOINT "$MIRROR"                     # mirror (works)
patch_data global.HF_ENDPOINT "$MIRROR"              # mirror (no effect)
kubectl -n opea-chatqna get cm -l "$label" -o yaml
 
 
 
# List pods that are not fully ready.
# NOTE: kubectl field selectors do NOT support status.containerStatuses —
# the previous `--field-selector="status.containerStatuses.readt!=true"`
# was both a typo ("readt") and an unsupported selector. Filter with
# jsonpath instead:
kubectl get pod -n opea-chatqna \
    -o jsonpath='{range .items[?(@.status.containerStatuses[*].ready!=true)]}{.metadata.name}{"\n"}{end}'
 
 
ns=opea-chatqna
components=(retriever-usvc tei teirerank tgi)

# Show which ConfigMaps each pod imports via envFrom.
for c in "${components[@]}"; do
  kubectl -n "$ns" get pods -l "app.kubernetes.io/name=$c" -o json \
    | jq '.items[0].spec.containers[0].envFrom'
done

# Recreate the pods so they pick up the patched ConfigMaps.
for c in "${components[@]}"; do
  kubectl -n "$ns" delete pods -l "app.kubernetes.io/name=$c"
done

# Events / scheduling status for each component.
for c in "${components[@]}"; do
  kubectl -n "$ns" describe pods -l "app.kubernetes.io/name=$c"
done

# Full container logs (--tail=-1 means "no limit").
for c in "${components[@]}"; do
  kubectl -n "$ns" logs -l "app.kubernetes.io/name=$c" --tail=-1
done

# tgi is the slowest to come up (model download); block until it is ready.
kubectl -n "$ns" wait --for=condition=ready pod -l app.kubernetes.io/name=tgi --timeout=3m

Port forward

# Option A: kubectl port-forward (run on the orchestrator node).
kubectl -n opea-chatqna port-forward "$(kubectl -n opea-chatqna get svc -l "app.kubernetes.io/name"=chatqna-ui -o name)" 5174

kubectl cluster-info

clusterip=$(kubectl -n opea-chatqna get svc -l "app.kubernetes.io/name"=chatqna-ui -o jsonpath='{.items[0].spec.clusterIP}')
echo "copy the var to edge node: clusterip=$clusterip"

# ---- Option B: the rest runs on the edge node ----
sudo apt install socat -y

IPADDR=$(ip route get 1 | head -n 1 | grep -o 'src\s[.0-9a-z]\+' | awk '{print $2}')
echo "login windows and open a browser with http://$IPADDR:8080"

# Paste the clusterip value printed above before continuing. The original
# "clusterip=$clusterip" line was a no-op placeholder; fail fast instead.
: "${clusterip:?paste the clusterip value from the orchestrator first}"
sudo ufw allow 8080/tcp
socat TCP-LISTEN:8080,fork "TCP:${clusterip}:5174"

# login the orchestrator
echo "in private network, we can use X11 forward, please run this command:"
echo "google-chrome --new-window http://$IPADDR:8080/ --user-data-dir=/tmp/chromedpdata --no-first-run"

# https://unix.stackexchange.com/questions/10428/simple-way-to-create-a-tunnel-from-one-local-port-to-another
# echo "ssh -g -L $IPADDR:8080:localhost:8000 -f -N user@$IPADDR.com"

 

question

It takes 12 minutes to saw a piece of wood into 4 pieces. How many minutes does it take to saw it into 7 pieces?

install HuggingFace CLI

sudo apt install python3-pip
pip install huggingface_hub

# To login, `huggingface_hub` requires a token generated from https://huggingface.co/settings/tokens .
huggingface-cli login

HUGGINGFACE_TOKEN=""

# BUG FIX: user-installed console scripts (e.g. huggingface-cli) live under
# <user-base>/bin (typically ~/.local/bin), not <user-base> itself — the
# previous line omitted the "/bin" suffix, so the CLI was never on PATH.
echo "PATH=$(python3 -m site --user-base)/bin:\$PATH" >> ~/.bashrc

index

https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA

architecture

https://github.com/opea-project/GenAIComps

RAG API workflow

REF: https://github.com/opea-project/GenAIExamples/tree/main/ChatQnA/docker/xeon

# 1) Get the embedding of the input question from the TEI service.
#    (`curl -x ""` disables any configured HTTP proxy for in-cluster IPs.)
label='app.kubernetes.io/name=tei'
clusterip=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
input="What is Deep Learning?"
input="What is the revenue of Nike in 2023?"
payload='{"inputs":"'"$input"'"}'
curl -x "" -X POST \
    -H 'Content-Type: application/json' \
    -d "$payload" \
    "http://${clusterip}:${port}/embed"
your_embedding=$(curl -x "" -X POST \
    -H 'Content-Type: application/json' \
    -d "$payload" \
    "http://${clusterip}:${port}/embed" | jq -c '.[0]')

# Sanity check of the embedding micro-service wrapper.
label='app.kubernetes.io/name=embedding-usvc'
clusterip=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
curl -x "" -X POST \
  -H 'Content-Type: application/json' \
  -d '{"text":"hello"}' \
  "http://${clusterip}:${port}/v1/embeddings"
 
# 2) Retrieve documents similar to the question embedding.
label='app.kubernetes.io/name=retriever-usvc'
clusterip=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.ports[0].port}')
# export your_embedding=$(python3 -c "import random; embedding = [random.uniform(-1, 1) for _ in range(768)]; print(embedding)")
echo "clusterip=$clusterip"
echo "port=$port"
# text='Just a test'
text=$input
# Build the request with jq so quotes/backslashes in $text cannot break the
# JSON (the previous hand-interpolated string was injection-prone).
req=$(jq -n --arg text "$text" --argjson embedding "$your_embedding" \
        '{text: $text, embedding: $embedding}')
curl -x "" "http://${clusterip}:${port}/v1/retrieval" \
  -X POST \
  -d "$req" \
  -H 'Content-Type: application/json'
retrieved_docs=$(curl -x "" "http://${clusterip}:${port}/v1/retrieval" \
  -X POST \
  -d "$req" \
  -H 'Content-Type: application/json' | jq -c .retrieved_docs)
 
# 3) Rerank the retrieved documents against the query.
label='app.kubernetes.io/name=reranking-usvc'
clusterip=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
# query="What is Deep Learning?"
query=$input
# Smoke test with a fixed payload.
curl -x "" "http://${clusterip}:${port}/v1/reranking" \
  -X POST \
  -d '{"initial_query":"What is Deep Learning?", "retrieved_docs": [{"text":"Deep Learning is not..."}, {"text":"Deep learning is..."}]}' \
  -H 'Content-Type: application/json'
# Real call — build the JSON with jq so $query cannot break it.
req=$(jq -n --arg q "$query" --argjson docs "$retrieved_docs" \
        '{initial_query: $q, retrieved_docs: $docs}')
reranking_docs=$(curl -x "" "http://${clusterip}:${port}/v1/reranking" \
  -X POST \
  -d "$req" \
  -H 'Content-Type: application/json' | jq -r '.documents[0]')

# Sanitize for the hand-built prompt below: `jq -r` already emitted the raw
# string (no outer JSON quotes, escapes resolved), replacing the previous
# sed/tr chain; drop any remaining double quotes so the text can be embedded
# in a JSON string without escaping.
reranking_docs=$(tr -d '"' <<< "${reranking_docs}")
 
# 4.a) LLM micro-service (OpenAI-style completions endpoint).
label='app.kubernetes.io/name=llm-uservice'
clusterip=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
curl -x "" "http://${clusterip}:${port}/v1/completions" \
  -X POST \
  -H "Content-Type: application/json" \
  -d '{"model": "Intel/neural-chat-7b-v3-3", "prompt": "What is Deep Learning?", "max_tokens": 32, "temperature": 0}'

# End-to-end ChatQnA megaservice.
label='app.kubernetes.io/name=chatqna'
clusterip=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"
curl -x "" "http://${clusterip}:${port}/v1/chatqna" -H "Content-Type: application/json" -d '{
     "messages": "What is the revenue of Nike in 2023?"
     }'

# 4.b) Raw TGI /generate with a hand-built RAG prompt.
label='app.kubernetes.io/name=tgi'
clusterip=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"

# your question
query=${input}
# Prompt template. printf turns \n\n into real newlines here so that jq's
# JSON encoding below sends the same newline characters the server used to
# decode from the hand-written "\n\n" escape.
inputs=$(printf '### You are a helpful, respectful and honest assistant to help the user with questions. Please refer to the search results obtained from the local knowledge base. But be careful to not incorporate the information that you think is not relevant to the question. If you don'\''t know the answer to a question, please don'\''t share false information. ### Search results: %s ### Question: %s \n\n### Answer:' "${reranking_docs}" "${query}")

# Build the payload with jq: $inputs contains arbitrary reranker text and
# would otherwise break the hand-interpolated JSON (quote injection).
req=$(jq -n --arg inputs "$inputs" \
        '{inputs: $inputs, parameters: {max_new_tokens: 1024, do_sample: true}}')
curl -x "" "http://${clusterip}:${port}/generate" \
  -X POST \
  -d "$req" \
  -H 'Content-Type: application/json'
 
# 5) data-prep: ingest a sample PDF into the redis vector store.
label='app.kubernetes.io/name=data-prep'
clusterip=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.clusterIP}')
port=$(kubectl -n opea-chatqna get svc -l "${label}" -o jsonpath='{.items[0].spec.ports[0].port}')
echo "clusterip=$clusterip"
echo "port=$port"

# Sample document (Nike 10-K filing).
wget https://raw.githubusercontent.com/opea-project/GenAIComps/main/comps/retrievers/langchain/redis/data/nke-10k-2023.pdf

base="http://${clusterip}:${port}/v1/dataprep"

# Upload the PDF.
curl -x "" -X POST "$base" \
     -H "Content-Type: multipart/form-data" \
     -F "files=@./nke-10k-2023.pdf"

# List ingested files.
curl -x "" -X POST "${base}/get_file" \
     -H "Content-Type: application/json"

# Delete one file by path.
# NOTE(review): "https://opea.dev.txt" looks like a sample path from the
# OPEA docs, not the PDF uploaded above — confirm the intended file_path.
curl -x "" -X POST "${base}/delete_file" \
     -d '{"file_path": "https://opea.dev.txt"}' \
     -H "Content-Type: application/json"

 

get image info

edge@iot-edge-xr12-5:~/kubeconf$ kubectl -n opea-chatqna get  pod -o json | jq .items[].spec.containers[].image
"opea/chatqna:latest"
"amr-registry.caas.intel.com/nex-hybrid-ai/chatqna-conversation-ui:itep-build"
"opea/dataprep-redis:latest"
"opea/embedding-tei:latest"
"opea/llm-tgi:latest"
"redis/redis-stack:7.2.0-v9"
"opea/reranking-tei:latest"
"opea/retriever-redis:latest"
"ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
"ghcr.io/huggingface/text-embeddings-inference:cpu-1.5"
"ghcr.io/huggingface/text-generation-inference:2.1.0"
edge@iot-edge-xr12-5:~/kubeconf$ kubectl -n opea-chatqna get  pod -o json | jq .items[] |less
edge@iot-edge-xr12-5:~/kubeconf$ kubectl -n opea-chatqna get  pod -o json | jq .items[].metadata.name
"b-21365218-da18-5fad-b4be-0d843c751579-chatqna-686f56fc87-rbnn4"
"b-21365218-da18-5fad-b4be-0d843c751579-chatqna-ui-f7b644757rqgb"
"b-21365218-da18-5fad-b4be-0d843c751579-data-prep-5cf8f8dc475f9r"
"b-21365218-da18-5fad-b4be-0d843c751579-embedding-usvc-85875bjcn"
"b-21365218-da18-5fad-b4be-0d843c751579-llm-uservice-6cf647w8rrz"
"b-21365218-da18-5fad-b4be-0d843c751579-redis-vector-db-7b4frbxj"
"b-21365218-da18-5fad-b4be-0d843c751579-reranking-usvc-58f87zbrw"
"b-21365218-da18-5fad-b4be-0d843c751579-retriever-usvc-557dv4v5r"
"b-21365218-da18-5fad-b4be-0d843c751579-tei-6bc85bc8db-7j9vf"
"b-21365218-da18-5fad-b4be-0d843c751579-teirerank-8cb97f9b-pbl45"
"b-21365218-da18-5fad-b4be-0d843c751579-tgi-75b687bdd8-fz97w"

 

The ENV variables work correctly in the OPEA team's reference deployment

kubectl exec chatqna-tgi-cfd44f9c4-szhqt -- env
PATH=/opt/conda/bin:/usr/local/nvidia/bin:/usr/local/cuda/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin
HOSTNAME=chatqna-tgi-cfd44f9c4-szhqt
NUMBA_CACHE_DIR=/tmp
TRANSFORMERS_CACHE=/tmp/transformers_cache
no_proxy=bj.intel.com,.bj.intel.com,10.0.0.0/8,192.168.0.0/16,localhost,127.0.0.0/8
CUDA_GRAPHS=0
HF_HOME=/tmp/.cache/huggingface
HF_TOKEN=hf_<REDACTED — leaked token, revoke it at https://huggingface.co/settings/tokens>
HF_ENDPOINT=https://hf-mirror.com
http_proxy=http://child-prc.intel.com:913
PORT=2080
HABANA_LOGS=/tmp/habana_logs
https_proxy=http://child-prc.intel.com:913
MODEL_ID=Intel/neural-chat-7b-v3-3
CHATQNA_TEIRERANK_SERVICE_HOST=10.97.193.140
CHATQNA_TEIRERANK_PORT=tcp://10.97.193.140:80
CHATQNA_TEIRERANK_PORT_80_TCP_PORT=80
CHATQNA_PORT_8888_TCP_PROTO=tcp
KUBERNETES_SERVICE_PORT_HTTPS=443
CHATQNA_LLM_USERVICE_SERVICE_HOST=10.110.202.82
CHATQNA_RERANKING_USVC_SERVICE_HOST=10.106.114.139
CHATQNA_RERANKING_USVC_PORT_8000_TCP=tcp://10.106.114.139:8000
CHATQNA_TGI_PORT_80_TCP=tcp://10.106.73.168:80
CHATQNA_TGI_SERVICE_PORT=80
CHATQNA_DATA_PREP_PORT=tcp://10.98.141.218:6007
CHATQNA_TEI_PORT_80_TCP_PORT=80
CHATQNA_SERVICE_PORT=8888
CHATQNA_PORT_8888_TCP_PORT=8888
CHATQNA_PORT_8888_TCP_ADDR=10.96.134.150
CHATQNA_TEI_SERVICE_HOST=10.97.141.126
CHATQNA_RETRIEVER_USVC_SERVICE_HOST=10.98.19.124
CHATQNA_RETRIEVER_USVC_PORT_7000_TCP_ADDR=10.98.19.124
CHATQNA_REDIS_VECTOR_DB_PORT_6379_TCP_ADDR=10.103.0.129
CHATQNA_REDIS_VECTOR_DB_PORT_8001_TCP_PORT=8001
CHATQNA_TEIRERANK_PORT_80_TCP=tcp://10.97.193.140:80
CHATQNA_TGI_PORT_80_TCP_PROTO=tcp
CHATQNA_SERVICE_PORT_CHATQNA=8888
KUBERNETES_SERVICE_HOST=10.96.0.1
KUBERNETES_PORT=tcp://10.96.0.1:443
CHATQNA_LLM_USERVICE_PORT_9000_TCP_ADDR=10.110.202.82
CHATQNA_REDIS_VECTOR_DB_SERVICE_PORT_REDIS_INSIGHT=8001
CHATQNA_RETRIEVER_USVC_SERVICE_PORT=7000
CHATQNA_REDIS_VECTOR_DB_PORT_8001_TCP_ADDR=10.103.0.129
CHATQNA_TEIRERANK_PORT_80_TCP_PROTO=tcp
CHATQNA_PORT_8888_TCP=tcp://10.96.134.150:8888
CHATQNA_EMBEDDING_USVC_PORT=tcp://10.100.45.231:6000
CHATQNA_EMBEDDING_USVC_PORT_6000_TCP_PROTO=tcp
CHATQNA_LLM_USERVICE_PORT_9000_TCP_PORT=9000
CHATQNA_RERANKING_USVC_PORT_8000_TCP_PROTO=tcp
CHATQNA_EMBEDDING_USVC_SERVICE_PORT_EMBEDDING_USVC=6000
CHATQNA_RETRIEVER_USVC_PORT_7000_TCP_PROTO=tcp
CHATQNA_RERANKING_USVC_PORT_8000_TCP_PORT=8000
CHATQNA_TEIRERANK_SERVICE_PORT_TEIRERANK=80
CHATQNA_LLM_USERVICE_PORT=tcp://10.110.202.82:9000
CHATQNA_REDIS_VECTOR_DB_PORT_8001_TCP_PROTO=tcp
CHATQNA_DATA_PREP_SERVICE_PORT=6007
CHATQNA_DATA_PREP_PORT_6007_TCP_ADDR=10.98.141.218
CHATQNA_PORT=tcp://10.96.134.150:8888
CHATQNA_EMBEDDING_USVC_SERVICE_HOST=10.100.45.231
CHATQNA_EMBEDDING_USVC_PORT_6000_TCP_ADDR=10.100.45.231
CHATQNA_REDIS_VECTOR_DB_PORT_6379_TCP=tcp://10.103.0.129:6379
CHATQNA_TGI_SERVICE_HOST=10.106.73.168
CHATQNA_TEI_SERVICE_PORT=80
CHATQNA_TEI_PORT_80_TCP_ADDR=10.97.141.126
CHATQNA_SERVICE_HOST=10.96.134.150
KUBERNETES_PORT_443_TCP_PROTO=tcp
CHATQNA_RERANKING_USVC_SERVICE_PORT=8000
CHATQNA_DATA_PREP_PORT_6007_TCP=tcp://10.98.141.218:6007
CHATQNA_EMBEDDING_USVC_PORT_6000_TCP=tcp://10.100.45.231:6000
CHATQNA_REDIS_VECTOR_DB_SERVICE_PORT=6379
CHATQNA_DATA_PREP_PORT_6007_TCP_PORT=6007
CHATQNA_LLM_USERVICE_PORT_9000_TCP=tcp://10.110.202.82:9000
CHATQNA_LLM_USERVICE_PORT_9000_TCP_PROTO=tcp
CHATQNA_REDIS_VECTOR_DB_PORT_6379_TCP_PROTO=tcp
CHATQNA_REDIS_VECTOR_DB_PORT_6379_TCP_PORT=6379
CHATQNA_DATA_PREP_SERVICE_PORT_DATA_PREP=6007
CHATQNA_TEI_PORT_80_TCP=tcp://10.97.141.126:80
CHATQNA_TEI_PORT_80_TCP_PROTO=tcp
KUBERNETES_PORT_443_TCP_ADDR=10.96.0.1
CHATQNA_RERANKING_USVC_PORT_8000_TCP_ADDR=10.106.114.139
CHATQNA_DATA_PREP_SERVICE_HOST=10.98.141.218
CHATQNA_TEI_SERVICE_PORT_TEI=80
CHATQNA_REDIS_VECTOR_DB_SERVICE_HOST=10.103.0.129
CHATQNA_TGI_PORT_80_TCP_PORT=80
CHATQNA_TGI_PORT_80_TCP_ADDR=10.106.73.168
CHATQNA_DATA_PREP_PORT_6007_TCP_PROTO=tcp
CHATQNA_EMBEDDING_USVC_PORT_6000_TCP_PORT=6000
CHATQNA_LLM_USERVICE_SERVICE_PORT=9000
CHATQNA_RERANKING_USVC_PORT=tcp://10.106.114.139:8000
CHATQNA_REDIS_VECTOR_DB_PORT_8001_TCP=tcp://10.103.0.129:8001
CHATQNA_RETRIEVER_USVC_PORT=tcp://10.98.19.124:7000
CHATQNA_RETRIEVER_USVC_PORT_7000_TCP=tcp://10.98.19.124:7000
CHATQNA_RERANKING_USVC_SERVICE_PORT_RERANKING_USVC=8000
KUBERNETES_SERVICE_PORT=443
KUBERNETES_PORT_443_TCP=tcp://10.96.0.1:443
KUBERNETES_PORT_443_TCP_PORT=443
CHATQNA_LLM_USERVICE_SERVICE_PORT_LLM_USERVICE=9000
CHATQNA_RETRIEVER_USVC_SERVICE_PORT_RETRIEVER_USVC=7000
CHATQNA_REDIS_VECTOR_DB_SERVICE_PORT_REDIS_SERVICE=6379
CHATQNA_TEIRERANK_SERVICE_PORT=80
CHATQNA_TGI_SERVICE_PORT_TGI=80
CHATQNA_TGI_PORT=tcp://10.106.73.168:80
CHATQNA_TEI_PORT=tcp://10.97.141.126:80
CHATQNA_EMBEDDING_USVC_SERVICE_PORT=6000
CHATQNA_RETRIEVER_USVC_PORT_7000_TCP_PORT=7000
CHATQNA_REDIS_VECTOR_DB_PORT=tcp://10.103.0.129:6379
CHATQNA_TEIRERANK_PORT_80_TCP_ADDR=10.97.193.140
NVARCH=x86_64
NVIDIA_REQUIRE_CUDA=cuda>=12.1 brand=tesla,driver>=470,driver<471 brand=unknown,driver>=470,driver<471 brand=nvidia,driver>=470,driver<471 brand=nvidiartx,driver>=470,driver<471 brand=geforce,driver>=470,driver<471 brand=geforcertx,driver>=470,driver<471 brand=quadro,driver>=470,driver<471 brand=quadrortx,driver>=470,driver<471 brand=titan,driver>=470,driver<471 brand=titanrtx,driver>=470,driver<471 brand=tesla,driver>=525,driver<526 brand=unknown,driver>=525,driver<526 brand=nvidia,driver>=525,driver<526 brand=nvidiartx,driver>=525,driver<526 brand=geforce,driver>=525,driver<526 brand=geforcertx,driver>=525,driver<526 brand=quadro,driver>=525,driver<526 brand=quadrortx,driver>=525,driver<526 brand=titan,driver>=525,driver<526 brand=titanrtx,driver>=525,driver<526
NV_CUDA_CUDART_VERSION=12.1.55-1
NV_CUDA_COMPAT_PACKAGE=cuda-compat-12-1
CUDA_VERSION=12.1.0
LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64
NVIDIA_VISIBLE_DEVICES=all
NVIDIA_DRIVER_CAPABILITIES=compute,utility
CONDA_PREFIX=/opt/conda
HUGGINGFACE_HUB_CACHE=/data
HF_HUB_ENABLE_HF_TRANSFER=1
LD_PRELOAD=/opt/conda/lib/python3.10/site-packages/nvidia/nccl/lib/libnccl.so.2
HOME=/root

TGI input example 

{ "inputs": "### You are a helpful, respectful and honest assistant to help the user with questions. Please refer to the search results obtained from the local knowledge base. But be careful to not incorporate the information that you think is not relevant to the question. If you don't know the answer to a question, please don't share false information. ### Search results:
  discounts, largely due to strategic pricing actions and product mix.\n• Selling and administrative expense increased 15% due to higher operating overhead and demand creation expense. The increase in operating overhead expense\nwas primarily due to higher wage-related costs and higher NIKE Direct variable costs, in part due to new store additions.  ……. (~ 1500 characters)
  ### Question: What is the revenue of Nike in last 10 years before 2023? Give me detail \n\n### Answer:",
  "parameters": {
    "details": false,
    "do_sample": false,
    "max_new_tokens": 1024,
    "repetition_penalty": 1.03,
    "return_full_text": false,
    "stop": [],
    "temperature": 0.01,
    "top_k": 10,
    "top_p": 0.95
  },
  "stream": true}

download file

https://huggingface.co/BAAI/bge-base-en-v1.5/resolve/main/onnx/model.onnx in tei

https://huggingface.co/BAAI/bge-reranker-base/resolve/main/tokenizer.json in teirerank

/api/models/Intel/neural-chat-7b-v3-3 tgi

 

REF:

https://github.com/intel-innersource/frameworks.edge.one-intel-edge.maestro-app.application-catalog-examples/tree/main/opea/chatqna

 

ghp_<REDACTED — leaked GitHub personal access token; revoke it at https://github.com/settings/tokens>

ghp_<REDACTED — leaked GitHub personal access token; revoke it at https://github.com/settings/tokens>

posted @ 2024-09-14 14:55  lvmxh  阅读(3)  评论(0编辑  收藏  举报