【kserve】transformer使用以及predictor所包含的接口
Transformer使用
transformer是对模型推理的预处理和后处理,
class ImageTransformer(Model): def __init__(self, name: str, predictor_host: str, protocol: str): super().__init__(name) self.predictor_host = predictor_host self.protocol = protocol self.ready = True def preprocess(self, request: Dict) -> ModelInferRequest: # Input follows the Tensorflow V1 HTTP API for binary values # https://www.tensorflow.org/tfx/serving/api_rest#encoding_binary_values input_tensors = [image_transform(instance) for instance in request["instances"]] # Transform to KServe v1/v2 inference protocol if self.protocol == PredictorProtocol.GRPC_V2.value: return self.v2_request_transform(numpy.asarray(input_tensors)) else: inputs = [{"data": input_tensor.tolist()} for input_tensor in input_tensors] request = {"instances": inputs} return request def v2_request_transform(self, input_tensors): request = ModelInferRequest() request.model_name = self.name input_0 = InferInput("INPUT__0", input_tensors.shape, "FP32") input_0.set_data_from_numpy(input_tensors) request.inputs.extend([input_0._get_tensor()]) if input_0._get_content() is not None: request.raw_input_contents.extend([input_0._get_content()]) return request def postprocess(self, infer_response: ModelInferResponse) -> Dict: if self.protocol == PredictorProtocol.GRPC_V2.value: response = InferResult(infer_response) return {"predictions": response.as_numpy("OUTPUT__0").tolist()} else: return infer_response parser = argparse.ArgumentParser(parents=[model_server.parser]) parser.add_argument( "--predictor_host", help="The URL for the model predict function", required=True ) parser.add_argument( "--protocol", help="The protocol for the predictor", default="v1" ) parser.add_argument( "--model_name", help="The name that the model is served under." ) args, _ = parser.parse_known_args() if __name__ == "__main__": model = ImageTransformer(args.model_name, predictor_host=args.predictor_host, protocol=args.protocol) ModelServer(workers=1).start([model])class ImageTransformer(Model): def __init__(self, name: str, predictor_host: str, protocol: str): super().__init__(name) self.predictor_host = predictor_host self.protocol = protocol self.ready = True def preprocess(self, request: Dict) -> ModelInferRequest: # Input follows the Tensorflow V1 HTTP API for binary values # https://www.tensorflow.org/tfx/serving/api_rest#encoding_binary_values input_tensors = [image_transform(instance) for instance in request["instances"]] # Transform to KServe v1/v2 inference protocol if self.protocol == PredictorProtocol.GRPC_V2.value: return self.v2_request_transform(numpy.asarray(input_tensors)) else: inputs = [{"data": input_tensor.tolist()} for input_tensor in input_tensors] request = {"instances": inputs} return request def v2_request_transform(self, input_tensors): request = ModelInferRequest() request.model_name = self.name input_0 = InferInput("INPUT__0", input_tensors.shape, "FP32") input_0.set_data_from_numpy(input_tensors) request.inputs.extend([input_0._get_tensor()]) if input_0._get_content() is not None: request.raw_input_contents.extend([input_0._get_content()]) return request def postprocess(self, infer_response: ModelInferResponse) -> Dict: if self.protocol == PredictorProtocol.GRPC_V2.value: response = InferResult(infer_response) return {"predictions": response.as_numpy("OUTPUT__0").tolist()} else: return infer_response parser = argparse.ArgumentParser(parents=[model_server.parser]) parser.add_argument( "--predictor_host", help="The URL for the model predict function", required=True ) parser.add_argument( "--protocol", help="The protocol for the predictor", default="v1" ) parser.add_argument( "--model_name", help="The name that the model is served under." ) args, _ = parser.parse_known_args() if __name__ == "__main__": model = ImageTransformer(args.model_name, predictor_host=args.predictor_host, protocol=args.protocol) ModelServer(workers=1).start([model])
- 自定义java镜像
虽然kserve官方并没有支持其他语言的api,用于支撑kserve,但根据源码解读,其实现它的透传协议即可完成kserve的自定义。
源码解读地址: https://github.com/kserve/kserve/blob/master/python/kserve/kserve/model.py
流程如下:
因此自定义接口完成2步即可:
1. 按照 /v1/models/([a-zA-Z0-9_-]+):predict 格式的url获取推理请求
2.根据 args获取predictor_host,在transformer完成预处理后转发给predictor_host进行推理
包含的接口
源码解析: https://github.com/kserve/kserve/blob/master/python/kserve/kserve/model_server.py
/v1/models 模型名称
/v1/models/([a-zA-Z0-9_-]+) 模型健康状态
/v1/models/([a-zA-Z0-9_-]+):predict 模型预测地址
/v1/models/([a-zA-Z0-9_-]+):explain 模型详情
/v2/health/live 健康检查
/v2/models 模型名称
/v2/models/([a-zA-Z0-9_-]+)/status 模型状态
/v2/models/([a-zA-Z0-9_-]+)/infer 模型预测地址
/v2/models/([a-zA-Z0-9_-]+)/explain 模型详情
/v2/repository/models/([a-zA-Z0-9_-]+)/load
/v2/repository/models/([a-zA-Z0-9_-]+)/unload