Nacos中文官网

https://nacos.io/zh-cn/docs/v2/quickstart/quick-start.html

客户端注册流程(客户端pull和服务端push)

//1、加载配置类NacosDiscoveryProperties
//配置的优先级
// Heartbeat interval: only copied into the instance metadata when it is explicitly configured.
if (null != nacosDiscoveryProperties.getHeartBeatInterval()) {
			metadata.put(PreservedMetadataKeys.HEART_BEAT_INTERVAL,
					nacosDiscoveryProperties.getHeartBeatInterval().toString());
}
// Heartbeat timeout. Per the original note: defaults to 15 seconds, after which the instance is marked unhealthy.
if (null != nacosDiscoveryProperties.getHeartBeatTimeout()) {
			metadata.put(PreservedMetadataKeys.HEART_BEAT_TIMEOUT,
					nacosDiscoveryProperties.getHeartBeatTimeout().toString());
}
// IP delete timeout. Per the original note: defaults to 30 seconds, after which the instance is taken offline.
if (null != nacosDiscoveryProperties.getIpDeleteTimeout()) {
			metadata.put(PreservedMetadataKeys.IP_DELETE_TIMEOUT,
					nacosDiscoveryProperties.getIpDeleteTimeout().toString());
}

  /** Publishes a NacosDiscoveryInfoChangedEvent after construction when the discovery info has changed. */
  @PostConstruct
	public void init() throws Exception {
		// Nothing to announce when the discovery info is unchanged.
		if (!nacosServiceManager.isNacosDiscoveryInfoChanged(this)) {
			return;
		}
		// Publish the change event so that listeners can react to it.
		NacosDiscoveryInfoChangedEvent changed = new NacosDiscoveryInfoChangedEvent(this);
		applicationEventPublisher.publishEvent(changed);
	}

//2、定义的starter组件 自动加载 NacosServiceRegistryAutoConfiguration
   // Declares the NacosAutoServiceRegistration bean, built from the registry, the auto-registration
   // properties and the registration. Only active when an AutoServiceRegistrationProperties bean exists.
   @Bean
	@ConditionalOnBean(AutoServiceRegistrationProperties.class)
	public NacosAutoServiceRegistration nacosAutoServiceRegistration(
			NacosServiceRegistry registry,
			AutoServiceRegistrationProperties autoServiceRegistrationProperties,
			NacosRegistration registration) {
		return new NacosAutoServiceRegistration(registry,
				autoServiceRegistrationProperties, registration);
	}

//3、监听事件 NacosDiscoveryInfoChangedEvent 
  // Listener for NacosDiscoveryInfoChangedEvent; it reacts by calling restart().
  @EventListener
	public void onNacosDiscoveryInfoChangedEvent(NacosDiscoveryInfoChangedEvent event) {
		restart();
	}

//4、调用注册逻辑
register();

//5、AbstractAutoServiceRegistration
// Hands the current registration (from getRegistration()) to the configured service registry.
protected void register() {
		this.serviceRegistry.register(getRegistration());
	}

//6、NacosServiceRegistry.register() 具体的实现类.方法

//7、NacosNamingService.registerInstance()
/**
 * Registers an instance under the grouped service name.
 *
 * @param serviceName plain service name
 * @param groupName   group the service belongs to
 * @param instance    instance to register
 * @throws NacosException declared for the calls this method delegates to
 */
@Override
    public void registerInstance(String serviceName, String groupName, Instance instance) throws NacosException {
        final String groupedName = NamingUtils.getGroupedName(serviceName, groupName);
        // Ephemeral instances get their heartbeat info added to the beat reactor before registering
        // (heartbeat period parameter per the original note: preserved.heart.beat.interval).
        if (instance.isEphemeral()) {
            beatReactor.addBeatInfo(groupedName, beatReactor.buildBeatInfo(groupedName, instance));
        }
        serverProxy.registerService(groupedName, groupName, instance);
    }

//8、定期去上报心跳信息 第一次上报会携带beat信息 后续不会携带
1. 定义的starter组件 自动加载 NacosServiceRegistryAutoConfiguration
   return new NacosAutoServiceRegistration(registry,
			autoServiceRegistrationProperties, registration)
2. 加载配置 并发布事件:NacosDiscoveryInfoChangedEvent
3. 注入NacosAutoServiceRegistration 并监听事件NacosDiscoveryInfoChangedEvent
4. 调用start方法
5. NacosServiceRegistry.register() 具体的实现类.方法
6. NacosNamingService.registerInstance()
7. 先开启定时任务 去定时刷新心跳 再去注册 默认情况下: 5秒上报一次心跳
8. 注册完成后 服务中心会将服务变动通知给客户端 采用UDP通信 例如:new ips(1) service
9. client注册时 ephemeral=true 采用的是AP模式 即服务端集群中只要有一个注册成功则返回 不用等待所有都更新成功。
10. 第一次会携带beat信息 剩下的不会携带beat信息

ephemeral

nacos 目前的instance有一个ephemeral字段属性,该字段表示实例是临时实例还是持久化实例。如果是临时实例则不会在nacos中持久化,需要通过心跳上报,如果一段时间没有上报心跳,则会被nacos服务端删除。删除后如果又重新开始上报,则会重新注册实例。而持久化实例会被nacos服务端持久化,此时即使注册实例的进程不存在,这个实例也不会删除,只会将健康状态设置成不健康。 临时:true 持久化:false

心跳机制

  1. 客户端上报模式:客户端通过心跳上报的方式告知nacos 注册中心健康状态(默认心跳间隔5s,nacos将超过15s未收到心跳的实例设置为不健康(不会发送通知),超过30s将实例删除)。

服务端注册流程、服务端健康检查

//注册接口
 /**
  * HTTP entry point for instance registration.
  *
  * @param request incoming request carrying namespace, service name and instance parameters
  * @return the literal string "ok" once the instance has been registered
  * @throws Exception propagated from parameter validation or registration
  */
 @CanDistro
    @PostMapping
    @Secured(action = ActionTypes.WRITE)
    public String register(HttpServletRequest request) throws Exception {
        // The namespace is optional (falls back to the default); the service name is required.
        final String namespaceId = WebUtils
                .optional(request, CommonParams.NAMESPACE_ID, Constants.DEFAULT_NAMESPACE_ID);
        final String serviceName = WebUtils.required(request, CommonParams.SERVICE_NAME);
        NamingUtils.checkServiceNameFormat(serviceName);

        final Instance instance = HttpRequestInstanceBuilder.newBuilder()
                .setDefaultInstanceEphemeral(switchDomain.isDefaultInstanceEphemeral())
                .setRequest(request)
                .build();

        // Register the instance.
        getInstanceOperator().registerInstance(namespaceId, serviceName, instance);

        // Publish a trace event describing the registration.
        final long registeredAt = System.currentTimeMillis();
        final String groupName = NamingUtils.getGroupName(serviceName);
        final String pureServiceName = NamingUtils.getServiceName(serviceName);
        RegisterInstanceTraceEvent traceEvent = new RegisterInstanceTraceEvent(registeredAt, "", false, namespaceId,
                groupName, pureServiceName, instance.getIp(), instance.getPort());
        NotifyCenter.publishEvent(traceEvent);
        return "ok";
    }

//InstanceOperatorClientImpl
 // Validates the instance, makes sure an ip:port based client exists for it, then delegates the
 // registration to the client operation service.
 @Override
    public void registerInstance(String namespaceId, String serviceName, Instance instance) throws NacosException {
        NamingUtils.checkInstanceIsLegal(instance);
        
        boolean ephemeral = instance.isEphemeral();

        // The client id is derived from the instance address plus the ephemeral flag.
        String clientId = IpPortBasedClient.getClientId(instance.toInetAddr(), ephemeral);

        // Create the ip:port based client if it is not known yet. Per the original note this also starts its
        // health check (5 s initial delay, then every 5 s, one task per client) -- not visible here, TODO confirm.
        createIpPortClientIfAbsent(clientId);
        Service service = getService(namespaceId, serviceName, ephemeral);

        // Register the instance.
        clientOperationService.registerInstance(service, instance, clientId);
    }

 //InstanceOperatorClientImpl  客户端连接
    /**
     * Connects a client for the given id unless the client manager already contains it.
     *
     * @param clientId id of the ip:port based client
     */
    private void createIpPortClientIfAbsent(String clientId) {
        if (clientManager.contains(clientId)) {
            return;
        }
        // Prefer the attributes bound to the current thread; otherwise start with empty attributes.
        ClientAttributes threadBound = ClientAttributesFilter.threadLocalClientAttributes.get();
        ClientAttributes clientAttributes = threadBound != null ? threadBound : new ClientAttributes();
        // Connect the client.
        clientManager.clientConnected(clientId, clientAttributes);
    }

//客户端连接 EphemeralIpPortClientManager
// Stores the client under its id unless an entry already exists; always returns true.
@Override
    public boolean clientConnected(final Client client) {
        clients.computeIfAbsent(client.getClientId(), s -> {
            Loggers.SRV_LOG.info("Client connection {} connect", client.getClientId());
            IpPortBasedClient ipPortBasedClient = (IpPortBasedClient) client;
            // Initialise the client; init() schedules its server-side health check.
            ipPortBasedClient.init();
            return ipPortBasedClient;
        });
        return true;
    }

    //IpPortBasedClient
    /**
     * Schedules the check task that matches the client type: a beat check for ephemeral clients,
     * a health check task for persistent ones.
     */
    public void init() {
        if (!ephemeral) {
            // Persistent client.
            healthCheckTaskV2 = new HealthCheckTaskV2(this);
            HealthCheckReactor.scheduleCheck(healthCheckTaskV2);
            return;
        }
        // Ephemeral client.
        beatCheckTask = new ClientBeatCheckTaskV2(this);
        HealthCheckReactor.scheduleCheck(beatCheckTask);
    }
  
//ClientBeatCheckTaskV2
    /**
     * Runs the beat check for every service published by this client. Any exception is logged
     * as a warning and not rethrown.
     */
    @Override
    public void doHealthCheck() {
        try {
            for (Service published : client.getAllPublishedService()) {
                HealthCheckInstancePublishInfo publishInfo =
                        (HealthCheckInstancePublishInfo) client.getInstancePublishInfo(published);
                InstanceBeatCheckTask beatCheck = new InstanceBeatCheckTask(client, published, publishInfo);
                interceptorChain.doInterceptor(beatCheck);
            }
        } catch (Exception e) {
            Loggers.SRV_LOG.warn("Exception while processing client beat time out.", e);
        }
    }

//InstanceBeatCheckTask
   // Registers the two built-in beat checkers first, then every InstanceBeatChecker
   // returned by NacosServiceLoader.
   static {
        // Checker for unhealthy instances.
        CHECKERS.add(new UnhealthyInstanceChecker());

        // Checker for expired instances.
        CHECKERS.add(new ExpiredInstanceChecker());
        CHECKERS.addAll(NacosServiceLoader.load(InstanceBeatChecker.class));
    }
    /** Runs every registered checker (unhealthy and expired alike) against the instance. */
    @Override
    public void passIntercept() {
        CHECKERS.forEach(checker -> checker.doCheck(client, service, instancePublishInfo));
    }

public class IpPortBasedClient extends AbstractClient    
   // AbstractClient keeps the newly registered services in an internal map, keyed by Service.
    protected final ConcurrentHashMap<Service, InstancePublishInfo> publishers = new ConcurrentHashMap<>(16, 0.75f, 1);

//注册的具体逻辑
EphemeralClientOperationServiceImpl
/**
 * Registers an ephemeral instance for the given client.
 *
 * @param service  service the instance belongs to
 * @param instance instance to register
 * @param clientId id of the client that owns the instance
 * @throws NacosException declared for the validation and lookup calls
 */
@Override
    public void registerInstance(Service service, Instance instance, String clientId) throws NacosException {
        NamingUtils.checkInstanceIsLegal(instance);

        // Work on the singleton Service object; a persistent service rejects ephemeral instances.
        final Service registered = ServiceManager.getInstance().getSingleton(service);
        if (!registered.isEphemeral()) {
            String reason = String.format(
                    "Current service %s is persistent service, can't register ephemeral instance.",
                    registered.getGroupedServiceName());
            throw new NacosRuntimeException(NacosException.INVALID_PARAM, reason);
        }

        final Client owner = clientManager.getClient(clientId);
        if (!clientIsLegal(owner, clientId)) {
            return;
        }

        // Store the instance in the client's internal map and refresh its bookkeeping.
        final InstancePublishInfo publishInfo = getPublishInfo(instance);
        owner.addServiceInstance(registered, publishInfo);
        owner.setLastUpdatedTime();
        owner.recalculateRevision();

        // Publish the registration event and the instance metadata event.
        NotifyCenter.publishEvent(new ClientOperationEvent.ClientRegisterServiceEvent(registered, clientId));
        NotifyCenter.publishEvent(
                new MetadataEvent.InstanceMetadataEvent(registered, publishInfo.getMetadataId(), false));
    }

服务端心跳

https://www.cnblogs.com/wtzbk/p/14366240.html

 //方法入口
 @PutMapping("/beat")

//主要入口
int resultCode = getInstanceOperator()
             .handleBeat(namespaceId, serviceName, ip, port, clusterName, clientBeat, builder);
//处理心跳的逻辑
/**
 * Handles a client heartbeat.
 *
 * @param namespaceId namespace of the service
 * @param serviceName name of the service
 * @param ip          instance ip
 * @param port        instance port
 * @param cluster     cluster name
 * @param clientBeat  beat payload sent by the client, may be null
 * @param builder     builder used to create an instance from the beat payload
 * @return RESOURCE_NOT_FOUND when the instance is unknown and no beat payload was sent, otherwise OK
 * @throws NacosException SERVER_ERROR when the service is not present in the ServiceManager
 */
@Override
 public int handleBeat(String namespaceId, String serviceName, String ip, int port, String cluster,
         RsInfo clientBeat, BeatInfoInstanceBuilder builder) throws NacosException {
     Service service = getService(namespaceId, serviceName, true);
     String clientId = IpPortBasedClient.getClientId(ip + InternetAddressUtil.IP_PORT_SPLITER + port, true);
     IpPortBasedClient client = (IpPortBasedClient) clientManager.getClient(clientId);
     // Unknown client, or the client has not published this service.
     if (null == client || !client.getAllPublishedService().contains(service)) {
         // Without a beat payload there is nothing to register from.
         if (null == clientBeat) {
             return NamingResponseCode.RESOURCE_NOT_FOUND;
         }
         // Register the instance described by the beat, then fetch the client again.
         Instance instance = builder.setBeatInfo(clientBeat).setServiceName(serviceName).build();
         registerInstance(namespaceId, serviceName, instance);
         client = (IpPortBasedClient) clientManager.getClient(clientId);
     }
     if (!ServiceManager.getInstance().containSingleton(service)) {
         throw new NacosException(NacosException.SERVER_ERROR,
                 "service not found: " + serviceName + "@" + namespaceId);
     }
     // No payload sent: build a minimal beat from the request parameters.
     if (null == clientBeat) {
         clientBeat = new RsInfo();
         clientBeat.setIp(ip);
         clientBeat.setPort(port);
         clientBeat.setCluster(cluster);
         clientBeat.setServiceName(serviceName);
     }

     // Hand the beat to a processor task via HealthCheckReactor.scheduleNow.
     ClientBeatProcessorV2 beatProcessor = new ClientBeatProcessorV2(namespaceId, clientBeat, client);
     HealthCheckReactor.scheduleNow(beatProcessor);
     client.setLastUpdatedTime();
     return NamingResponseCode.OK;
 }
具体执行:ClientBeatProcessorV2.run();

//更新最后更新时间、如果当前状态是非健康状态则更新为健康状态
/**
 * Processes one client beat: refreshes the last heartbeat time of the matching instance and,
 * if the instance was unhealthy, marks it healthy and publishes the related events.
 * Does nothing when the client has no publish info for the service.
 */
@Override
 public void run() {
     if (Loggers.EVT_LOG.isDebugEnabled()) {
         Loggers.EVT_LOG.debug("[CLIENT-BEAT] processing beat: {}", rsInfo.toString());
     }
     String ip = rsInfo.getIp();
     int port = rsInfo.getPort();
     String serviceName = NamingUtils.getServiceName(rsInfo.getServiceName());
     String groupName = NamingUtils.getGroupName(rsInfo.getServiceName());
     Service service = Service.newService(namespace, groupName, serviceName, rsInfo.isEphemeral());
     HealthCheckInstancePublishInfo instance = (HealthCheckInstancePublishInfo) client.getInstancePublishInfo(service);
     // The lookup can come back empty (TcpHealthCheckProcessor#process guards the same call);
     // skip the beat instead of failing with a NullPointerException.
     if (instance == null) {
         return;
     }
     // Only the instance matching the beat's ip and port is refreshed.
     if (instance.getIp().equals(ip) && instance.getPort() == port) {
         if (Loggers.EVT_LOG.isDebugEnabled()) {
             Loggers.EVT_LOG.debug("[CLIENT-BEAT] refresh beat: {}", rsInfo);
         }
         instance.setLastHeartBeatTime(System.currentTimeMillis());
         // An unhealthy instance becomes healthy again once a beat arrives.
         if (!instance.isHealthy()) {
             instance.setHealthy(true);
             Loggers.EVT_LOG.info("service: {} {POS} {IP-ENABLED} valid: {}:{}@{}, region: {}, msg: client beat ok",
                     rsInfo.getServiceName(), ip, port, rsInfo.getCluster(), UtilsAndCommons.LOCALHOST_SITE);
             // Service changed event (per the original note: notifies subscribers).
             NotifyCenter.publishEvent(new ServiceEvent.ServiceChangedEvent(service));
             // Client changed event (per the original note: syncs the change to other servers).
             NotifyCenter.publishEvent(new ClientEvent.ClientChangedEvent(client));
             // Trace event recording the health state change.
             NotifyCenter.publishEvent(new HealthStateChangeTraceEvent(System.currentTimeMillis(),
                     service.getNamespace(), service.getGroup(), service.getName(), instance.getIp(),
                     instance.getPort(), true, "client_beat"));
         }
     }
 }

服务端健康检查

通过TCP端口探测方式检测实例是否存活,没有存活则标记为不健康
获取所有已经发布的服务进行健康检查

    //健康检查任务
    HealthCheckTaskV2.run();
    
    // NOTE: this excerpt is truncated -- the catch/finally part of the try block and the
    // method's closing brace are not shown.
    @Override
    public void doHealthCheck() {
        try {
            initIfNecessary();

            // Walk through every service published by this client.
            for (Service each : client.getAllPublishedService()) {
                // Only services whose health check is enabled in the switch domain are processed.
                if (switchDomain.isHealthCheckEnabled(each.getGroupedServiceName())) {
                    InstancePublishInfo instancePublishInfo = client.getInstancePublishInfo(each);
                    ClusterMetadata metadata = getClusterMetadata(each, instancePublishInfo);
                    // Delegate the actual health check to the processor delegate bean.
                    ApplicationUtils.getBean(HealthCheckProcessorV2Delegate.class).process(this, each, metadata);
                    if (Loggers.EVT_LOG.isDebugEnabled()) {
                        Loggers.EVT_LOG.debug("[HEALTH-CHECK] schedule health check task: {}", client.getClientId());
                    }
                }
            }
        }
    //调用
    HealthCheckProcessorV2Delegate.process();
    //再调用
    TcpHealthCheckProcessor.process();
    // Queues a TCP health check for the instance the task's client published for this service.
    @Override
    public void process(HealthCheckTaskV2 task, Service service, ClusterMetadata metadata) {
        HealthCheckInstancePublishInfo instance = (HealthCheckInstancePublishInfo) task.getClient()
                .getInstancePublishInfo(service);
        // Nothing published for this service: nothing to check.
        if (null == instance) {
            return;
        }
        // TODO handle marked(white list) logic like v1.x.
        // tryStartCheck() returning false is treated as "previous check still running": log a warning,
        // re-evaluate the check round-trip time with a doubled value and skip this round.
        if (!instance.tryStartCheck()) {
            SRV_LOG.warn("[HEALTH-CHECK-V2] tcp check started before last one finished, service: {} : {} : {}:{}",
                    service.getGroupedServiceName(), instance.getCluster(), instance.getIp(), instance.getPort());
            healthCheckCommon
                    .reEvaluateCheckRT(task.getCheckRtNormalized() * 2, task, switchDomain.getTcpHealthParams());
            return;
        }
        // Put the check into the task queue and bump the TCP health check metric.
        taskQueue.add(new Beat(task, service, metadata, instance));
        MetricsMonitor.getTcpHealthCheckMonitor().incrementAndGet();
    }

  
   //对队列进行处理
   TcpHealthCheckProcessor.processTask();
   //具体代码
   // Collects queued beats into a batch of TaskProcessor callables and runs the batch.
   private void processTask() throws Exception {
        Collection<Callable<Void>> tasks = new LinkedList<>();
        do {
            // Wait up to half the connect timeout for the next beat.
            Beat beat = taskQueue.poll(CONNECT_TIMEOUT_MS / 2, TimeUnit.MILLISECONDS);
            // No beat arrived: return, which also skips any tasks already collected in this call.
            if (beat == null) {
                return;
            }
            // Wrap the beat in a processing task and add it to the batch.
            tasks.add(new TaskProcessor(beat));
        } while (taskQueue.size() > 0 && tasks.size() < NIO_THREAD_COUNT * 64);
        
        // Invoke the whole batch (in parallel, per the original note) and wait for every result.
        for (Future<?> f : GlobalExecutor.invokeAllTcpSuperSenseTask(tasks)) {
            f.get();
        }
    }

  //TaskProcessor
  参考:https://www.dazhuanlan.com/mrzizai/topics/1120359
  TimeOutTask
  // Timeout handler: if the channel behind the key is still not connected, the check is recorded as failed.
  public void run() {
            // Nothing to do without a valid selection key.
            if (key == null || !key.isValid()) {
                return;
            }
            SocketChannel channel = (SocketChannel) key.channel();
            Beat beat = (Beat) key.attachment();

            // A connected channel means the check did not time out.
            if (channel.isConnected()) {
                return;
            }

            // Try to finish the pending connect; any failure is ignored because the check is failed below anyway.
            try {
                channel.finishConnect();
            } catch (Exception ignored) {
            }

            // Record the timeout as a failed check, then cancel the key and close its channel (best effort).
            try {
                beat.finishCheck(false, false, beat.getTask().getCheckRtNormalized() * 2, "tcp:timeout");
                key.cancel();
                key.channel().close();
            } catch (Exception ignored) {
            }
        }


    IpPortBasedClient
    /**
     * Init client: schedules a beat check for ephemeral clients and a health check task
     * for persistent ones.
     */
    public void init() {
        if (!ephemeral) {
            // Persistent client.
            healthCheckTaskV2 = new HealthCheckTaskV2(this);
            HealthCheckReactor.scheduleCheck(healthCheckTaskV2);
            return;
        }
        // Ephemeral client.
        beatCheckTask = new ClientBeatCheckTaskV2(this);
        HealthCheckReactor.scheduleCheck(beatCheckTask);
    }

  //HealthCheckReactor
  /**
   * Schedules a health check task, wrapped in an intercept wrapper, with the task's
   * normalized check round-trip time as the delay in milliseconds.
   *
   * @param task health check task to schedule
   */
  public static void scheduleCheck(HealthCheckTaskV2 task) {
        // Record the start time before the task is handed to the executor.
        task.setStartTime(System.currentTimeMillis());
        Runnable intercepted = new HealthCheckTaskInterceptWrapper(task);
        GlobalExecutor.scheduleNamingHealth(intercepted, task.getCheckRtNormalized(), TimeUnit.MILLISECONDS);
    }

nacos自我保护机制

https://blog.csdn.net/why_2012_gogo/article/details/126077894
自我保护机制(保护阈值0):Nacos为每个服务提供了保护阈值,其值范围在0-1之间,当服务实例健康个数占比(健康实例个数/总实例个数)小于这个保护阈值时,为降低服务雪崩的可能,也会向不健康实例发送请求,虽然牺牲了部分请求,但也起到了均摊流量的作用,避免整体系统无法提供服务的风险,这是值得的,这个阈值可在nacos ui后台手动设定。

服务拉取

  1. ServerListManager.start(); 间隔30秒获取一次服务器端的数据
  2. 相当于新注册一个服务上去 最多需要30秒客户端才能调用 配置参数:

配置客户端拉取服务端 列表的时间间隔 便于快速获取服务端的上线 默认值30秒

ribbon:
  ServerListRefreshInterval: 10000

       /**
        * Starts the server list manager: loads the server list (with retries) and then schedules
        * the refresh task with a fixed delay of 30 seconds. Does nothing when already started or fixed.
        *
        * @throws NacosException SERVER_ERROR when no server url could be obtained
        */
       public synchronized void start() throws NacosException {
      
      if (isStarted || isFixed) {
          return;
      }
      
      GetServerListTask getServersTask = new GetServerListTask(addressServerUrl);
      for (int i = 0; i < initServerlistRetryTimes && serverUrls.isEmpty(); ++i) {
          getServersTask.run();
          try {
              // Back off a little longer after every attempt.
              this.wait((i + 1) * 100L);
          } catch (InterruptedException e) {
              LOGGER.warn("get serverlist fail,url: {}", addressServerUrl, e);
              // Keep the interrupt visible to callers and stop retrying; the emptiness
              // check below decides whether start-up can continue.
              Thread.currentThread().interrupt();
              break;
          }
      }
      
      if (serverUrls.isEmpty()) {
          LOGGER.error("[init-serverlist] fail to get NACOS-server serverlist! env: {}, url: {}", name,
                  addressServerUrl);
          throw new NacosException(NacosException.SERVER_ERROR,
                  "fail to get NACOS-server serverlist! env:" + name + ", not connnect url:" + addressServerUrl);
      }
      
      // executor schedules the timer task
      this.executorService.scheduleWithFixedDelay(getServersTask, 0L, 30L, TimeUnit.SECONDS);
      isStarted = true;
  }

nacos配置中心

客户端保持长轮询进行拉取,确保实时更新 这个比较简单

//组件自动加载
 配置类:NacosConfigAutoConfiguration
  //1、
  // Declares the NacosConfigManager bean, built from the Nacos config properties.
  @Bean
  public NacosConfigManager nacosConfigManager(
  		NacosConfigProperties nacosConfigProperties) {
  	return new NacosConfigManager(nacosConfigProperties);
  }
  //2、通过反射进行加载 
   //类:ConfigFactory
   /**
    * Creates the config service reflectively by loading NacosConfigService by name and calling
    * its constructor that takes a Properties argument.
    *
    * @param properties init properties passed to the constructor
    * @return the created config service
    * @throws NacosException CLIENT_INVALID_PARAM wrapping any failure during loading or construction
    */
   public static ConfigService createConfigService(Properties properties) throws NacosException {
      try {
          Class<?> driverImplClass = Class.forName("com.alibaba.nacos.client.config.NacosConfigService");
          // Wildcard instead of the raw Constructor type keeps the lookup free of raw-type warnings.
          Constructor<?> constructor = driverImplClass.getConstructor(Properties.class);
          return (ConfigService) constructor.newInstance(properties);
      } catch (Throwable e) {
          throw new NacosException(NacosException.CLIENT_INVALID_PARAM, e);
      }
  }
  //3、NacosConfigService的构造函数
   /**
    * Builds the config service: validates the properties, resolves the encoding, initialises the
    * namespace, starts the HTTP agent and creates the client worker.
    *
    * @param properties init properties
    * @throws NacosException declared for the validation and start-up calls
    */
   public NacosConfigService(Properties properties) throws NacosException {
      ValidatorUtils.checkInitParam(properties);

      // Use the default encoding when none is configured, otherwise the trimmed configured value.
      final String configuredEncode = properties.getProperty(PropertyKeyConst.ENCODE);
      this.encode = StringUtils.isBlank(configuredEncode) ? Constants.ENCODE : configuredEncode.trim();
      initNamespace(properties);

      this.agent = new MetricsHttpAgent(new ServerHttpAgent(properties));
      this.agent.start();

      // Initialise the client worker.
      this.worker = new ClientWorker(this.agent, this.configFilterChainManager, properties);
  }
  //4、ClientWorker的方法
   /**
    * Creates the worker: sets up two scheduled daemon thread pools and schedules the periodic
    * config check (initial delay 1 ms, fixed delay 10 ms).
    *
    * @param agent                    HTTP agent, also used for thread naming
    * @param configFilterChainManager filter chain manager stored on the worker
    * @param properties               properties passed to init
    */
   @SuppressWarnings("PMD.ThreadPoolCreationRule")
  public ClientWorker(final HttpAgent agent, final ConfigFilterChainManager configFilterChainManager,
          final Properties properties) {
      this.agent = agent;
      this.configFilterChainManager = configFilterChainManager;

      // Initialize the timeout parameter
      init(properties);

      // Single daemon thread that drives the periodic check below.
      this.executor = Executors.newScheduledThreadPool(1, runnable -> {
          Thread worker = new Thread(runnable);
          worker.setName("com.alibaba.nacos.client.Worker." + agent.getName());
          worker.setDaemon(true);
          return worker;
      });

      // Daemon pool sized to the number of available processors, named for long polling.
      this.executorService = Executors
              .newScheduledThreadPool(Runtime.getRuntime().availableProcessors(), runnable -> {
                  Thread poller = new Thread(runnable);
                  poller.setName("com.alibaba.nacos.client.Worker.longPolling." + agent.getName());
                  poller.setDaemon(true);
                  return poller;
              });

      this.executor.scheduleWithFixedDelay(() -> {
          try {
              // Check for config updates (long-polling approach per the original note).
              checkConfigInfo();
          } catch (Throwable e) {
              LOGGER.error("[" + agent.getName() + "] [sub-check] rotate check error", e);
          }
      }, 1L, 10L, TimeUnit.MILLISECONDS);
  }