Spring Cloud Ribbon的原理-负载均衡器

RestTemplate内部调用负载均衡拦截器,拦截器内最终是调用了负载均衡器来选择服务实例。
接下来撸一撸负载均衡器的内部,看看是如何获取服务实例,获取以后做了哪些处理,处理后又是如何选取服务实例的。
分成三个部分来撸:

①:配置
②:获取服务
③:选择服务

 

配置
在上一篇Spring Cloud Ribbon的原理的配置部分可以看到默认的负载均衡器是ZoneAwareLoadBalancer。

看一看配置类。
位置:org.springframework.cloud.netflix.ribbon.RibbonClientConfiguration

 1 public class RibbonClientConfiguration {
 2     // Excerpt: only the load-balancer bean definition is shown here.
 3     // ... (omitted)
 4     @Bean
 5     @ConditionalOnMissingBean
 6     public ILoadBalancer ribbonLoadBalancer(IClientConfig config,
 7             ServerList<Server> serverList, ServerListFilter<Server> serverListFilter,
 8             IRule rule, IPing ping, ServerListUpdater serverListUpdater) {
 9         if (this.propertiesFactory.isSet(ILoadBalancer.class, name)) { // an ILoadBalancer is set for this client name: use it
10             return this.propertiesFactory.get(ILoadBalancer.class, config, name);
11         }
12         return new ZoneAwareLoadBalancer<>(config, rule, ping, serverList, // otherwise fall back to ZoneAwareLoadBalancer
13                 serverListFilter, serverListUpdater);
14     }
15     
16     // ... (omitted)
17 }

在实例化ZoneAwareLoadBalancer的时候,注入了config、rule、ping、serverList、serverListFilter、serverListUpdater实例。
config:配置实例。
rule:负载均衡策略实例。
ping:ping实例。
serverList:获取和更新服务的实例。
serverListFilter:服务过滤实例。
serverListUpdater:服务列表信息更新实例。

  1 public class RibbonClientConfiguration {
  2     // Declares the Ribbon beans for one named client; each factory method first asks PropertiesFactory for an override.
  3     /**
  4      * Ribbon client default connect timeout.
  5      */
  6     public static final int DEFAULT_CONNECT_TIMEOUT = 1000;
  7 
  8     /**
  9      * Ribbon client default read timeout.
 10      */
 11     public static final int DEFAULT_READ_TIMEOUT = 1000;
 12 
 13     /**
 14      * Ribbon client default Gzip Payload flag.
 15      */
 16     public static final boolean DEFAULT_GZIP_PAYLOAD = true;
 17 
 18     @RibbonClientName
 19     private String name = "client"; // initial value; presumably replaced with the actual client name via @RibbonClientName - confirm
 20 
 21     // TODO: maybe re-instate autowired load balancers: identified by name they could be
 22     // associated with ribbon clients
 23 
 24     @Autowired
 25     private PropertiesFactory propertiesFactory;
 26     // Client config: loads the properties for this client name, then sets the three defaults declared above.
 27     @Bean
 28     @ConditionalOnMissingBean
 29     public IClientConfig ribbonClientConfig() {
 30         DefaultClientConfigImpl config = new DefaultClientConfigImpl();
 31         config.loadProperties(this.name);
 32         config.set(CommonClientConfigKey.ConnectTimeout, DEFAULT_CONNECT_TIMEOUT);
 33         config.set(CommonClientConfigKey.ReadTimeout, DEFAULT_READ_TIMEOUT);
 34         config.set(CommonClientConfigKey.GZipPayload, DEFAULT_GZIP_PAYLOAD);
 35         return config;
 36     }
 37     // Load-balancing rule: the configured IRule if one is set for this client, else ZoneAvoidanceRule.
 38     @Bean
 39     @ConditionalOnMissingBean
 40     public IRule ribbonRule(IClientConfig config) {
 41         if (this.propertiesFactory.isSet(IRule.class, name)) {
 42             return this.propertiesFactory.get(IRule.class, config, name);
 43         }
 44         ZoneAvoidanceRule rule = new ZoneAvoidanceRule();
 45         rule.initWithNiwsConfig(config);
 46         return rule;
 47     }
 48     // Ping: the configured IPing if one is set for this client, else DummyPing.
 49     @Bean
 50     @ConditionalOnMissingBean
 51     public IPing ribbonPing(IClientConfig config) {
 52         if (this.propertiesFactory.isSet(IPing.class, name)) {
 53             return this.propertiesFactory.get(IPing.class, config, name);
 54         }
 55         return new DummyPing();
 56     }
 57     // Server list: the configured ServerList if one is set, else ConfigurationBasedServerList.
 58     @Bean
 59     @ConditionalOnMissingBean
 60     @SuppressWarnings("unchecked")
 61     public ServerList<Server> ribbonServerList(IClientConfig config) {
 62         if (this.propertiesFactory.isSet(ServerList.class, name)) {
 63             return this.propertiesFactory.get(ServerList.class, config, name);
 64         }
 65         ConfigurationBasedServerList serverList = new ConfigurationBasedServerList();
 66         serverList.initWithNiwsConfig(config);
 67         return serverList;
 68     }
 69     // Server-list updater: always a PollingServerListUpdater built from the client config (no PropertiesFactory lookup here).
 70     @Bean
 71     @ConditionalOnMissingBean
 72     public ServerListUpdater ribbonServerListUpdater(IClientConfig config) {
 73         return new PollingServerListUpdater(config);
 74     }
 75     // Load balancer: the configured ILoadBalancer if one is set, else ZoneAwareLoadBalancer wired with the beans above.
 76     @Bean
 77     @ConditionalOnMissingBean
 78     public ILoadBalancer ribbonLoadBalancer(IClientConfig config,
 79             ServerList<Server> serverList, ServerListFilter<Server> serverListFilter,
 80             IRule rule, IPing ping, ServerListUpdater serverListUpdater) {
 81         if (this.propertiesFactory.isSet(ILoadBalancer.class, name)) {
 82             return this.propertiesFactory.get(ILoadBalancer.class, config, name);
 83         }
 84         return new ZoneAwareLoadBalancer<>(config, rule, ping, serverList,
 85                 serverListFilter, serverListUpdater);
 86     }
 87     // Server-list filter: the configured ServerListFilter if one is set, else ZonePreferenceServerListFilter.
 88     @Bean
 89     @ConditionalOnMissingBean
 90     @SuppressWarnings("unchecked")
 91     public ServerListFilter<Server> ribbonServerListFilter(IClientConfig config) {
 92         if (this.propertiesFactory.isSet(ServerListFilter.class, name)) {
 93             return this.propertiesFactory.get(ServerListFilter.class, config, name);
 94         }
 95         ZonePreferenceServerListFilter filter = new ZonePreferenceServerListFilter();
 96         filter.initWithNiwsConfig(config);
 97         return filter;
 98     }
 99     // Context object bundling the load balancer, client config and retry handler.
100     @Bean
101     @ConditionalOnMissingBean
102     public RibbonLoadBalancerContext ribbonLoadBalancerContext(ILoadBalancer loadBalancer,
103             IClientConfig config, RetryHandler retryHandler) {
104         return new RibbonLoadBalancerContext(loadBalancer, config, retryHandler);
105     }
106     // Retry handler: DefaultLoadBalancerRetryHandler built from the client config.
107     @Bean
108     @ConditionalOnMissingBean
109     public RetryHandler retryHandler(IClientConfig config) {
110         return new DefaultLoadBalancerRetryHandler(config);
111     }
112 
113     @Bean
114     @ConditionalOnMissingBean
115     public ServerIntrospector serverIntrospector() {
116         return new DefaultServerIntrospector();
117     }
118     // After construction, sets the DeploymentContextBasedVipAddresses property of this client to the client name itself.
119     @PostConstruct
120     public void preprocess() {
121         setRibbonProperty(name, DeploymentContextBasedVipAddresses.key(), name);
122     }
123     // RestClient variant that may rewrite the URI via updateToSecureConnectionIfNeeded and sets the cookie policy to IGNORE_COOKIES.
124     static class OverrideRestClient extends RestClient {
125 
126         private IClientConfig config;
127 
128         private ServerIntrospector serverIntrospector;
129 
130         protected OverrideRestClient(IClientConfig config,
131                 ServerIntrospector serverIntrospector) {
132             super();
133             this.config = config;
134             this.serverIntrospector = serverIntrospector;
135             initWithNiwsConfig(this.config); // initialise only after both fields are assigned
136         }
137 
138         @Override
139         public URI reconstructURIWithServer(Server server, URI original) {
140             URI uri = updateToSecureConnectionIfNeeded(original, this.config, // adjust the URI first, then let the parent rebuild it for the server
141                     this.serverIntrospector, server);
142             return super.reconstructURIWithServer(server, uri);
143         }
144 
145         @Override
146         protected Client apacheHttpClientSpecificInitialization() {
147             ApacheHttpClient4 apache = (ApacheHttpClient4) super.apacheHttpClientSpecificInitialization();
148             apache.getClientHandler().getHttpClient().getParams().setParameter(
149                     ClientPNames.COOKIE_POLICY, CookiePolicy.IGNORE_COOKIES);
150             return apache;
151         }
152 
153     }
154 
155 }

在这里配置相关的实例

config:DefaultClientConfigImpl。
rule:ZoneAvoidanceRule。
ping:DummyPing。
serverList:ConfigurationBasedServerList,基于配置的服务列表实例。
serverListFilter:ZonePreferenceServerListFilter。
serverListUpdater:PollingServerListUpdater。
要注意的是,在这里serverList的实例是ConfigurationBasedServerList,这是在未使用Nacos时获取服务信息的实例,是从配置文件中获取。
那么在和Nacos配合使用时,需要从Nacos Server获取服务信息,那该是哪个实例来做这件事情呢。
在启用Nacos服务发现时,会首先采用NacosRibbonClientConfiguration配置类,由它的ribbonServerList方法创建NacosServerList实例来取代默认的ConfigurationBasedServerList。
位置:com.alibaba.cloud.nacos.ribbon.NacosRibbonClientConfiguration

 1 @Configuration(proxyBeanMethods = false)
 2 @ConditionalOnRibbonNacos
 3 public class NacosRibbonClientConfiguration {
 4     // Ribbon client configuration that supplies a Nacos-backed ServerList and a NacosServerIntrospector.
 5     @Autowired
 6     private PropertiesFactory propertiesFactory;
 7     // Server list: the configured ServerList if one is set for the client name, else a NacosServerList.
 8     @Bean
 9     @ConditionalOnMissingBean
10     public ServerList<?> ribbonServerList(IClientConfig config,
11             NacosDiscoveryProperties nacosDiscoveryProperties) {
12         if (this.propertiesFactory.isSet(ServerList.class, config.getClientName())) {
13             ServerList serverList = this.propertiesFactory.get(ServerList.class, config,
14                     config.getClientName());
15             return serverList;
16         }
17         NacosServerList serverList = new NacosServerList(nacosDiscoveryProperties);
18         serverList.initWithNiwsConfig(config); // records the client name as the service id to query
19         return serverList;
20     }
21 
22     @Bean
23     @ConditionalOnMissingBean
24     public NacosServerIntrospector nacosServerIntrospector() {
25         return new NacosServerIntrospector();
26     }
27 
28 }

获取服务

在找到获取服务信息入口前,先把负载均衡器的类继承关系撸一下。 

 

在ZoneAwareLoadBalancer的构造中调用了父类DynamicServerListLoadBalancer构造。

在DynamicServerListLoadBalancer的构造中,调用了restOfInit函数。
DynamicServerListLoadBalancer.class

 1     void restOfInit(IClientConfig clientConfig) { // Starts the periodic server-list refresh, loads the list once, then optionally primes connections.
 2         boolean primeConnection = this.isEnablePrimingConnections(); // remember the current setting so it can be restored below
 3         // turn this off to avoid duplicated asynchronous priming done in BaseLoadBalancer.setServerList()
 4         this.setEnablePrimingConnections(false);
 5         enableAndInitLearnNewServersFeature();
 6 
 7         updateListOfServers(); // immediate first load, independent of the scheduled refresh
 8         if (primeConnection && this.getPrimeConnections() != null) {
 9             this.getPrimeConnections()
10                     .primeConnections(getReachableServers());
11         }
12         this.setEnablePrimingConnections(primeConnection); // restore the original priming flag
13         LOGGER.info("DynamicServerListLoadBalancer for client {} initialized: {}", clientConfig.getClientName(), this.toString());
14     }
15     

先是通过调用enableAndInitLearnNewServersFeature方法启动定时更新服务列表的任务,然后立即调用updateListOfServers方法获取并更新一次服务列表信息。
先看下enableAndInitLearnNewServersFeature方法,实际上是调用了服务列表信息更新实例的start方法启动定时更新功能。

1     /**
2      * Feature that lets us add new instances (from AMIs) to the list of
3      * existing servers that the LB will use. Call this method if you want this
4      * feature enabled. Logs the updater's simple class name, then starts it with updateAction.
5      */
6     public void enableAndInitLearnNewServersFeature() {
7         LOGGER.info("Using serverListUpdater {}", serverListUpdater.getClass().getSimpleName());
8         serverListUpdater.start(updateAction);
9     }

这里的服务列表信息更新实例就是配置阶段配置的PollingServerListUpdater实例,看一下这个类的构造和start方法。

  1 public class PollingServerListUpdater implements ServerListUpdater {
  2     // ServerListUpdater that runs the supplied UpdateAction on a shared scheduled executor with a fixed delay between runs.
  3     private static final Logger logger = LoggerFactory.getLogger(PollingServerListUpdater.class);
  4 
  5     private static long LISTOFSERVERS_CACHE_UPDATE_DELAY = 1000; // msecs;
  6     private static int LISTOFSERVERS_CACHE_REPEAT_INTERVAL = 30 * 1000; // msecs;
  7     // Holder whose static initializer creates the executor and registers a JVM shutdown hook.
  8     private static class LazyHolder {
  9         private final static String CORE_THREAD = "DynamicServerListLoadBalancer.ThreadPoolSize";
 10         private final static DynamicIntProperty poolSizeProp = new DynamicIntProperty(CORE_THREAD, 2);
 11         private static Thread _shutdownThread;
 12 
 13         static ScheduledThreadPoolExecutor _serverListRefreshExecutor = null;
 14 
 15         static {
 16             int coreSize = poolSizeProp.get();
 17             ThreadFactory factory = (new ThreadFactoryBuilder())
 18                     .setNameFormat("PollingServerListUpdater-%d")
 19                     .setDaemon(true)
 20                     .build();
 21             _serverListRefreshExecutor = new ScheduledThreadPoolExecutor(coreSize, factory);
 22             poolSizeProp.addCallback(new Runnable() { // resize the pool whenever the property callback fires
 23                 @Override
 24                 public void run() {
 25                     _serverListRefreshExecutor.setCorePoolSize(poolSizeProp.get());
 26                 }
 27 
 28             });
 29             _shutdownThread = new Thread(new Runnable() {
 30                 public void run() {
 31                     logger.info("Shutting down the Executor Pool for PollingServerListUpdater");
 32                     shutdownExecutorPool();
 33                 }
 34             });
 35             Runtime.getRuntime().addShutdownHook(_shutdownThread);
 36         }
 37         // Shuts the executor down and tries to deregister the shutdown hook.
 38         private static void shutdownExecutorPool() {
 39             if (_serverListRefreshExecutor != null) {
 40                 _serverListRefreshExecutor.shutdown();
 41 
 42                 if (_shutdownThread != null) {
 43                     try {
 44                         Runtime.getRuntime().removeShutdownHook(_shutdownThread);
 45                     } catch (IllegalStateException ise) { // NOPMD
 46                         // this can happen if we're in the middle of a real
 47                         // shutdown,
 48                         // and that's 'ok'
 49                     }
 50                 }
 51 
 52             }
 53         }
 54     }
 55 
 56     private static ScheduledThreadPoolExecutor getRefreshExecutor() {
 57         return LazyHolder._serverListRefreshExecutor;
 58     }
 59 
 60 
 61     private final AtomicBoolean isActive = new AtomicBoolean(false);
 62     private volatile long lastUpdated = System.currentTimeMillis(); // epoch millis of the last successful update (construction time until then)
 63     private final long initialDelayMs;
 64     private final long refreshIntervalMs;
 65 
 66     private volatile ScheduledFuture<?> scheduledFuture;
 67     // Uses the default delay (1000 ms) and the default repeat interval (30 * 1000 ms).
 68     public PollingServerListUpdater() {
 69         this(LISTOFSERVERS_CACHE_UPDATE_DELAY, LISTOFSERVERS_CACHE_REPEAT_INTERVAL);
 70     }
 71     // Uses the default delay and the refresh interval read from the client config.
 72     public PollingServerListUpdater(IClientConfig clientConfig) {
 73         this(LISTOFSERVERS_CACHE_UPDATE_DELAY, getRefreshIntervalMs(clientConfig));
 74     }
 75 
 76     public PollingServerListUpdater(final long initialDelayMs, final long refreshIntervalMs) {
 77         this.initialDelayMs = initialDelayMs;
 78         this.refreshIntervalMs = refreshIntervalMs;
 79     }
 80     // Schedules updateAction.doUpdate() with a fixed delay; a second call while active only logs and does nothing.
 81     @Override
 82     public synchronized void start(final UpdateAction updateAction) {
 83         if (isActive.compareAndSet(false, true)) {
 84             final Runnable wrapperRunnable = new Runnable() {
 85                 @Override
 86                 public void run() {
 87                     if (!isActive.get()) { // stopped in the meantime: cancel the schedule and skip this run
 88                         if (scheduledFuture != null) {
 89                             scheduledFuture.cancel(true);
 90                         }
 91                         return;
 92                     }
 93                     try {
 94                         updateAction.doUpdate();
 95                         lastUpdated = System.currentTimeMillis();
 96                     } catch (Exception e) { // a failed cycle is only logged, so the next cycle still runs
 97                         logger.warn("Failed one update cycle", e);
 98                     }
 99                 }
100             };
101 
102             scheduledFuture = getRefreshExecutor().scheduleWithFixedDelay(
103                     wrapperRunnable,
104                     initialDelayMs,
105                     refreshIntervalMs,
106                     TimeUnit.MILLISECONDS
107             );
108         } else {
109             logger.info("Already active, no-op");
110         }
111     }
112     // Cancels the scheduled task if active; otherwise only logs.
113     @Override
114     public synchronized void stop() {
115         if (isActive.compareAndSet(true, false)) {
116             if (scheduledFuture != null) {
117                 scheduledFuture.cancel(true);
118             }
119         } else {
120             logger.info("Not active, no-op");
121         }
122     }
123 
124     @Override
125     public String getLastUpdate() {
126         return new Date(lastUpdated).toString();
127     }
128 
129     @Override
130     public long getDurationSinceLastUpdateMs() {
131         return System.currentTimeMillis() - lastUpdated;
132     }
133     // Number of whole refresh intervals elapsed since the last successful update; 0 when not active.
134     @Override
135     public int getNumberMissedCycles() {
136         if (!isActive.get()) {
137             return 0;
138         }
139         return (int) ((System.currentTimeMillis() - lastUpdated) / refreshIntervalMs); // divide in long arithmetic: narrowing the elapsed millis to int first overflowed after ~24.8 days
140     }
141     // Core pool size of the shared executor while active, otherwise 0.
142     @Override
143     public int getCoreThreads() {
144         if (isActive.get()) {
145             if (getRefreshExecutor() != null) {
146                 return getRefreshExecutor().getCorePoolSize();
147             }
148         }
149         return 0;
150     }
151     // Reads ServerListRefreshInterval from the client config, defaulting to LISTOFSERVERS_CACHE_REPEAT_INTERVAL.
152     private static long getRefreshIntervalMs(IClientConfig clientConfig) {
153         return clientConfig.get(CommonClientConfigKey.ServerListRefreshInterval, LISTOFSERVERS_CACHE_REPEAT_INTERVAL);
154     }
155 }

从构造和常量定义可以看出,首次执行延迟一秒,之后默认每隔30秒执行一次更新,间隔时间可以通过配置项ServerListRefreshInterval修改。
从start方法看,就是开了一个定时执行的schedule,定时执行 updateAction.doUpdate()。
回到start方法调用方DynamicServerListLoadBalancer类中看一下UpdateAction实例的定义。

1     protected final ServerListUpdater.UpdateAction updateAction = new ServerListUpdater.UpdateAction() { // action handed to the ServerListUpdater
2         @Override
3         public void doUpdate() {
4             updateListOfServers(); // each update cycle simply reloads the server list
5         }
6     };

实际上就是调用了DynamicServerListLoadBalancer类的updateListOfServers方法,这跟启动完定时更新后立即更新服务信息列表的路径是一致的。
继续看updateListOfServers方法。

 1     public void updateListOfServers() { // Fetches the server list, applies the optional filter, then hands the result to updateAllServerList.
 2         List<T> servers = new ArrayList<T>(); // stays empty when no ServerList implementation is configured
 3         if (serverListImpl != null) {
 4             servers = serverListImpl.getUpdatedListOfServers();
 5             LOGGER.debug("List of Servers for {} obtained from Discovery client: {}",
 6                     getIdentifier(), servers);
 7 
 8             if (filter != null) { // filtering is optional
 9                 servers = filter.getFilteredListOfServers(servers);
10                 LOGGER.debug("Filtered List of Servers for {} obtained from Discovery client: {}",
11                         getIdentifier(), servers);
12             }
13         }
14         updateAllServerList(servers);
15     }

1.通过ServerList实例获取服务信息列表。
2.通过ServerListFilter 实例对获取到的服务信息列表进行过滤。
3.将过滤后的服务信息列表保存到LoadBalancerStats中作为状态保持。

接下分别看一下。
1.通过ServerList实例获取服务信息列表。

/**
 * Ribbon server list that obtains its servers from the Nacos naming service.
 *
 * <p>The service id is taken from the Ribbon client name in
 * {@link #initWithNiwsConfig(IClientConfig)}; both the initial and the updated
 * list are produced by the same lookup.
 */
public class NacosServerList extends AbstractServerList<NacosServer> {

    private NacosDiscoveryProperties discoveryProperties;

    private String serviceId;

    /**
     * @param discoveryProperties source of the group name and of the naming service instance
     */
    public NacosServerList(NacosDiscoveryProperties discoveryProperties) {
        this.discoveryProperties = discoveryProperties;
    }

    /** Returns the current servers; same lookup as {@link #getUpdatedListOfServers()}. */
    @Override
    public List<NacosServer> getInitialListOfServers() {
        return getServers();
    }

    /** Returns the current servers; same lookup as {@link #getInitialListOfServers()}. */
    @Override
    public List<NacosServer> getUpdatedListOfServers() {
        return getServers();
    }

    /**
     * Queries the naming service for this service id and group and converts the result.
     *
     * @throws IllegalStateException wrapping any exception raised during lookup or conversion
     */
    private List<NacosServer> getServers() {
        try {
            String groupName = discoveryProperties.getGroup();
            // NOTE(review): the third argument (true) presumably restricts the result to
            // healthy instances - confirm against the Nacos NamingService API.
            List<Instance> found = discoveryProperties.namingServiceInstance()
                    .selectInstances(serviceId, groupName, true);
            return instancesToServerList(found);
        }
        catch (Exception e) {
            String message = "Can not get service instances from nacos, serviceId=" + serviceId;
            throw new IllegalStateException(message, e);
        }
    }

    /** Wraps every instance in a {@code NacosServer}; an empty or null input yields an empty list. */
    private List<NacosServer> instancesToServerList(List<Instance> instances) {
        List<NacosServer> servers = new ArrayList<>();
        if (!CollectionUtils.isEmpty(instances)) {
            for (Instance each : instances) {
                servers.add(new NacosServer(each));
            }
        }
        return servers;
    }

    public String getServiceId() {
        return serviceId;
    }

    /** Uses the Ribbon client name as the service id to look up. */
    @Override
    public void initWithNiwsConfig(IClientConfig iClientConfig) {
        this.serviceId = iClientConfig.getClientName();
    }

}

2.通过ServerListFilter实例对获取到的服务信息列表进行过滤。

serverListFilter实例就是配置阶段生成的ZonePreferenceServerListFilter,通过调用该实例的getFilteredListOfServers方法进行过滤。

/**
 * Server list filter that, on top of the parent's zone-affinity filtering, prefers
 * servers whose zone equals the configured local zone.
 *
 * <p>The extra step only runs when a zone is set and the parent returned a list of
 * the same size as the input. If no server matches the local zone, the parent's
 * result is returned unchanged.
 */
public class ZonePreferenceServerListFilter extends ZoneAffinityServerListFilter<Server> {

    private String zone;

    /**
     * Initializes the parent filter, then reads the local zone from the deployment
     * context when one is available.
     */
    @Override
    public void initWithNiwsConfig(IClientConfig niwsClientConfig) {
        super.initWithNiwsConfig(niwsClientConfig);
        if (ConfigurationManager.getDeploymentContext() != null) {
            this.zone = ConfigurationManager.getDeploymentContext()
                    .getValue(ContextKey.zone);
        }
    }

    /**
     * Filters the given servers.
     *
     * @param servers candidate servers; may be {@code null}
     * @return the servers of the local zone when the preference step applies and at
     *         least one matches, otherwise the parent's result (which is the input
     *         itself, possibly {@code null}, when the parent did not filter)
     */
    @Override
    public List<Server> getFilteredListOfServers(List<Server> servers) {
        List<Server> output = super.getFilteredListOfServers(servers);
        // The parent hands a null input straight back, so guard before calling size();
        // previously this threw a NullPointerException whenever a zone was configured.
        if (this.zone == null || servers == null || output == null
                || output.size() != servers.size()) {
            return output;
        }
        List<Server> local = new ArrayList<>();
        for (Server server : output) {
            if (this.zone.equalsIgnoreCase(server.getZone())) {
                local.add(server);
            }
        }
        // Fall back to the parent's result rather than returning an empty list.
        return local.isEmpty() ? output : local;
    }

    public String getZone() {
        return zone;
    }

    public void setZone(String zone) {
        this.zone = zone;
    }

    /** Two filters are equal when they are of the same class and have the same zone. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        ZonePreferenceServerListFilter that = (ZonePreferenceServerListFilter) o;
        return Objects.equals(zone, that.zone);
    }

    @Override
    public int hashCode() {
        return Objects.hash(zone);
    }

    @Override
    public String toString() {
        return "ZonePreferenceServerListFilter{" + "zone='" + zone + "'" + "}";
    }

}

在getFilteredListOfServers方法里面,一上来是调用父类的同名方法先过滤,其实父类也是把和消费端同区域的服务给过滤出来使用,不仅如此,增加了些智能的判定,保证在故障/负载较高时或者可用实例较少时不进行同区域的过滤。
但是在ZonePreferenceServerListFilter.getFilteredListOfServers这里,就算父类没有做过滤(即返回列表的大小与传入列表相同),只要其中存在与消费端同zone的服务,这里依然会把同zone的服务滤出来使用,谁叫这里的类是ZonePreference的呢。
这是比较怪异的地方,感觉父类的智能判定没什么作用。
还是看看ZoneAffinityServerListFilter.getFilteredListOfServers做的辛苦工作吧。

 1 public class ZoneAffinityServerListFilter<T extends Server> extends
 2         AbstractServerListFilter<T> implements IClientConfigAware {
 3     // Filter that keeps only same-zone servers when zone affinity/exclusivity is on and the zone looks healthy enough.
 4     private volatile boolean zoneAffinity = DefaultClientConfigImpl.DEFAULT_ENABLE_ZONE_AFFINITY;
 5     private volatile boolean zoneExclusive = DefaultClientConfigImpl.DEFAULT_ENABLE_ZONE_EXCLUSIVITY;
 6     private DynamicDoubleProperty activeReqeustsPerServerThreshold;
 7     private DynamicDoubleProperty blackOutServerPercentageThreshold;
 8     private DynamicIntProperty availableServersThreshold;
 9     private Counter overrideCounter;
10     private ZoneAffinityPredicate zoneAffinityPredicate = new ZoneAffinityPredicate();
11     
12     private static Logger logger = LoggerFactory.getLogger(ZoneAffinityServerListFilter.class);
13     
14     String zone;
15         
16     public ZoneAffinityServerListFilter() {      
17     }
18     
19     public ZoneAffinityServerListFilter(IClientConfig niwsClientConfig) {
20         initWithNiwsConfig(niwsClientConfig);
21     }
22     // Reads the affinity/exclusivity flags, the local zone and the three thresholds (defaults 0.6, 0.8 and 2).
23     @Override
24     public void initWithNiwsConfig(IClientConfig niwsClientConfig) {
25         String sZoneAffinity = "" + niwsClientConfig.getProperty(CommonClientConfigKey.EnableZoneAffinity, false);
26         if (sZoneAffinity != null){ // always true: the "" + ... concatenation above never yields null
27             zoneAffinity = Boolean.parseBoolean(sZoneAffinity);
28             logger.debug("ZoneAffinity is set to {}", zoneAffinity);
29         }
30         String sZoneExclusive = "" + niwsClientConfig.getProperty(CommonClientConfigKey.EnableZoneExclusivity, false);
31         if (sZoneExclusive != null){ // always true, for the same reason
32             zoneExclusive = Boolean.parseBoolean(sZoneExclusive);
33         }
34         if (ConfigurationManager.getDeploymentContext() != null) {
35             zone = ConfigurationManager.getDeploymentContext().getValue(ContextKey.zone);
36         }
37         activeReqeustsPerServerThreshold = DynamicPropertyFactory.getInstance().getDoubleProperty(niwsClientConfig.getClientName() + "." + niwsClientConfig.getNameSpace() + ".zoneAffinity.maxLoadPerServer", 0.6d);
38         logger.debug("activeReqeustsPerServerThreshold: {}", activeReqeustsPerServerThreshold.get());
39         blackOutServerPercentageThreshold = DynamicPropertyFactory.getInstance().getDoubleProperty(niwsClientConfig.getClientName() + "." + niwsClientConfig.getNameSpace() + ".zoneAffinity.maxBlackOutServesrPercentage", 0.8d);
40         logger.debug("blackOutServerPercentageThreshold: {}", blackOutServerPercentageThreshold.get());
41         availableServersThreshold = DynamicPropertyFactory.getInstance().getIntProperty(niwsClientConfig.getClientName() + "." + niwsClientConfig.getNameSpace() + ".zoneAffinity.minAvailableServers", 2);
42         logger.debug("availableServersThreshold: {}", availableServersThreshold.get());
43         overrideCounter = Monitors.newCounter("ZoneAffinity_OverrideCounter");
44 
45         Monitors.registerObject("NIWSServerListFilter_" + niwsClientConfig.getClientName());
46     }
47     // Decides whether the same-zone subset may be used instead of the full server list.
48     private boolean shouldEnableZoneAffinity(List<T> filtered) {    
49         if (!zoneAffinity && !zoneExclusive) {
50             return false;
51         }
52         if (zoneExclusive) { // exclusivity skips the health checks below
53             return true;
54         }
55         LoadBalancerStats stats = getLoadBalancerStats();
56         if (stats == null) { // no statistics to judge by: follow the affinity flag
57             return zoneAffinity;
58         } else {
59             logger.debug("Determining if zone affinity should be enabled with given server list: {}", filtered);
60             ZoneSnapshot snapshot = stats.getZoneSnapshot(filtered);
61             double loadPerServer = snapshot.getLoadPerServer();
62             int instanceCount = snapshot.getInstanceCount();            
63             int circuitBreakerTrippedCount = snapshot.getCircuitTrippedCount();
64             if (((double) circuitBreakerTrippedCount) / instanceCount >= blackOutServerPercentageThreshold.get() // too many tripped servers
65                     || loadPerServer >= activeReqeustsPerServerThreshold.get() // load per server too high
66                     || (instanceCount - circuitBreakerTrippedCount) < availableServersThreshold.get()) { // too few usable servers
67                 logger.debug("zoneAffinity is overriden. blackOutServerPercentage: {}, activeReqeustsPerServer: {}, availableServers: {}", 
68                         new Object[] {(double) circuitBreakerTrippedCount / instanceCount,  loadPerServer, instanceCount - circuitBreakerTrippedCount});
69                 return false;
70             } else {
71                 return true;
72             }
73             
74         }
75     }
76     // Returns the same-zone subset when allowed; otherwise returns the input list itself (including null or empty input).
77     @Override
78     public List<T> getFilteredListOfServers(List<T> servers) {
79         if (zone != null && (zoneAffinity || zoneExclusive) && servers !=null && servers.size() > 0){
80             List<T> filteredServers = Lists.newArrayList(Iterables.filter(
81                     servers, this.zoneAffinityPredicate.getServerOnlyPredicate()));
82             if (shouldEnableZoneAffinity(filteredServers)) {
83                 return filteredServers;
84             } else if (zoneAffinity) { // affinity was requested but overridden: count it
85                 overrideCounter.increment();
86             }
87         }
88         return servers;
89     }
90 
91     @Override
92     public String toString(){
93         StringBuilder sb = new StringBuilder("ZoneAffinityServerListFilter:");
94         sb.append(", zone: ").append(zone).append(", zoneAffinity:").append(zoneAffinity);
95         sb.append(", zoneExclusivity:").append(zoneExclusive);
96         return sb.toString();       
97     }
98 }

首先会将与消费端相同的zone的服务过滤出来,然后通过shouldEnableZoneAffinity(filteredServers)来判定是否可以采纳同zone的服务,还是采用所有的服务。
在shouldEnableZoneAffinity方法内,对相同zone的服务做了一次snapshot,获取这些服务的实例数量,平均负载,断路的实例数进行计算判定。
可以看一下initWithNiwsConfig方法中关键指标的值。

判定条件:
断路实例百分比>=0.8(断路的实例数/服务的实例数量)
平均负载>=0.6
可用实例数<2(实例数量-断路的实例数)
如果达到判定条件,那么就使用全部的服务,保证可用性。
但,上面也说了,因为ZonePreferenceServerListFilter本身总是会选用和消费端zone一致的服务,所以ZoneAffinityServerListFilter.getFilteredListOfServers中做的智能操作并没什么用。
不过,当然可以通过自定义配置来采用ZoneAffinityServerListFilter实例。

3.将过滤后的服务信息列表保存到LoadBalancerStats中作为状态保持。

跟进updateAllServerList(servers);去,一步步深入,会发现,实际上是保存到LoadBalancerStats中,并且这时候的服务是按照zone分组以HashMap<String, List<Server>>结构保存的,key是zone。

选择服务
实现了ILoadBalancer接口的负载均衡器,是通过实现chooseServer方法来进行服务的选择,选择后的服务做为目标请求服务。
看一下ZoneAwareLoadBalancer.chooseServer方法。

 1     @Override
 2     public Server chooseServer(Object key) { // Picks a server; uses the zone-aware path only when it is enabled and more than one zone is available.
 3         if (!ENABLED.get() || getLoadBalancerStats().getAvailableZones().size() <= 1) {
 4             logger.debug("Zone aware logic disabled or there is only one zone");
 5             return super.chooseServer(key); // delegate to the parent implementation
 6         }
 7         Server server = null;
 8         try {
 9             LoadBalancerStats lbStats = getLoadBalancerStats();
10             Map<String, ZoneSnapshot> zoneSnapshot = ZoneAvoidanceRule.createSnapshot(lbStats);
11             logger.debug("Zone snapshots: {}", zoneSnapshot);
12             if (triggeringLoad == null) { // lazily resolve the load threshold property (default 0.2)
13                 triggeringLoad = DynamicPropertyFactory.getInstance().getDoubleProperty(
14                         "ZoneAwareNIWSDiscoveryLoadBalancer." + this.getName() + ".triggeringLoadPerServerThreshold", 0.2d);
15             }
16 
17             if (triggeringBlackoutPercentage == null) { // lazily resolve the blackout threshold property (default 0.99999)
18                 triggeringBlackoutPercentage = DynamicPropertyFactory.getInstance().getDoubleProperty(
19                         "ZoneAwareNIWSDiscoveryLoadBalancer." + this.getName() + ".avoidZoneWithBlackoutPercetage", 0.99999d);
20             }
21             Set<String> availableZones = ZoneAvoidanceRule.getAvailableZones(zoneSnapshot, triggeringLoad.get(), triggeringBlackoutPercentage.get());
22             logger.debug("Available zones: {}", availableZones);
23             if (availableZones != null &&  availableZones.size() < zoneSnapshot.keySet().size()) { // only when at least one zone was excluded
24                 String zone = ZoneAvoidanceRule.randomChooseZone(zoneSnapshot, availableZones);
25                 logger.debug("Zone chosen: {}", zone);
26                 if (zone != null) {
27                     BaseLoadBalancer zoneLoadBalancer = getLoadBalancer(zone); // choose within the load balancer of the selected zone
28                     server = zoneLoadBalancer.chooseServer(key);
29                 }
30             }
31         } catch (Exception e) { // failures are logged; selection then falls through to the parent below
32             logger.error("Error choosing server using zone aware logic for load balancer={}", name, e);
33         }
34         if (server != null) {
35             return server;
36         } else {
37             logger.debug("Zone avoidance logic is not invoked.");
38             return super.chooseServer(key); // no zone-specific pick was made
39         }
40     }
41      

注意这里有两种用法:

1.通过配置ZoneAwareNIWSDiscoveryLoadBalancer.enabled=false关闭区域感知负载均衡,或者zone的个数<=1个。

2.采用区域感知,并且zone的个数>1。

 

一个个来看一下

1.通过配置ZoneAwareNIWSDiscoveryLoadBalancer.enabled=false关闭区域感知负载均衡,或者zone的个数<=1个。

这种情况下,调用了父类BaseLoadBalancer.chooseServer方法。

 1     public Server chooseServer(Object key) { // Counts the call, then asks the configured rule for a server; returns null when there is no rule or the rule throws.
 2         if (counter == null) { // create the counter on first use
 3             counter = createCounter();
 4         }
 5         counter.increment();
 6         if (rule == null) {
 7             return null;
 8         } else {
 9             try {
10                 return rule.choose(key);
11             } catch (Exception e) { // a failing rule is logged and reported to the caller as null
12                 logger.warn("LoadBalancer [{}]:  Error choosing server for key {}", name, key, e);
13                 return null;
14             }
15         }
16     }

这里使用的负载均衡策略rule实际上就是构造ZoneAwareLoadBalancer时传进来的,在配置阶段生成的ZoneAvoidanceRule策略实例。

    /**
     * Installs the load-balancing rule, substituting a {@code RoundRobinRule} when
     * {@code rule} is null, and makes sure the rule points back at this load balancer.
     */
    public void setRule(IRule rule) {
        // A null argument selects the default rule.
        this.rule = (rule == null) ? new RoundRobinRule() : rule;
        // Bind the rule to this load balancer unless it already is.
        if (this.rule.getLoadBalancer() != this) {
            this.rule.setLoadBalancer(this);
        }
    }

 如果传入的rule为null,则默认使用RoundRobinRule规则。

2.采用区域感知,并且zone的个数>1。

 1     @Override
 2     public Server chooseServer(Object key) { // Picks a server; uses the zone-aware path only when it is enabled and more than one zone is available.
 3         if (!ENABLED.get() || getLoadBalancerStats().getAvailableZones().size() <= 1) {
 4             logger.debug("Zone aware logic disabled or there is only one zone");
 5             return super.chooseServer(key); // delegate to the parent implementation
 6         }
 7         Server server = null;
 8         try {
 9             LoadBalancerStats lbStats = getLoadBalancerStats();
10             Map<String, ZoneSnapshot> zoneSnapshot = ZoneAvoidanceRule.createSnapshot(lbStats);
11             logger.debug("Zone snapshots: {}", zoneSnapshot);
12             if (triggeringLoad == null) { // lazily resolve the load threshold property (default 0.2)
13                 triggeringLoad = DynamicPropertyFactory.getInstance().getDoubleProperty(
14                         "ZoneAwareNIWSDiscoveryLoadBalancer." + this.getName() + ".triggeringLoadPerServerThreshold", 0.2d);
15             }
16 
17             if (triggeringBlackoutPercentage == null) { // lazily resolve the blackout threshold property (default 0.99999)
18                 triggeringBlackoutPercentage = DynamicPropertyFactory.getInstance().getDoubleProperty(
19                         "ZoneAwareNIWSDiscoveryLoadBalancer." + this.getName() + ".avoidZoneWithBlackoutPercetage", 0.99999d);
20             }
21             Set<String> availableZones = ZoneAvoidanceRule.getAvailableZones(zoneSnapshot, triggeringLoad.get(), triggeringBlackoutPercentage.get());
22             logger.debug("Available zones: {}", availableZones);
23             if (availableZones != null &&  availableZones.size() < zoneSnapshot.keySet().size()) { // only when at least one zone was excluded
24                 String zone = ZoneAvoidanceRule.randomChooseZone(zoneSnapshot, availableZones);
25                 logger.debug("Zone chosen: {}", zone);
26                 if (zone != null) {
27                     BaseLoadBalancer zoneLoadBalancer = getLoadBalancer(zone); // choose within the load balancer of the selected zone
28                     server = zoneLoadBalancer.chooseServer(key);
29                 }
30             }
31         } catch (Exception e) { // failures are logged; selection then falls through to the parent below
32             logger.error("Error choosing server using zone aware logic for load balancer={}", name, e);
33         }
34         if (server != null) {
35             return server;
36         } else {
37             logger.debug("Zone avoidance logic is not invoked.");
38             return super.chooseServer(key); // no zone-specific pick was made
39         }
40     }
41      

在这种情况下默认使用ZoneAvoidanceRule负载均衡策略。
获取zone的snapshot信息。
获取可用的zone,通过观察ZoneAvoidanceRule.getAvailableZones定义,不是可用zone的条件是:
所属实例数==0。
故障率>0.99999或者平均负载<0。
如果不是上面两种情况,就选择负载最高的一个去除不作为可用的zone。
可用zone都获取后,随机选一个。

并从该zone中,通过ZoneAwareLoadBalancer的父类BaseLoadBalancer.chooseServer选取服务,上面整理过,BaseLoadBalancer里如果没有传入rule,那么默认使用RoundRobinRule策略轮询一个服务。
其实,还是上面获取服务中ZonePreferenceServerListFilter过滤器的问题:实际上过滤出来的只有和消费端相同的那一个zone的服务,所以第2部分从可用zone中选取服务的功能是走不到的,要走到就得把过滤器换掉。

总结:
配置的负载均衡器会启动schedule定时获取服务信息。在使用了Nacos客户端时,会从Nacos Server获取所有服务实例信息,再通过过滤器过滤出可以使用的服务;过滤器默认只过滤出与消费端相同zone的服务,如果要保证高可用,可配置ZoneAffinityServerListFilter过滤器。过滤后的服务列表,通过实现了IRule接口的负载均衡策略选取对应的服务;如果使用zone感知的策略,可以从负载情况良好的zone中选取合适的服务。

 

posted @ 2021-01-14 11:48  郭慕荣  阅读(416)  评论(0编辑  收藏  举报