Netflix中的负载均衡策略
Spring Cloud的负载均衡策略可以通过配置Ribbon搞定,也就是注入实现com.netflix.loadbalancer.IRule的类,当前包含的策略包括
1.RandomRule 随机策略:在while循环内,如果服务列表不为空,就会不停地循环,直到随机选出一个可用的服务。
/**
 * RandomRule: picks a server at random from the load balancer.
 *
 * Returns null when lb is null, when the calling thread has been interrupted,
 * or when getAllServers() is empty. Otherwise it loops, yielding the thread
 * between attempts, until the randomly selected entry is non-null and reports
 * isAlive().
 *
 * NOTE(review): the random index is bounded by allList.size() but is used to
 * index upList (the reachable servers). If upList is shorter than allList,
 * upList.get(index) can be out of range.
 *
 * @param lb  load balancer supplying the reachable and full server lists; may be null
 * @param key not read by this method
 * @return an alive server, or null in the cases described above
 */
@SuppressWarnings({"RCN_REDUNDANT_NULLCHECK_OF_NULL_VALUE"}) public Server choose(ILoadBalancer lb, Object key) { if (lb == null) { return null; } else { Server server = null; while(server == null) { if (Thread.interrupted()) { return null; } List<Server> upList = lb.getReachableServers(); List<Server> allList = lb.getAllServers(); int serverCount = allList.size(); if (serverCount == 0) { return null; } int index = this.rand.nextInt(serverCount); server = (Server)upList.get(index); if (server == null) { Thread.yield(); } else { if (server.isAlive()) { return server; } server = null; Thread.yield(); } } return server; } }
不过感觉怎么第一个就有坑呢。。upList表示当前可用的服务实例集合,这个集合可以由客户端开启定时任务定期对调用服务进行ping来更新,allList表示当前所有服务实例的集合。
也就是说,当存在不可用的服务实例时,upList的数量就会小于allList的数量。通过com.netflix.loadbalancer.BaseLoadBalancer中的代码可见:
/*
 * Two excerpts from BaseLoadBalancer's ping machinery.
 *
 * pingServers: asks the given IPing about every server in order and returns a
 * boolean array of the same length. An entry stays false when ping is null or
 * when isAlive() throws (the exception is logged, not rethrown).
 *
 * runPinger: does nothing unless pingInProgress can be flipped from false to
 * true, and always resets it to false in the finally block. It copies
 * allServerList into an array while holding the read lock, pings that snapshot
 * through pingerStrategy, writes each result back with setAlive(), collects the
 * servers whose alive state changed, replaces upServerList (while holding the
 * write lock) with the servers that answered alive, and then calls
 * notifyServerStatusChangeListener with the changed servers.
 *
 * NOTE(review): both unlock() calls sit outside the finally block, so an
 * exception thrown while a lock is held would leave it locked. The local
 * "results" in runPinger is declared but never read.
 */
public boolean[] pingServers(IPing ping, Server[] servers) { int numCandidates = servers.length; boolean[] results = new boolean[numCandidates]; BaseLoadBalancer.logger.debug("LoadBalancer: PingTask executing [{}] servers configured", numCandidates); for(int i = 0; i < numCandidates; ++i) { results[i] = false; try { if (ping != null) { results[i] = ping.isAlive(servers[i]); } } catch (Exception var7) { BaseLoadBalancer.logger.error("Exception while pinging Server: '{}'", servers[i], var7); } } return results; } public void runPinger() throws Exception { if (BaseLoadBalancer.this.pingInProgress.compareAndSet(false, true)) { Server[] allServers = null; boolean[] results = null; Lock allLock = null; Lock upLock = null; try { allLock = BaseLoadBalancer.this.allServerLock.readLock(); allLock.lock(); allServers = (Server[])BaseLoadBalancer.this.allServerList.toArray(new Server[BaseLoadBalancer.this.allServerList.size()]); allLock.unlock(); int numCandidates = allServers.length; boolean[] resultsx = this.pingerStrategy.pingServers(BaseLoadBalancer.this.ping, allServers); List<Server> newUpList = new ArrayList(); List<Server> changedServers = new ArrayList(); for(int i = 0; i < numCandidates; ++i) { boolean isAlive = resultsx[i]; Server svr = allServers[i]; boolean oldIsAlive = svr.isAlive(); svr.setAlive(isAlive); if (oldIsAlive != isAlive) { changedServers.add(svr); BaseLoadBalancer.logger.debug("LoadBalancer [{}]: Server [{}] status changed to {}", new Object[]{BaseLoadBalancer.this.name, svr.getId(), isAlive ? "ALIVE" : "DEAD"}); } if (isAlive) { newUpList.add(svr); } } upLock = BaseLoadBalancer.this.upServerLock.writeLock(); upLock.lock(); BaseLoadBalancer.this.upServerList = newUpList; upLock.unlock(); BaseLoadBalancer.this.notifyServerStatusChangeListener(changedServers); } finally { BaseLoadBalancer.this.pingInProgress.set(false); } } }
如此看来,当upList数量不等于allList数量时,这个server = (Server)upList.get(index);就出问题了!?当然,默认情况下ping的方法是不进行真实健康监测的,即所有服务都是健康的,保证allList.size()=upList.size();不过感觉很怪异。
2.RoundRobinRule 轮询策略,但是有个查找次数的限制,也就是说查了10次都是不可用的服务的话就会警告没有可用服务并返回null了,选择的方式是很简单,取余运算。
/*
 * RoundRobinRule.
 *
 * choose: returns null (after a warning) when lb is null. Otherwise it makes up
 * to 10 attempts. Each attempt returns null with a warning if either the
 * reachable list or the full list is empty; otherwise it takes the next cyclic
 * index over the FULL server list and returns that server if it is non-null,
 * alive and ready to serve. A null entry triggers Thread.yield() before the
 * next attempt. Once the attempt counter reaches 10 a warning is logged and
 * null is returned.
 *
 * NOTE(review): upCount is only compared against zero; the index is computed
 * from serverCount and read from allServers, so the reachable list never
 * influences which server is picked.
 *
 * incrementAndGetModulo: compare-and-set retry loop that advances
 * nextServerCyclicCounter to (current + 1) % modulo and returns the new value.
 */
public Server choose(ILoadBalancer lb, Object key) { if (lb == null) { log.warn("no load balancer"); return null; } else { Server server = null; int count = 0; while(true) { if (server == null && count++ < 10) { List<Server> reachableServers = lb.getReachableServers(); List<Server> allServers = lb.getAllServers(); int upCount = reachableServers.size(); int serverCount = allServers.size(); if (upCount != 0 && serverCount != 0) { int nextServerIndex = this.incrementAndGetModulo(serverCount); server = (Server)allServers.get(nextServerIndex); if (server == null) { Thread.yield(); } else { if (server.isAlive() && server.isReadyToServe()) { return server; } server = null; } continue; } log.warn("No up servers available from load balancer: " + lb); return null; } if (count >= 10) { log.warn("No available alive servers after 10 tries from load balancer: " + lb); } return server; } } } private int incrementAndGetModulo(int modulo) { int current; int next; do { current = this.nextServerCyclicCounter.get(); next = (current + 1) % modulo; } while(!this.nextServerCyclicCounter.compareAndSet(current, next)); return next; }
此处的upCount依然是个摆设。。。
3.ClientConfigEnabledRoundRobinRule 默认使用RoundRobinRule 策略 不过字面意思,客户端可配置的,所以可以作为父类扩展
/**
 * Creates the RoundRobinRule instance that choose(Object) delegates to.
 *
 * @param clientConfig client configuration; not read by this method
 */
public void initWithNiwsConfig(IClientConfig clientConfig) {
    this.roundRobinRule = new RoundRobinRule();
}

/**
 * Delegates server selection to the wrapped round-robin rule.
 *
 * @param key passed through unchanged to the wrapped rule
 * @return whatever the wrapped rule returns
 * @throws IllegalArgumentException if the wrapped rule has not been created yet
 */
public Server choose(Object key) {
    // Guard first: without the delegate there is nothing to ask.
    if (this.roundRobinRule == null) {
        throw new IllegalArgumentException("This class has not been initialized with the RoundRobinRule class");
    }
    return this.roundRobinRule.choose(key);
}
4.WeightedResponseTimeRule 实例初始化的时候会开启一个定时任务,通过定时任务来获取服务响应时间定期维护每个服务的权重
/**
 * WeightedResponseTimeRule: selects a server using the cumulative weights held
 * in accumulatedWeights.
 *
 * Returns null immediately when lb is null. Each pass of the loop then:
 *  - returns null if the thread was interrupted or getAllServers() is empty;
 *  - treats the last entry of the weight list as the total weight (0 if the
 *    list is empty);
 *  - if that total is below 0.001, delegates to super.choose(...) and returns
 *    null when the parent finds nothing;
 *  - otherwise draws random.nextDouble() * total, takes the index of the first
 *    cumulative weight that is >= the draw (index 0 if none matches), and reads
 *    that index from allList.
 * The selected server is returned only if isAlive(); otherwise the loop repeats.
 *
 * NOTE(review): the loop has no attempt limit of its own. The only exits
 * visible here are interruption, an empty server list, a null from
 * super.choose, or an alive server.
 *
 * @param lb  load balancer supplying the full server list; may be null
 * @param key forwarded to super.choose when the weights are unusable
 * @return an alive server, or null in the cases described above
 */
public Server choose(ILoadBalancer lb, Object key) { if (lb == null) { return null; } else { Server server = null; while(server == null) { List<Double> currentWeights = this.accumulatedWeights; if (Thread.interrupted()) { return null; } List<Server> allList = lb.getAllServers(); int serverCount = allList.size(); if (serverCount == 0) { return null; } int serverIndex = 0; double maxTotalWeight = currentWeights.size() == 0 ? 0.0D : ((Double)currentWeights.get(currentWeights.size() - 1)).doubleValue(); if (maxTotalWeight < 0.001D) { server = super.choose(this.getLoadBalancer(), key); if (server == null) { return server; } } else { double randomWeight = this.random.nextDouble() * maxTotalWeight; int n = 0; for(Iterator var13 = currentWeights.iterator(); var13.hasNext(); ++n) { Double d = (Double)var13.next(); if (d.doubleValue() >= randomWeight) { serverIndex = n; break; } } server = (Server)allList.get(serverIndex); } if (server == null) { Thread.yield(); } else { if (server.isAlive()) { return server; } server = null; } } return server; } }
public void maintainWeights() { ILoadBalancer lb = WeightedResponseTimeRule.this.getLoadBalancer(); if (lb != null) { if (WeightedResponseTimeRule.this.serverWeightAssignmentInProgress.compareAndSet(false, true)) { try { WeightedResponseTimeRule.logger.info("Weight adjusting job started"); AbstractLoadBalancer nlb = (AbstractLoadBalancer)lb; LoadBalancerStats stats = nlb.getLoadBalancerStats(); if (stats != null) { double totalResponseTime = 0.0D;//所有实例的响应时间总和 ServerStats ss; for(Iterator var6 = nlb.getAllServers().iterator(); var6.hasNext(); totalResponseTime += ss.getResponseTimeAvg()) { //通过ss.getResponseTimeAvg()获取每个服务的平均响应时间 然后累加到totalResponseTime中 Server server = (Server)var6.next(); ss = stats.getSingleServerStat(server); } Double weightSoFar = 0.0D; List<Double> finalWeights = new ArrayList(); Iterator var20 = nlb.getAllServers().iterator(); while(var20.hasNext()) { Server serverx = (Server)var20.next(); ServerStats ssx = stats.getSingleServerStat(serverx); double weight = totalResponseTime - ssx.getResponseTimeAvg();//所有服务的平均响应时间的和-该服务的平均响应时间=该服务的权重 weightSoFar = weightSoFar.doubleValue() + weight; //由于通过集合存储 所以此处采取区间的模式 也就是从0到n,n到...的模式 //比如 三个服务 响应时间分别为10,20,30 则权重分别为(0-50)(50-90)(90-120) finalWeights.add(weightSoFar); } WeightedResponseTimeRule.this.setWeights(finalWeights); return; } } catch (Exception var16) { WeightedResponseTimeRule.logger.error("Error calculating server weights", var16); return; } finally { WeightedResponseTimeRule.this.serverWeightAssignmentInProgress.set(false); } } } }
不过当权重的集合中没有数据的时候,这个类继承了RoundRobinRule 类,就使用轮询的方式选择了。如果存在权重信息则使用this.random.nextDouble() * maxTotalWeight的方式也就是1以内小数*最大权重值区间内的随机数来选取服务索引的方式。跟RandomRule 的模式一样,当选取的服务状态异常的时候会While循环走下去。直到。。。死循环。
5.BestAvailableRule 对所有实例进行迭代,首先过滤掉不可用的服务,然后选出连接数最少的服务返回,继承了ClientConfigEnabledRoundRobinRule类也就是使用了RoundRobinRule策略,也就是loadBalancerStats进行统计服务连接信息为空的时候先采用轮询策略过渡。
/**
 * BestAvailableRule: returns the server with the fewest active requests among
 * those whose circuit breaker is not tripped.
 *
 * Falls back to the parent rule when no statistics object is available or when
 * no server qualifies.
 *
 * @param key forwarded to the parent rule on fallback
 * @return the least-loaded eligible server, or the parent rule's choice
 */
public Server choose(Object key) {
    if (this.loadBalancerStats == null) {
        return super.choose(key);
    }

    List<Server> candidates = this.getLoadBalancer().getAllServers();
    int fewestActive = Integer.MAX_VALUE;
    long now = System.currentTimeMillis();
    Server best = null;

    for (Server candidate : candidates) {
        ServerStats candidateStats = this.loadBalancerStats.getSingleServerStat(candidate);
        if (candidateStats.isCircuitBreakerTripped(now)) {
            continue; // tripped servers are never considered
        }
        int active = candidateStats.getActiveRequestsCount(now);
        if (active < fewestActive) {
            fewestActive = active;
            best = candidate;
        }
    }

    return best != null ? best : super.choose(key);
}
6.RetryRule 采用了轮询策略(内部直接实例化RoundRobinRule使用)的重试策略来获取可用的服务实例。这里有个maxRetryMillis属性用来限定重试的时间,如果首次获取的服务实例为空或不可用,则开启一个指定了截止时间的定时任务线程,在该指定时间内如果没有找到可用的实例就返回null了。默认为500毫秒。(轮询策略内则是10次以内不管找到可用实例与否都返回结果,所以此处可以看成一个次数微微可控的加强版)
/**
 * RetryRule: asks subRule for a server and, if the answer is null or not alive,
 * keeps asking until a deadline of (request time + maxRetryMillis).
 *
 * When a retry is needed an InterruptTask is created with the remaining time.
 * The retry loop ends when Thread.interrupted() returns true, when an alive
 * server is obtained, or when the current time reaches the deadline; otherwise
 * it yields and tries again. The task is cancelled after the loop.
 *
 * NOTE(review): presumably InterruptTask interrupts the calling thread when its
 * delay elapses - its implementation is not shown here, confirm.
 *
 * @param lb  not read by this method; subRule is called with key only
 * @param key forwarded to subRule.choose
 * @return the last answer if it is non-null and alive, otherwise null
 */
public Server choose(ILoadBalancer lb, Object key) { long requestTime = System.currentTimeMillis(); long deadline = requestTime + this.maxRetryMillis; Server answer = null; answer = this.subRule.choose(key); if ((answer == null || !answer.isAlive()) && System.currentTimeMillis() < deadline) { InterruptTask task = new InterruptTask(deadline - System.currentTimeMillis()); while(!Thread.interrupted()) { answer = this.subRule.choose(key); if (answer != null && answer.isAlive() || System.currentTimeMillis() >= deadline) { break; } Thread.yield(); } task.cancel(); } return answer != null && answer.isAlive() ? answer : null; }
7.PredicateBasedRule 继承自ClientConfigEnabledRoundRobinRule的一个抽象类。
/**
 * Supplies the predicate used to filter candidate servers.
 *
 * @return the predicate that choose(Object) filters with
 */
public abstract AbstractServerPredicate getPredicate();

/**
 * Filters all servers of the load balancer with getPredicate() and lets the
 * predicate pick one of the survivors round-robin.
 *
 * @param key load-balancer key handed to the predicate
 * @return the selected server, or null when the predicate yields none
 */
public Server choose(Object key) {
    ILoadBalancer balancer = this.getLoadBalancer();
    Optional<Server> picked =
            this.getPredicate().chooseRoundRobinAfterFiltering(balancer.getAllServers(), key);
    if (picked.isPresent()) {
        return (Server) picked.get();
    }
    return null;
}
使用的时候需要重写getPredicate方法,目测是先过滤掉一部分服务,然后再从中选择一个服务。
//上边方法this.getPredicate().chooseRoundRobinAfterFiltering(lb.getAllServers(), key);调用到这 public Optional<Server> chooseRoundRobinAfterFiltering(List<Server> servers, Object loadBalancerKey) { List<Server> eligible = this.getEligibleServers(servers, loadBalancerKey); return eligible.size() == 0 ? Optional.absent() : Optional.of(eligible.get(this.nextIndex.getAndIncrement() % eligible.size())); //此处可见过滤后的集合为空则返回 Optional.absent()表示不存在对象集合(通过isPresent()方法默认就是false),集合不为空则还是如同轮询算法般取余 } public List<Server> getEligibleServers(List<Server> servers, Object loadBalancerKey) { if (loadBalancerKey == null) { return ImmutableList.copyOf(Iterables.filter(servers, this.getServerOnlyPredicate()));//loadBalancerKey 如果为null的话 则返回当前即可(这个filter的过滤条件是不过滤。。。) } else { List<Server> results = Lists.newArrayList(); Iterator var4 = servers.iterator(); while(var4.hasNext()) { Server server = (Server)var4.next(); //此处进行条件判断 将满足条件的集合返回 if (this.apply(new PredicateKey(loadBalancerKey, server))) { results.add(server); } } return results; } }
当然这个抽象类需要我们实现getPredicate()返回AbstractServerPredicate过滤条件(默认全部返回为true,也就是等价于采用轮询的模式了)。
8.AvailabilityFilteringRule实现PredicateBasedRule类,如代码所示,组合条件是一个new AvailabilityPredicate().
// Composite predicate: an AvailabilityPredicate (built without client config)
// as the primary condition, with an always-true predicate as fallback.
private AbstractServerPredicate predicate =
        CompositePredicate
                .withPredicate(new AvailabilityPredicate(this, (IClientConfig) null))
                .addFallbackPredicate(AbstractServerPredicate.alwaysTrue())
                .build();

/**
 * Rebuilds the composite predicate so that its AvailabilityPredicate is
 * constructed with the supplied client configuration.
 *
 * @param clientConfig configuration handed to the new AvailabilityPredicate
 */
public void initWithNiwsConfig(IClientConfig clientConfig) {
    this.predicate =
            CompositePredicate
                    .withPredicate(new AvailabilityPredicate(this, clientConfig))
                    .addFallbackPredicate(AbstractServerPredicate.alwaysTrue())
                    .build();
}
查看过滤条件
public boolean apply(@Nullable PredicateKey input) { LoadBalancerStats stats = this.getLBStats(); if (stats == null) { return true; } else { return !this.shouldSkipServer(stats.getSingleServerStat(input.getServer()));//下边返回true则这块会把该服务实例过滤掉 返回为!true } } //也就是这块 可以看出 如果断路器当前是开启状态或者当前服务实例的请求连接数大于配置的连接数阈值则进行过滤(默认是2147483647,可以通过clientConfig进行配置 Spring Cloud中也就是<clientName>.<nameSpace>.ActiveConnectionsLimit进行配置) private boolean shouldSkipServer(ServerStats stats) {//满足其一条件则会返回true return CIRCUIT_BREAKER_FILTERING.get() && stats.isCircuitBreakerTripped() || stats.getActiveRequestsCount() >= ((Integer)this.activeConnectionsLimit.get()).intValue(); }
/**
 * AvailabilityFilteringRule: asks the round-robin rule for a candidate and
 * returns the first one the predicate accepts. The attempt counter runs from 0
 * and the loop continues while its pre-increment value is <= 10; after that
 * the parent's choose is used.
 *
 * @param key forwarded to the round-robin rule and to the parent rule
 * @return the first accepted candidate, or the parent rule's choice
 */
public Server choose(Object key) {
    int attempts = 0;
    Server candidate = this.roundRobinRule.choose(key);
    while (attempts++ <= 10) {
        if (this.predicate.apply(new PredicateKey(candidate))) {
            return candidate;
        }
        // Rejected: move on to the next server in round-robin order.
        candidate = this.roundRobinRule.choose(key);
    }
    return super.choose(key);
}
从筛选的逻辑可以发现,是先使用轮询的方式挑选出一个服务实例,然后再用过滤条件检查它是否满足可用的条件,不满足再轮询下一个。
9.ZoneAvoidanceRule实现PredicateBasedRule类,此处的过滤条件通过构造函数可以看出,字面意思,第一个是根据区域进行筛选,第二个是根据可用性进行筛选
/**
 * ZoneAvoidanceRule: builds a ZoneAvoidancePredicate and an
 * AvailabilityPredicate from the given client configuration and stores their
 * combination (via createCompositePredicate) in compositePredicate.
 *
 * @param clientConfig configuration passed to both predicates
 */
public void initWithNiwsConfig(IClientConfig clientConfig) { ZoneAvoidancePredicate zonePredicate = new ZoneAvoidancePredicate(this, clientConfig); AvailabilityPredicate availabilityPredicate = new AvailabilityPredicate(this, clientConfig); this.compositePredicate = this.createCompositePredicate(zonePredicate, availabilityPredicate); }
ZoneAvoidancePredicate的过滤条件如下:
public boolean apply(@Nullable PredicateKey input) { if (!ENABLED.get()) {//查看niws.loadbalancer.zoneAvoidanceRule.enabled配置的熟悉是否为true(默认为true)如果为false没有开启分片过滤 则不进行过滤 return true; } else { String serverZone = input.getServer().getZone();//获取配置的分片字符串 默认为UNKNOWN if (serverZone == null) { return true; } else { LoadBalancerStats lbStats = this.getLBStats(); if (lbStats == null) {//无负载均衡的要求 return true; } else if (lbStats.getAvailableZones().size() <= 1) { return true;//可用的分片(处于Up状态)<=1 当然就没必要再过滤了 } else { Map<String, ZoneSnapshot> zoneSnapshot = ZoneAvoidanceRule.createSnapshot(lbStats);//key为服务实例配置的Zone if (!zoneSnapshot.keySet().contains(serverZone)) { return true;//如果所有分片的配置都不符合规则 那就没必要继续筛选了 不进行过滤 也就表示当前的分片设置没啥意义了 } else { logger.debug("Zone snapshots: {}", zoneSnapshot); Set<String> availableZones = ZoneAvoidanceRule.getAvailableZones(zoneSnapshot, this.triggeringLoad.get(), this.triggeringBlackoutPercentage.get());//此处开始挑选可用的区域 logger.debug("Available zones: {}", availableZones); return availableZones != null ? availableZones.contains(input.getServer().getZone()) : false; } } } } }
对两个过滤条件进行实例化后会通过this.compositePredicate = this.createCompositePredicate(zonePredicate, availabilityPredicate);将过滤条件合并。
// Fallback predicates registered through the builder are collected in this list.
private List<AbstractServerPredicate> fallbacks = Lists.newArrayList();

/**
 * Starts a builder for a composite predicate.
 *
 * @param primaryPredicate the primary predicate handed to the builder
 * @return a new builder
 */
public static CompositePredicate.Builder withPredicate(AbstractServerPredicate primaryPredicate) {
    return new CompositePredicate.Builder(primaryPredicate);
}

/**
 * Appends a fallback predicate to the composite under construction.
 *
 * @param fallback predicate added to the end of the fallbacks list
 * @return this builder, for chaining
 */
public CompositePredicate.Builder addFallbackPredicate(AbstractServerPredicate fallback) {
    this.toBuild.fallbacks.add(fallback);
    return this;
}
ZoneAvoidanceRule实现PredicateBasedRule类所以还是会通过父类的choose方法进行选择。
/**
 * Filters all servers of the load balancer with getPredicate() and lets the
 * predicate pick one of the survivors round-robin.
 *
 * @param key load-balancer key handed to the predicate
 * @return the selected server, or null when the predicate yields none
 */
public Server choose(Object key) {
    ILoadBalancer balancer = this.getLoadBalancer();
    Optional<Server> picked =
            this.getPredicate().chooseRoundRobinAfterFiltering(balancer.getAllServers(), key);
    if (picked.isPresent()) {
        return (Server) picked.get();
    }
    return null;
}

/**
 * Narrows the servers down with getEligibleServers and returns the one at
 * nextIndex modulo the eligible count.
 *
 * @param servers        candidate servers
 * @param loadBalancerKey key used for filtering
 * @return Optional.absent() when no server is eligible, otherwise the pick
 */
public Optional<Server> chooseRoundRobinAfterFiltering(List<Server> servers, Object loadBalancerKey) {
    List<Server> eligible = this.getEligibleServers(servers, loadBalancerKey);
    if (eligible.size() == 0) {
        return Optional.absent();
    }
    int position = this.nextIndex.getAndIncrement() % eligible.size();
    return Optional.of(eligible.get(position));
}
getEligibleServers方法在AbstractServerPredicate的子类CompositePredicate中进行了重写。
/**
 * CompositePredicate (abridged excerpt): a predicate with an ordered list of
 * fallback predicates.
 *
 * getEligibleServers first filters with the inherited implementation. Then,
 * while the current result has fewer than minimalFilteredServers entries or no
 * more than servers.size() * minimalFilteredPercentage entries, and fallbacks
 * remain, it REPLACES the result with the next fallback predicate's filtering
 * of the original "servers" list (not of the previous result).
 *
 * The field initialisers shown here are minimalFilteredServers = 1 and
 * minimalFilteredPercentage = 0.
 */
public class CompositePredicate extends AbstractServerPredicate { private List<AbstractServerPredicate> fallbacks = Lists.newArrayList(); private int minimalFilteredServers = 1; private float minimalFilteredPercentage = 0.0F; public List<Server> getEligibleServers(List<Server> servers, Object loadBalancerKey) { List<Server> result = super.getEligibleServers(servers, loadBalancerKey); AbstractServerPredicate predicate; for(Iterator i = this.fallbacks.iterator(); (result.size() < this.minimalFilteredServers || result.size() <= (int)((float)servers.size() * this.minimalFilteredPercentage)) && i.hasNext(); result = predicate.getEligibleServers(servers, loadBalancerKey)) { predicate = (AbstractServerPredicate)i.next(); } return result; } }
先使用父类的getEligibleServers进行过滤一遍( 默认情况下也就是没过滤)
然后按照fallbacks中存储的过滤器顺序进行过滤(此处就是先ZoneAvoidancePredicate然后AvailabilityPredicate)
当然进行下一条过滤是存在条件的 也就是:
(result.size() < this.minimalFilteredServers || result.size() <= (int)((float)servers.size() * this.minimalFilteredPercentage)) && i.hasNext()
当前过滤后的实例结果集大小小于最小过滤集合总数了(此处小于默认值1也就是0了)或者过滤后的结果集大小小于等于实例总数乘以最小过滤集合百分比了(此处比例因子是0,所以相当于结果集大小还是0了),也就是当前服务实例的结果集已经不满足继续过滤的需求了。但这时候&& i.hasNext() 也就是过滤条件还没结束。。则继续进行过滤。
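反复琢磨了会。。没看懂啊!!什么情况,假如父类过滤后result.size()>0的话,那循环条件中直接就(xx;false&&true;xxxx)了直接就退出了,那过滤条件是摆设么。。。换句话说,result.size()=0了 满足(xx;true&&true;xxxx)然后开始执行xxxx的过滤条件了。。问题是result都为空了。还过滤什么?!(注:仔细看循环的更新语句result = predicate.getEligibleServers(servers, loadBalancerKey),fallback过滤器是对原始的servers列表重新过滤,而不是对已经为空的result继续过滤;也就是说,当前一个条件过滤得太狠、结果不够用时,才退而求其次换用下一个fallback条件,这是一种兜底机制。)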
总结:
Spring Cloud使用Feign+Ribbon可以方便的实现客户端负载均衡策略,而且提供多种负载规则,当然也可以通过实现AbstractLoadBalancerRule抽象类或者IRule进行扩展。简单方便。