Hbase的写入负载均衡

在写Hbase的时候,会担心数据分布在各个region上不均匀(与预分区无关);

这个时候可能想到的方式是对rowkey做hash、加盐等(当然是可以的,但是读取的时候该怎么办呢?以后再单独写一篇文章说明)

本文采用另一种方式:在table级别按region个数对各个regionServer做均衡。先引入如下Maven仓库和依赖:

    <repositories>
        <repository>
            <id>cloudera</id>
            <url>https://repository.cloudera.com/artifactory/cloudera-repos</url>
        </repository>
    </repositories>

<dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-client</artifactId>
            <version>1.2.1</version>
            <exclusions>
                <exclusion>
                    <groupId>javax.servlet</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>
        <dependency>
            <groupId>org.apache.hbase</groupId>
            <artifactId>hbase-server</artifactId>
            <version>1.2.1</version>
            <exclusions>
                <exclusion>
                    <groupId>javax.servlet</groupId>
                    <artifactId>*</artifactId>
                </exclusion>
            </exclusions>
        </dependency>

代码:

HbaseBalancer:
package hbase_balance;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.*;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;

import java.io.IOException;
import java.util.*;

/**
 * Balances the regions of one table across the cluster's region servers.
 *
 * <p>Strategy: at table level, every region a server hosts beyond the per-server
 * average is moved to the server currently tracking the fewest regions of that
 * table. This is used in place of {@code admin.balancer()}.
 *
 * <p>Result: at table level, the region count of every region server ends up
 * around the average.
 */
public class HbaseBalancer {

    public static final String tableStr = "data1";
    public static final String ZK_QUORUM = "hadoop01:2181,hadoop02:2181,hadoop03:2181";

    public static final Integer BUCKETS_PER_NODE = 50;

    /**
     * Connects to the cluster behind {@link #ZK_QUORUM}, computes the average number
     * of regions of table {@link #tableStr} per live region server, and moves the
     * surplus regions to the least-loaded servers.
     *
     * @param args ignored
     * @throws IOException if talking to the cluster fails
     * @throws IllegalStateException if the cluster reports no live region servers
     */
    public static void main(String[] args) throws IOException {

        Configuration config = HBaseConfiguration.create();
        config.set(HConstants.ZOOKEEPER_QUORUM, ZK_QUORUM);

        // try-with-resources: the connection, admin and locator are all released
        // even when a lookup or a move throws.
        try (Connection conn = ConnectionFactory.createConnection(config);
             Admin admin = conn.getAdmin();
             RegionLocator locator = conn.getRegionLocator(TableName.valueOf(tableStr))) {

            Collection<ServerName> hServers = admin.getClusterStatus().getServers();
            if (hServers.isEmpty()) {
                // Without this guard the average below is a division by zero.
                throw new IllegalStateException("cluster status reports no live region servers");
            }

            System.out.println("region servers :");
            // hostname -> bookkeeping entry, one per live region server.
            // NOTE(review): keyed by hostname only, so several region servers on the
            // same host would share one entry — confirm that cannot happen here.
            Map<String, RegionServer> allRegionServers = new HashMap<>();
            for (ServerName server : hServers) {
                RegionServer rs = new RegionServer();
                rs.setServerName(server);
                allRegionServers.put(server.getHostname(), rs);
                System.out.println(server.getStartcode() + " " + server.getHostname() + " " + server.getHostAndPort());
            }

            // Current location of every region of the table.
            List<HRegionLocation> hRegionLocations = locator.getAllRegionLocations();

            int avgCnt = hRegionLocations.size() / hServers.size();
            System.out.println("avgCnt :" + avgCnt);
            System.out.println("hRegionLocations.size() :" + hRegionLocations.size());
            System.out.println("hServers.size() :" + hServers.size());

            // Regions found on a server that already tracks avgCnt regions; these are
            // the candidates for reassignment.
            List<HRegionLocation> toAssign = new ArrayList<>();

            System.out.println("=============== get Region Location end =============== ");
            // Keep up to avgCnt regions on the server that already hosts them.
            for (HRegionLocation hRegionLocation : hRegionLocations) {
                ServerName current = hRegionLocation.getServerName();
                RegionServer rs = current == null ? null : allRegionServers.get(current.getHostname());
                if (rs == null) {
                    // The location has no server or names a host that is not in the
                    // live-server map; there is no entry to count it against.
                    System.err.println("skip region without a live region server: "
                            + hRegionLocation.getRegionInfo().getRegionNameAsString());
                    continue;
                }
                System.out.println("hostname :" + current.getHostname());
                System.out.println("rs.getRegions().size() :" + rs.getRegions().size());
                if (rs.getRegions().size() == 0) {
                    rs.setServerName(current);
                    System.out.println("hRegionLocation.getServerName()  :" + current);
                }
                if (rs.getRegions().size() < avgCnt) {
                    rs.addRegion(hRegionLocation.getRegionInfo().getRegionNameAsString());
                } else {
                    toAssign.add(hRegionLocation);
                }
                System.out.println(" one of the" + hRegionLocation.toString());
            }
            System.out.println("=============== get Region Location end =============== ");

            System.out.println(" region reassign");
            for (HRegionLocation assignRegion : toAssign) {
                System.out.println("all need to reassign region " + assignRegion.toString());
            }
            System.out.println("=============== region reassign began ===============");

            for (HRegionLocation region : toAssign) {
                String destServerKey = getMinRegionServer(allRegionServers);
                RegionServer rs = allRegionServers.get(destServerKey);
                if (rs.getRegions().size() > avgCnt) {
                    // Even the least-loaded server is above the average: stop, so that
                    // no server is pushed beyond avgCnt + 1 regions.
                    break;
                }
                System.out.println(" get region toAssign" + region);

                HRegionInfo regionInfo = region.getRegionInfo();
                ServerName dest = rs.getServerName();
                String destServerName = dest.getServerName();
                if (dest.equals(region.getServerName())) {
                    // The chosen destination already hosts the region; no move needed.
                    System.out.println(" already on " + destServerName);
                } else {
                    admin.move(regionInfo.getEncodedNameAsBytes(), Bytes.toBytes(destServerName));
                    System.out.println(" reassign to " + destServerName);
                }
                rs.addRegion(regionInfo.getRegionNameAsString());
            }

            System.out.println("=============== region reassign end ===============");
        }
    }

    /**
     * Finds the region server that currently tracks the fewest regions.
     *
     * <p>When several servers tie, the first one met in the map's iteration order wins.
     *
     * @param allRegionServers hostname to region-server bookkeeping entry
     * @return hostname of the server with the fewest tracked regions, or the empty
     *         string when the map is empty
     */
    public static String getMinRegionServer(Map<String,RegionServer> allRegionServers ){
        String key = "";
        int cnt = Integer.MAX_VALUE;
        for (Map.Entry<String, RegionServer> entry : allRegionServers.entrySet()) {
            int size = entry.getValue().getRegions().size();
            if (size < cnt) {
                cnt = size;
                key = entry.getKey();
            }
        }
        return key;
    }
}
RegionServer:
package hbase_balance;

import org.apache.hadoop.hbase.ServerName;

import java.util.ArrayList;
import java.util.List;

/**
 * Bookkeeping entry for one region server: its {@link ServerName} together with
 * the names of the regions attributed to it.
 *
 * <p>Created by angel.
 */
public class RegionServer {

    /** Names of the regions attributed to this server; a fresh instance has none. */
    private List<String> regions = new ArrayList<String>();

    /** Identity of the server; stays {@code null} until {@link #setServerName} is called. */
    private ServerName serverName;

    /** Creates an entry with no server name and an empty region list. */
    public RegionServer() {
    }

    /**
     * Appends one region name to this server's list.
     *
     * @param region region name to record
     */
    public void addRegion(String region) {
        regions.add(region);
    }

    /** @return the region list held by this entry (the internal list itself, not a copy) */
    public List<String> getRegions() {
        return regions;
    }

    /**
     * Replaces the region list with the given one; the list is stored as-is, not copied.
     *
     * @param regions new region list
     */
    public void setRegions(List<String> regions) {
        this.regions = regions;
    }

    /** @return the server identity, or {@code null} if none was set */
    public ServerName getServerName() {
        return serverName;
    }

    /**
     * Sets the server identity.
     *
     * @param serverName server this entry describes
     */
    public void setServerName(ServerName serverName) {
        this.serverName = serverName;
    }

}

 

posted @ 2016-03-17 17:48  niutao  阅读(719)  评论(0编辑  收藏  举报