HBase通过Excel导入数据

HBase数据库操作帮助类

package DBUtility;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Logger;

/**
 * 
 * @author LFF
 * @version 0.2 HBase数据库操作帮助类
 * 
 */
public class DbHelperHBase {

    private static Logger _logger = Logger.getLogger(DbHelperHBase.class);

    // HBase主节点IP(根据实际的修改)
    private final static String MASTER_IP = "192.168.129.130";
    // HBase表的命名空间
    private final static String HBASE_TABLE_NAMESPACE = "lff_tb";

    // 配置
    public static Configuration _configuration = null;
    // HBase管理对象
    private HBaseAdmin _hBaseAdmin = null;
    // HTable管理对象
    private HTable _hTableAdmin = null;
    private ResultScanner _resultScanner = null;
    // 删表锁
    private Boolean _lockDelTable = true;
    // 删行锁
    private Boolean _lockDelRow = true;

    // 静态构造函数
    static {
        _configuration = HBaseConfiguration.create();
        _configuration.set("hbase.zookeeper.quorum", MASTER_IP);
    }

    /**
     * 构造函数
     */
    public DbHelperHBase() {
        this.CreateHBaseNamespace();
    }

    /**
     * HBase的命名空间存在判断
     */
    private Boolean ExistsHBaseNamespace() {
        Boolean result = false;
        try {
            this._hBaseAdmin = new HBaseAdmin(_configuration);

            // 取现有命名空间列表
            NamespaceDescriptor[] nds = this._hBaseAdmin
                    .listNamespaceDescriptors();

            for (NamespaceDescriptor nd : nds) {
                // 判断是否存在此命名空间
                if (nd.getName().equals(HBASE_TABLE_NAMESPACE)) {
                    result = true;
                    break;
                }
            }
            _logger.info(String.format("HBase Namespace Exists[%s]!", result));
        } catch (MasterNotRunningException e) {
            _logger.error(e.getMessage());
        } catch (ZooKeeperConnectionException e) {
            _logger.error(e.getMessage());
        } catch (IOException e) {
            _logger.error(e.getMessage());
        } finally {
            this.CloseHBaseAdmin("ExistsHBaseNamespace");
        }
        return result;
    }

    /**
     * 创建HBase的命名空间
     */
    private void CreateHBaseNamespace() {
        try {
            this._hBaseAdmin = new HBaseAdmin(_configuration);
            if (!this.ExistsHBaseNamespace()) {
                NamespaceDescriptor descriptor = NamespaceDescriptor.create(
                        HBASE_TABLE_NAMESPACE).build();
                this._hBaseAdmin.createNamespace(descriptor);
                _logger.info(String.format(
                        "Create HBase Namespace[%s] success!",
                        HBASE_TABLE_NAMESPACE));
            }
        } catch (MasterNotRunningException e) {
            _logger.error(e.getMessage());
        } catch (ZooKeeperConnectionException e) {
            _logger.error(e.getMessage());
        } catch (IOException e) {
            _logger.error(e.getMessage());
        } finally {
            this.CloseHBaseAdmin("CreateHBaseNamespace");
        }
    }

    /**
     * 删除HBase的命名空间
     */
    private void DeleteHBaseNamespace() {
        try {
            this._hBaseAdmin = new HBaseAdmin(_configuration);
            if (this.ExistsHBaseNamespace()) {
                this._hBaseAdmin.deleteNamespace(HBASE_TABLE_NAMESPACE);
                _logger.info(String.format(
                        "Delete HBase Namespace[%s] success!",
                        HBASE_TABLE_NAMESPACE));
            }
        } catch (MasterNotRunningException e) {
            _logger.error(e.getMessage());
        } catch (ZooKeeperConnectionException e) {
            _logger.error(e.getMessage());
        } catch (IOException e) {
            _logger.error(e.getMessage());
        } finally {
            this.CloseHBaseAdmin("DeleteHBaseNamespace");
        }
    }

    /**
     * 表存在判断
     * 
     * @param tableName
     *            表名
     * @return 存在为true
     */
    public Boolean ExistsTable(String tableName) {
        Boolean result = false;
        try {
            this._hBaseAdmin = new HBaseAdmin(_configuration);
            result = this._hBaseAdmin.tableExists(tableName);
        } catch (IOException e) {
            _logger.error(e.getMessage());
        } finally {
            this.CloseHBaseAdmin("ExistsTable");
        }
        return result;
    }

    /**
     * 创建表
     * 
     * @param tableName
     *            表名
     * @param cFamilyName
     *            列族名
     */
    public void CreateTable(String tableName, String... cFamilyName) {
        this.CreateTable(tableName, false, cFamilyName);
    }

    /**
     * 创建表
     * 
     * @param tableName
     *            表名
     * @param isCoverTable
     *            是否覆盖已存在的表
     * @param cFamilyName
     *            列族名
     */
    public void CreateTable(String tableName, Boolean isCoverTable,
            String... cFamilyName) {
        _logger.info("start create table ......");
        try {
            this._hBaseAdmin = new HBaseAdmin(_configuration);
            tableName = HBASE_TABLE_NAMESPACE + ":" + tableName;
            // 表存在判断
            if (this.ExistsTable(tableName)) {
                // 是否强制新建表
                if (isCoverTable) {
                    this._hBaseAdmin.disableTable(tableName); // 禁用表
                    this._hBaseAdmin.deleteTable(tableName); // 删除表
                    _logger.info(tableName + " is exist,detele ......");
                    // 建表准备
                    TableName tn = TableName.valueOf(tableName);
                    HTableDescriptor htd = new HTableDescriptor(tn);
                    // 加列族
                    for (int i = 0; i < cFamilyName.length; i++) {
                        HColumnDescriptor hcd = new HColumnDescriptor(
                                cFamilyName[i]);
                        htd.addFamily(hcd);
                    }

                    // 建表
                    this._hBaseAdmin.createTable(htd);

                    _logger.info("Cover HBase Table[" + tableName
                            + "] success !");

                } else {
                    _logger.info(tableName + " is exist......no create table !");
                }
            } else {
                // 建表准备
                TableName tn = TableName.valueOf(tableName);
                HTableDescriptor htd = new HTableDescriptor(tn);
                // 加列族
                for (int i = 0; i < cFamilyName.length; i++) {
                    HColumnDescriptor hcd = new HColumnDescriptor(
                            cFamilyName[i]);
                    htd.addFamily(hcd);
                }

                // 建表
                this._hBaseAdmin.createTable(htd);

                _logger.info("Create New HBase Table[" + tableName
                        + "] success !");
            }
        } catch (MasterNotRunningException e) {
            _logger.error(e.getMessage());
        } catch (ZooKeeperConnectionException e) {
            _logger.error(e.getMessage());
        } catch (IOException e) {
            _logger.error(e.getMessage());
        } finally {
            this.CloseHBaseAdmin("CreateTable");
        }
        _logger.info("end create table ......");
    }

    /**
     * 添加一行多限定符的数据
     * 
     * @param tableName
     *            表名
     * @param columnFamily
     *            列族
     * @param rowKey
     *            行键
     * @param cqAndvalue
     *            列名(列限定符)和值的键值对
     */
    public void AddRowData(String tableName, String columnFamily,
            String rowKey, Map<String, Object> cqAndvalue) {
        if (cqAndvalue.isEmpty())
            return;
        tableName = HBASE_TABLE_NAMESPACE + ":" + tableName;
        try {
            List<Put> puts = new ArrayList<Put>();
            for (String cq : cqAndvalue.keySet()) {
                _hTableAdmin = new HTable(_configuration, tableName);
                Put put = new Put(Bytes.toBytes(rowKey));
                put.add(Bytes.toBytes(columnFamily), Bytes.toBytes(cq),
                        Bytes.toBytes(cqAndvalue.get(cq).toString()));
                puts.add(put);
            }
            _hTableAdmin.put(puts);
        } catch (IOException e) {
            _logger.error(e.getMessage());
        } finally {
            this.CloseHTableAdmin("AddRowData");
        }
    }

    /**
     * 关闭HBase连接
     */
    private void CloseHBaseAdmin(String methodName) {
        try {
            this._hBaseAdmin.close();
            _logger.info(methodName + "(...):关闭与HBase的连接!");
        } catch (IOException e) {
            _logger.error(e.getMessage());
        }
    }

    /**
     * 关闭HTable连接
     */
    private void CloseHTableAdmin(String methodName) {
        try {
            this._hTableAdmin.close();
            _logger.info(methodName + "(...):关闭与HTable的连接!");
        } catch (IOException e) {
            _logger.error(e.getMessage());
        }
    }

    /**
     * 关闭扫描器
     * 
     * @param methodName
     *            所在方法名
     */
    private void CloseResultScanner(String methodName) {
        this._resultScanner.close();
        _logger.info(methodName + "(...):关闭与ResultScanner的连接!");
    }
}
View Code

Excel文件操作帮助类

package Common;

import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.ss.usermodel.*;

/**
 * 
 * @author LFF
 * @version 0.5 Excel文件操作帮助类
 * 
 */
public class ExcelPOIHelper {

    // D盘建一个空的workbook.xls文件
    public static void Create(String path, String name) {
        Workbook wb = new HSSFWorkbook();
        FileOutputStream fileOut;
        try {
            fileOut = new FileOutputStream("D:/workbook.xls");
            wb.write(fileOut);
            fileOut.close();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /**
     * 取出Excel所有工作簿名
     * 
     * @param fullPath
     *            Excel文件完整地址("D:/workbook.xls")
     * @return 工作簿名列表
     */
    public static List<String> GetSheets(String fullPath) {
        List<String> result = new ArrayList<String>();
        try {
            FileInputStream file = new FileInputStream(fullPath);
            POIFSFileSystem ts = new POIFSFileSystem(file);
            Workbook workbook = new HSSFWorkbook(ts);
            for (int i = 0; i < workbook.getNumberOfSheets(); i++) {
                String sheetName = workbook.getSheetName(i);
                result.add(i, sheetName);
            }
            file.close();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return result;
    }

    /**
     * 取工作簿中所有的行
     * 
     * @param fullPath
     *            Excel文件完整地址("D:/workbook.xls")
     * @param sheetName
     *            工作簿名
     * @return 键值对:<RowKey,<ColumnName, Value>>
     */
    public static Map<String, List<Map<String, String>>> GetRows(
            String fullPath, String sheetName) {
        Map<String, List<Map<String, String>>> resultRow = new HashMap<String, List<Map<String, String>>>();
        List<Map<String, String>> resultCells;
        Map<String, String> resultCell;
        try {
            FileInputStream file = new FileInputStream(fullPath);
            POIFSFileSystem ts = new POIFSFileSystem(file);
            Workbook workbook = new HSSFWorkbook(ts);
            Sheet sheet = workbook.getSheet(sheetName);

            int rowCounts = sheet.getPhysicalNumberOfRows();// 行数
            int columnCounts = sheet.getRow(0).getPhysicalNumberOfCells(); // 列数

            for (int i = 1; i < rowCounts; i++) {
                Row row = sheet.getRow(i);// 循环取第一行之后的每一行
                row.getCell(0).setCellType(Cell.CELL_TYPE_STRING);

                resultCells = new ArrayList<Map<String, String>>();
                resultCell = new HashMap<String, String>();

                String rowKey = row.getCell(0).toString();

                for (int j = 1; j < columnCounts; j++) {
                    Cell cell = row.getCell(j);// 循环取第一列之后的每一列
                    if (null != cell) {
                        cell.setCellType(Cell.CELL_TYPE_STRING);

                        String columnName = sheet.getRow(0).getCell(j)
                                .toString();
                        String cellValue = cell.toString();

                        resultCell.put(columnName, cellValue);
                    }
                }
                resultCells.add(resultCell);
                resultRow.put(rowKey, resultCells);
            }
            file.close();
        } catch (FileNotFoundException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }

        return resultRow;
    }
}

取出方式

public static void main(String[] agrs) {        
        String fullPath = "C:/Users/LFF/Desktop/HBase导入模版.xls";
        // temp = ExcelPOIHelper.GetSheets(fullPath); // 取所有工作簿表名
        String sheetName = "User-Info";

        String tbName = sheetName.split("-")[0];
        String tbCF = sheetName.split("-")[1];

        Map<String, List<Map<String, String>>> rows = new HashMap<String, List<Map<String, String>>>();

        rows = ExcelPOIHelper.GetRows(fullPath, sheetName);

        Iterator rowIterator = rows.entrySet().iterator();
        while (rowIterator.hasNext()) {
            Entry rowEntry = (Entry) rowIterator.next();
            Object rowKey = rowEntry.getKey();
            Object cellsValue = rowEntry.getValue();
            // System.out.println(rowKey);
            // System.out.println(cellsValue);

            List<Map<String, String>> cells = new ArrayList<Map<String, String>>();
            cells = (List<Map<String, String>>) cellsValue;
            Iterator<Map<String, String>> it = cells.iterator();
            while (it.hasNext()) {
                Object cellObject = it.next();

                Map<String, String> cell = new HashMap<String, String>();
                cell = (HashMap<String, String>) cellObject;

                Iterator cellIterator = cell.entrySet().iterator();
                while (cellIterator.hasNext()) {
                    Entry cellEntry = (Entry) cellIterator.next();
                    Object cellColumn = cellEntry.getKey();
                    Object value = cellEntry.getValue();

                    String strCellColumn = cellColumn.toString();
                    String columnName = strCellColumn.split("<")[0];
                    String valueType = strCellColumn.substring(
                            strCellColumn.indexOf("<") + 1,
                            strCellColumn.lastIndexOf(">"));

                    System.out
                            .println(String
                                    .format("存储在表[%s]的列族[%s]中行键为[%s]的[%s]列中的值为[%s](类型为[%s])",
                                            tbName, tbCF, rowKey, columnName,
                                            value, valueType));

                    Add(tbName, tbCF, rowKey.toString(), columnName, value.toString());

                }
            }
        }

需要加入poi-3.10.1-20140818.jar包,下载在http://poi.apache.org/download.html。

posted @ 2014-09-19 10:39  悦峯  阅读(3569)  评论(3编辑  收藏  举报