用 Java API 读取 HDFS 文件

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.security.PrivilegedExceptionAction;
import java.text.SimpleDateFormat;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FsStatus;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.security.UserGroupInformation;
import org.springframework.stereotype.Controller;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.ResponseBody;

import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONObject;
import bean.TableStatistic;


/**
 * Controller mapped under {@code /dfview} that reads a JSON file from HDFS and
 * returns it deserialized as a {@link TableStatistic}.
 *
 * <p>HDFS is accessed through a {@link FileSystem} obtained on behalf of a remote
 * user ({@link UserGroupInformation#createRemoteUser(String)}). Both the
 * per-user {@code UserGroupInformation} and the per-user {@code FileSystem} are
 * cached for the lifetime of this instance and released by {@link #destroy()}.
 */
@Controller
@RequestMapping("/dfview")
public class DataFrameViewController extends BaseController {

    private static final Logger LOG = Logger.getLogger(DataFrameViewController.class.getName());

    /** Name of the HDFS file holding the serialized statistics (matched case-insensitively). */
    private static final String HDFS_JSON_NAME = "jsonObj";

    /** HDFS user on whose behalf the statistics directory is read. */
    private static final String DEFAULT_USER = "bi";

    /** HDFS directory that is searched for {@link #HDFS_JSON_NAME}. */
    private static final String STATISTIC_DIR = "/user/cbt/datax/temp_transfer/zzzdes";

    /** Charset used to decode the JSON file; JSON interchange text is UTF-8. */
    private static final String JSON_CHARSET = "UTF-8";

    private static final int READ_BUFFER_SIZE = 4096;

    /** Classpath resources layered on top of the default Hadoop configuration. */
    private static final String[] HADOOP_CONF_FILES = { "core-site.xml",
            "hdfs-site.xml" };

    /** Per-thread formatter for {@code yyyy-MM-dd HH:mm:ss} timestamps. */
    public static final ThreadLocal<SimpleDateFormat> appDateFormat = new ThreadLocal<SimpleDateFormat>() {
        @Override
        public SimpleDateFormat initialValue() {
            SimpleDateFormat dateformat = new java.text.SimpleDateFormat(
                    "yyyy-MM-dd HH:mm:ss");
            return dateformat;
        }
    };

    /** Remote-user identities, keyed by user name. */
    private final ConcurrentMap<String, UserGroupInformation> cache = new ConcurrentHashMap<String, UserGroupInformation>();

    /** File systems already opened, keyed by user name. */
    private final ConcurrentMap<String, FileSystem> fileSystemCache = new ConcurrentHashMap<String, FileSystem>();

    /** Base configuration; never mutated after construction (per-user settings go on a copy). */
    private final Configuration hadoopConf = loadHadoopConf();

    /**
     * Returns the table statistics stored in the {@code jsonObj} file of the
     * statistics directory.
     *
     * <p>NOTE(review): {@code tableName} is accepted for interface compatibility but
     * is not used; the user and directory are fixed constants. Confirm whether the
     * directory is supposed to depend on the table name.
     *
     * @param tableName currently ignored
     * @return the deserialized statistics, or {@code null} if the directory contains
     *         no regular file named {@code jsonObj}
     * @throws Exception if HDFS cannot be reached, the directory cannot be listed,
     *         or the file cannot be read or parsed
     */
    @RequestMapping(value = "/getDFviewOfColumn", method = { RequestMethod.GET })
    @ResponseBody
    public TableStatistic getDFviewOfTable(String tableName)
            throws Exception {
        FileSystem fs = this.createFileSystem(DEFAULT_USER);
        Path jsonFile = findStatisticFile(fs, new Path(STATISTIC_DIR));
        if (jsonFile == null) {
            return null;
        }
        return JSON.parseObject(readFileAsString(fs, jsonFile), TableStatistic.class);
    }

    /**
     * Manual smoke test: lists the statistics directory, and for the
     * {@code jsonObj} file prints its raw content and its {@code colUnique} value
     * (both via the generic JSON tree and via {@link TableStatistic}).
     *
     * @param args unused
     * @throws Exception on any HDFS or parsing failure
     */
    public static void main(String[] args) throws Exception {
        DataFrameViewController controller = new DataFrameViewController();
        FileSystem fs = controller.createFileSystem(DEFAULT_USER);
        Path homePath = new Path(STATISTIC_DIR);
        System.out.println("***********************************");
        FileStatus[] stats = fs.listStatus(homePath);
        if (stats == null) {
            return;
        }
        for (FileStatus status : stats) {
            if (status.isFile()) {
                // Only the statistics file is parsed. Parsing every regular file
                // (with empty content for the others) dereferenced a null result.
                if (!HDFS_JSON_NAME.equalsIgnoreCase(status.getPath().getName())) {
                    continue;
                }
                String json = readFileAsString(fs, status.getPath());
                System.out.print(json);
                System.out
                        .println("************************************************");
                JSONObject jb = JSON.parseObject(json);
                System.out.println("********!!!!! : " + (jb == null ? null : jb.get("colUnique")));
                TableStatistic ts = JSON.parseObject(json, TableStatistic.class);
                System.out.println("********!!!!! : " + (ts == null ? null : ts.getColUnique()));
            } else if (status.isDirectory()) {
                System.out.println(status.getPath().toString());
            } else if (status.isSymlink()) {
                System.out.println("&&&&&&&&" + status.getPath().toString());
            }
        }
    }

    /**
     * Returns a {@link FileSystem} that acts on behalf of {@code user}, creating and
     * caching it on first use.
     *
     * @param user name of the remote user; must not be {@code null}
     * @return the cached or newly created file system for {@code user}
     * @throws IllegalArgumentException if {@code user} is {@code null}
     * @throws Exception if the file system cannot be created
     */
    public FileSystem createFileSystem(String user) throws Exception {
        if (user == null) {
            throw new IllegalArgumentException("user must not be null");
        }
        FileSystem cached = fileSystemCache.get(user);
        if (cached != null) {
            return cached;
        }
        // Work on a copy so that concurrent requests for different users do not
        // overwrite each other's setting on the shared base configuration.
        final Configuration conf = new Configuration(hadoopConf);
        conf.set("hadoop.job.ugi", user);
        UserGroupInformation ugi = getProxyUser(user);
        FileSystem created = ugi.doAs(new PrivilegedExceptionAction<FileSystem>() {
            @Override
            public FileSystem run() throws Exception {
                return FileSystem.get(conf);
            }
        });
        // If another thread won the race, use its instance. The one created here is
        // deliberately not closed: FileSystem.get may hand out a shared instance.
        FileSystem raced = fileSystemCache.putIfAbsent(user, created);
        return raced != null ? raced : created;
    }

    /**
     * Closes every file system opened for the cached users and empties both caches,
     * so that later calls to {@link #createFileSystem(String)} start afresh.
     * Failures to close are logged and do not stop the remaining users from being
     * processed.
     */
    public void destroy() {
        // Forget the handles first so no caller is given one that is being closed.
        fileSystemCache.clear();
        for (UserGroupInformation ugi : cache.values()) {
            try {
                FileSystem.closeAllForUGI(ugi);
            } catch (IOException ioe) {
                LOG.log(Level.WARNING, "Exception occurred while closing filesystems for "
                        + ugi.getUserName(), ioe);
            }
        }
        cache.clear();
    }

    /**
     * Builds the base Hadoop configuration: the defaults of
     * {@code new Configuration()} plus each of {@link #HADOOP_CONF_FILES} that is
     * present on this class's classpath. Missing files are skipped.
     */
    private static Configuration loadHadoopConf() {
        Configuration conf = new Configuration();
        ClassLoader loader = DataFrameViewController.class.getClassLoader();
        for (String fileName : HADOOP_CONF_FILES) {
            URL resource = loader.getResource(fileName);
            if (resource == null) {
                LOG.info("Hadoop configuration file not found on classpath, skipping: " + fileName);
                continue;
            }
            conf.addResource(resource);
        }
        return conf;
    }

    /**
     * Returns the cached remote-user identity for {@code user}, creating it on first
     * use. All callers for the same user receive the same instance.
     */
    private UserGroupInformation getProxyUser(String user) throws IOException {
        UserGroupInformation existing = cache.get(user);
        if (existing != null) {
            return existing;
        }
        UserGroupInformation created = UserGroupInformation.createRemoteUser(user);
        UserGroupInformation raced = cache.putIfAbsent(user, created);
        return raced != null ? raced : created;
    }

    /**
     * Finds the first regular file in {@code dir} whose name equals
     * {@link #HDFS_JSON_NAME}, ignoring case.
     *
     * @return the file's path, or {@code null} if there is no such file
     */
    private static Path findStatisticFile(FileSystem fs, Path dir) throws IOException {
        FileStatus[] stats = fs.listStatus(dir);
        if (stats == null) {
            return null;
        }
        for (FileStatus status : stats) {
            if (status.isFile()
                    && HDFS_JSON_NAME.equalsIgnoreCase(status.getPath().getName())) {
                return status.getPath();
            }
        }
        return null;
    }

    /**
     * Reads the whole of {@code file} and decodes it as UTF-8 text. The bytes are
     * decoded in one step, after reading, so that multi-byte characters are not
     * split; the stream is closed even if reading fails.
     */
    private static String readFileAsString(FileSystem fs, Path file) throws IOException {
        InputStream in = fs.open(file);
        try {
            ByteArrayOutputStream content = new ByteArrayOutputStream();
            byte[] buffer = new byte[READ_BUFFER_SIZE];
            int read;
            while ((read = in.read(buffer)) != -1) {
                content.write(buffer, 0, read);
            }
            return content.toString(JSON_CHARSET);
        } finally {
            in.close();
        }
    }
}

 

posted on 2016-05-02 09:36  在大地画满窗子  阅读(13663)  评论(0)  编辑  收藏  举报