Hadoop: A Java Utility Class for Basic HDFS Operations Using the HDFS API

1. Add the Maven dependencies

<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>3.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>3.1.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>3.1.0</version>
</dependency>


2. The Java utility class

package com.example.demo;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
import java.util.LinkedList;
import java.util.List;

public class HDFSUtils {
    public static FileSystem fileSystem;
    static {
        Configuration conf = new Configuration();
        // point the client at the NameNode (the default file system)
        conf.set("fs.defaultFS", "hdfs://127.0.0.1:9000");
        // on a small cluster (e.g. a single DataNode), never try to replace a failed
        // DataNode in the write pipeline; without this, append() can fail
        conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
        conf.setBoolean("dfs.client.block.write.replace-datanode-on-failure.enable", true);

        // a static initializer runs only once, so no null check is needed here
        try {
            fileSystem = FileSystem.get(conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    public static boolean createDir(String path) throws IOException {
        boolean flag = false;
        if (!fileSystem.exists(new Path(path))) { // only create the directory if it does not exist yet
            flag = fileSystem.mkdirs(new Path(path));
        }

        return flag;
    }

    public static boolean delete(String path) throws IOException {
        boolean flag = false;
        if(fileSystem.exists(new Path(path))){
            flag = fileSystem.delete(new Path(path),true);
        }

        return flag;
    }

    public static void uploadToHdfs(String localPath, String remotePath, boolean override, boolean delSrc) throws IOException {
        if (fileSystem.exists(new Path(remotePath)) && !override) {
            throw new IOException(remotePath + " already exists");
        }

        fileSystem.copyFromLocalFile(delSrc, new Path(localPath), new Path(remotePath));
    }

    public static void downloadFromHdfs(String localPath, String remotePath, boolean override, boolean delSrc) throws IOException {
        File localFile = new File(localPath);
        if (localFile.exists()) {
            if (!override) {
                throw new IOException(localPath + " already exists");
            }
            localFile.delete();
        }

        // passing true as the last argument (useRawLocalFileSystem) prevents a local .crc checksum file from being created
        fileSystem.copyToLocalFile(delSrc, new Path(remotePath), new Path(localPath), true);
    }

    public static String readFile(String remotePath) throws IOException {
        if (!fileSystem.exists(new Path(remotePath))) {
            throw new IOException(remotePath + " does not exist");
        }
        StringBuilder sb = new StringBuilder();
        // both streams are closed automatically when the try block exits
        try (FSDataInputStream in = fileSystem.open(new Path(remotePath));
             BufferedReader br = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String nextLine;
            // read line by line; readLine() strips the separators, so add one back
            while ((nextLine = br.readLine()) != null) {
                sb.append(nextLine).append('\n');
            }
        }

        return sb.toString();
    }

    public static void appendFile(String remotePath, String content) throws IOException {
        if (!fileSystem.exists(new Path(remotePath))) {
            throw new IOException(remotePath + " does not exist");
        }
        // on a small cluster, append() depends on the replace-datanode-on-failure settings above
        try (FSDataOutputStream out = fileSystem.append(new Path(remotePath))) {
            out.write(content.getBytes(StandardCharsets.UTF_8));
        }
    }

    public static List<LocatedFileStatus> listFiles(String remotePath,boolean recursive) throws IOException{
        if(!fileSystem.exists(new Path(remotePath))){
            throw new IOException(remotePath + " does not exist");
        }

        List<LocatedFileStatus> list=new LinkedList<>();
        RemoteIterator<LocatedFileStatus> listFiles = fileSystem.listFiles(new Path(remotePath), recursive);
        while(listFiles.hasNext()){
            list.add(listFiles.next());
        }

        return list;
    }

    public static List<FileStatus> listStatus(String remotePath) throws IOException{
        if(!fileSystem.exists(new Path(remotePath))){
            throw new IOException(remotePath + " does not exist");
        }

        FileStatus[] listStatus = fileSystem.listStatus(new Path(remotePath));
        return Arrays.asList(listStatus);
    }


}
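
A caveat worth noting (not from the original post): FileSystem.get(conf) returns a JVM-wide cached instance, so closing it anywhere closes it for every caller of this utility class. A minimal sketch for a clean shutdown, assuming you want to close the shared handle exactly once, is a JVM shutdown hook:

// sketch: close the shared FileSystem exactly once, when the JVM exits
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
    try {
        HDFSUtils.fileSystem.close();
    } catch (IOException e) {
        e.printStackTrace();
    }
}));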


3. Testing

try {
    //HDFSUtils.createDir("/t1/t1_1");
    //HDFSUtils.uploadToHdfs("D:\\tmp\\hello.txt", "/t1/t1_1/hello.txt", false, false);
    //HDFSUtils.downloadFromHdfs("D:\\tmp\\hello.txt", "/t1/t1_1/hello.txt", true, false);
    //System.out.println(HDFSUtils.readFile("/t1/t1_1/hello.txt"));
    //HDFSUtils.appendFile("/t1/t1_1/hello.txt", "追加中文看看");

    //List<LocatedFileStatus> list = HDFSUtils.listFiles("/", false);
    //for (LocatedFileStatus lfs : list) {
    //    System.out.println(lfs.getPath().toString());
    //}
    List<FileStatus> list = HDFSUtils.listStatus("/");
    for (FileStatus fs : list) {
        System.out.println(fs.getPath().toString());
    }
} catch (IOException e) {
    e.printStackTrace();
}


The code above is offered as a reference for readers (and for my own future use). Finally, here is the HDFS FileSystem API documentation; it covers many more advanced operations that are well worth experimenting with:

https://hadoop.apache.org/docs/current/api/org/apache/hadoop/fs/FileSystem.html
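
As a starting point, here is a minimal, untested sketch of two FileSystem methods not covered above, rename and getContentSummary (the paths are placeholders, and ContentSummary comes from org.apache.hadoop.fs.ContentSummary):

// move (rename) a file within HDFS; returns false if the source is missing or the target already exists
boolean renamed = HDFSUtils.fileSystem.rename(
        new Path("/t1/t1_1/hello.txt"), new Path("/t1/t1_1/hello2.txt"));

// summarize a directory tree: total bytes, file count, directory count
ContentSummary summary = HDFSUtils.fileSystem.getContentSummary(new Path("/t1"));
System.out.println(renamed + ", " + summary.getLength() + " bytes in "
        + summary.getFileCount() + " files and " + summary.getDirectoryCount() + " directories");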


Appendix: the following code connects to a Hadoop HA cluster. I have no cluster environment at the moment and have not tested it, so please study it with a critical eye.

package com.imooc.bigdata;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem; // used only by the commented-out alternative below

import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;

/**
 * Created by Administrator on 2020/4/15.
 */
public class HDFSTest {

    public static void main(String[] args) throws URISyntaxException, IOException, InterruptedException {
        Configuration conf=new Configuration(false);
        String nameservices = "mycluster";
        String[] namenodesAddr = {"rocketmq-nameserver1:8020","rocketmq-nameserver2:8020"};
        String[] namenodes = {"nn1","nn2"};
        conf.set("fs.defaultFS", "hdfs://" + nameservices);
        conf.set("dfs.nameservices",nameservices);
        conf.set("dfs.ha.namenodes." + nameservices, namenodes[0]+","+namenodes[1]);
        conf.set("dfs.namenode.rpc-address." + nameservices + "." + namenodes[0], namenodesAddr[0]);
        conf.set("dfs.namenode.rpc-address." + nameservices + "." + namenodes[1], namenodesAddr[1]);
        conf.set("dfs.client.failover.proxy.provider." + nameservices,"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");
        // the nameservice is a logical (HA) name, so the URI must not carry a port;
        // the failover proxy provider resolves the active NameNode's RPC address
        String hdfsRPCUrl = "hdfs://" + nameservices;
//        DistributedFileSystem dfs = new DistributedFileSystem();
//        dfs.initialize(URI.create(hdfsRPCUrl),conf);
        FileSystem dfs = FileSystem.get(new URI(hdfsRPCUrl), conf, "root");
        try {
            Path path = new Path("/dfs");
            boolean result = dfs.mkdirs(path);
            System.out.println(result);
        } catch (IOException e) {
            e.printStackTrace();
        } finally{
            try {
                dfs.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
}
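
With the same caveat (untested here): instead of hardcoding the HA properties, you can copy core-site.xml and hdfs-site.xml from the cluster and load them as resources. The local paths below are hypothetical:

Configuration conf = new Configuration();
// hypothetical local copies of the cluster's client configuration files
conf.addResource(new Path("conf/core-site.xml"));
conf.addResource(new Path("conf/hdfs-site.xml"));
FileSystem fs = FileSystem.get(conf);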
