Hadoop Study Notes (7): The Java HDFS API
I. Working with the HDFS FileSystem
Third-party jars required for HDFS:
Hadoop 1.x:
commons-configuration-1.6.jar
commons-lang-2.4.jar
commons-logging-1.1.1.jar
hadoop-core-1.2.1.jar
log4j-1.2.15.jar
Hadoop 2.x:
hadoop-mapreduce-client-core-2.2.0.jar
hadoop-common-2.2.0.jar
hadoop-mapreduce-client-common-2.2.0.jar
hadoop-mapreduce-client-jobclient-2.2.0.jar
Note: depending on your environment and Hadoop version, you can either add these jars by hand or declare them as Maven dependencies, as sketched below.
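For example, a minimal sketch of the Maven coordinates for the 2.x setup (the versions shown match the jars listed above; pin them to your own cluster's release). The hadoop-hdfs artifact is included here because the DistributedFileSystem class used below ships in it on Hadoop 2.x:

<!-- Sketch only: match the versions to your cluster -->
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-common</artifactId>
    <version>2.2.0</version>
</dependency>
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-hdfs</artifactId>
    <version>2.2.0</version>
</dependency>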
File operations
1 Creating the utility class:
Create a utility class, HDFSUtil, that centralizes the shared setup:
package com.laowang.utils;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.hdfs.DistributedFileSystem;

import java.io.IOException;
import java.net.URI;

/**
 * @author laowang
 * @version v1.0.0
 * @apiNote HDFS utility class
 * @since 2018/4/26 10:36
 */
public class HDFSUtil {
    /**
     * @author laowang
     * @version v1.0.0
     * @apiNote Get a FileSystem handle
     * @since 2018/4/26 10:39
     */
    public static FileSystem getFileSystem() {
        FileSystem hdfs = null;
        // Load the configuration
        Configuration conf = new Configuration();
        conf.set("fs.hdfs.impl", DistributedFileSystem.class.getName());
        try {
            // Connect to the file system
            hdfs = FileSystem.get(URI.create("hdfs://master01:9000"), conf);
        } catch (IOException e) {
            e.printStackTrace();
        }
        return hdfs;
    }

    /**
     * Close the file system handle
     * @param fileSystem the FileSystem to close; may be null
     */
    public static boolean closeFileSystem(FileSystem fileSystem) {
        boolean flag = false;
        if (fileSystem != null) {
            try {
                fileSystem.close();
                flag = true;
            } catch (Exception e) {
                e.printStackTrace();
            }
        }
        return flag;
    }
}
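A minimal usage sketch of the class above, assuming the cluster at hdfs://master01:9000 is reachable and /test exists (the demo class name is made up for illustration):

import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import com.laowang.utils.HDFSUtil;

public class HDFSUtilDemo { // hypothetical demo class
    public static void main(String[] args) throws Exception {
        FileSystem hdfs = HDFSUtil.getFileSystem();
        try {
            // Probe a path to verify the connection works
            System.out.println("/test exists: " + hdfs.exists(new Path("/test/")));
        } finally {
            // Always release the handle when done
            HDFSUtil.closeFileSystem(hdfs);
        }
    }
}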
2 Uploading a file to HDFS
/**
 * @author laowang
 * @version v1.0.0
 * @apiNote Upload a file
 * @since 2018/4/26 14:14
 */
public static void putFile() throws Exception {
    FileSystem hdfs = HDFSUtil.getFileSystem();
    // Local source file
    Path sourcePath = new Path("E:\\port.txt");
    // Target directory on HDFS
    Path targetPath = new Path("/test/");
    // Copy the local file into the file system
    hdfs.copyFromLocalFile(sourcePath, targetPath);
}
3 Reading a file
/**
 * @author laowang
 * @version v1.0.0
 * @apiNote Read a file
 * @since 2018/4/26 10:39
 */
public static void testRead() throws Exception {
    FileSystem hdfs = HDFSUtil.getFileSystem();
    // File to read
    Path path = new Path("/test/initNetwork.sh");
    // Open an input stream on the file
    FSDataInputStream inputStream = hdfs.open(path);
    // Copy the file contents to the console
    IOUtils.copyBytes(inputStream, System.out, 4096, false);
    // Close the stream
    IOUtils.closeStream(inputStream);
}
Listing the contents of a directory works much the same way:

/**
 * @author laowang
 * @version v1.0.0
 * @apiNote List a directory
 * @since 2018/4/26 14:29
 */
public static void testList() throws Exception {
    FileSystem hdfs = HDFSUtil.getFileSystem();
    Path path = new Path("/test/");
    FileStatus[] fileStatuses = hdfs.listStatus(path);
    for (FileStatus fileStatus : fileStatuses) {
        Path p = fileStatus.getPath();
        String info = fileStatus.isDirectory() ? "directory" : "file";
        System.out.println(info + ": " + p);
    }
}
4 Creating directories and files on HDFS and writing content
/**
 * Create a directory
 * @throws Exception
 */
public static void testDirectory() throws Exception {
    FileSystem hdfs = HDFSUtil.getFileSystem();
    Path path = new Path("/test/laowang");
    boolean mkdirs = hdfs.mkdirs(path);
    System.out.println("Create directory: " + (mkdirs ? "succeeded" : "failed"));
}
/**
 * Create a file and write content to it
 * @throws Exception
 */
public static void testCreate() throws Exception {
    FileSystem hdfs = HDFSUtil.getFileSystem();
    Path path = new Path("/test/test.txt");
    // Create the file (an existing file at this path is overwritten by default)
    FSDataOutputStream outputStream = hdfs.create(path);
    // Write the content
    outputStream.writeUTF("hello hadoop!");
    IOUtils.closeStream(outputStream);
}
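One caveat about this sketch: writeUTF comes from DataOutputStream and prefixes the string with a two-byte length field in modified UTF-8, so reading the file back with hadoop fs -cat shows two extra leading bytes. For plain text, writing the bytes directly, e.g. outputStream.write("hello hadoop!".getBytes("UTF-8")), avoids the prefix.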
5 Renaming a file
/**
 * Rename a file
 */
public static void testRename() throws Exception {
    FileSystem hdfs = HDFSUtil.getFileSystem();
    // Source path
    Path srcPath = new Path("/test/test.txt");
    // Target path
    Path targetPath = new Path("/test/test2.txt");
    boolean rename = hdfs.rename(srcPath, targetPath);
    System.out.println("Rename: " + (rename ? "succeeded" : "failed"));
}
6 Deleting a file on HDFS
/**
 * Delete a file
 */
public static void testDelete() throws Exception {
    FileSystem hdfs = HDFSUtil.getFileSystem();
    Path path = new Path("/test/test2.txt");
    // Delete immediately; pass true instead of false to remove a directory recursively.
    // (deleteOnExit, by contrast, only schedules deletion for when the FileSystem is closed.)
    boolean b = hdfs.delete(path, false);
    System.out.println("Delete: " + (b ? "succeeded" : "failed"));
}
HDFS information
1 Finding where a file is stored in the HDFS cluster
/**
 * @author laowang
 * @version v1.0.0
 * @apiNote Find where a file's blocks are stored in the HDFS cluster
 * @since 2018/4/26 15:31
 */
public static void testLocation() throws Exception {
    FileSystem hdfs = HDFSUtil.getFileSystem();
    Path path = new Path("/test/port.txt");
    FileStatus fileStatus = hdfs.getFileStatus(path);
    // One BlockLocation per block of the file
    BlockLocation[] fileBlockLocations = hdfs.getFileBlockLocations(fileStatus, 0, fileStatus.getLen());
    for (BlockLocation blockLocation : fileBlockLocations) {
        String[] hosts = blockLocation.getHosts();
        for (String host : hosts) {
            System.out.println("Host: " + host);
        }
    }
}
2 Getting the names of all nodes in the HDFS cluster
/**
 * @author laowang
 * @version v1.0.0
 * @apiNote Get the names of all DataNodes in the HDFS cluster
 * @since 2018/4/26 15:38
 */
public static void testCluster() throws Exception {
    // DataNode statistics are only exposed by the HDFS implementation
    DistributedFileSystem distributedFileSystem = (DistributedFileSystem) HDFSUtil.getFileSystem();
    DatanodeInfo[] dataNodeStats = distributedFileSystem.getDataNodeStats();
    for (DatanodeInfo datanodeInfo : dataNodeStats) {
        System.out.println(datanodeInfo.getHostName());
    }
}