Hadoop 中常用的 HDFS 代码操作
一:向HDFS中上传任意文本文件,如果指定的文件在HDFS中已经存在,由用户指定是追加到原有文件末尾还是覆盖原有的文件:
package hadoopTest;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

/**
 * Uploads a local file to HDFS. When the target already exists, the caller
 * chooses between appending to it and overwriting it.
 */
public class HDFSApi {
    /**
     * Checks whether a path exists on the configured file system.
     *
     * @param conf Hadoop configuration used to obtain the file system
     * @param path path to test
     * @return {@code true} if the path exists
     * @throws IOException if the file system cannot be reached or queried
     */
    public static boolean test(Configuration conf, String path) throws IOException {
        // Close the handle like every other method in this class does.
        try (FileSystem fs = FileSystem.get(conf)) {
            return fs.exists(new Path(path));
        }
    }

    /**
     * Copies a local file to the given remote path, overwriting the target
     * if it already exists. The local source file is kept.
     *
     * @param conf           Hadoop configuration used to obtain the file system
     * @param localFilePath  path of the local source file
     * @param remoteFilePath destination path on the file system
     * @throws IOException if the copy fails
     */
    public static void copyFromLocalFile(Configuration conf, String localFilePath, String remoteFilePath) throws IOException {
        Path localPath = new Path(localFilePath);
        Path remotePath = new Path(remoteFilePath);
        try (FileSystem fs = FileSystem.get(conf)) {
            // First flag: delete the source after copying; second flag: overwrite the target.
            fs.copyFromLocalFile(false, true, localPath, remotePath);
        }
    }

    /**
     * Appends the content of a local file to the end of a remote file.
     *
     * @param conf           Hadoop configuration used to obtain the file system
     * @param localFilePath  path of the local file whose bytes are appended
     * @param remoteFilePath path of the remote file that receives the bytes
     * @throws IOException if the local file cannot be read or the append fails
     */
    public static void appendToFile(Configuration conf, String localFilePath, String remoteFilePath) throws IOException {
        Path remotePath = new Path(remoteFilePath);
        // try-with-resources closes out, in and fs (in that order) even when a
        // read or write throws, so no stream is leaked on failure.
        try (FileSystem fs = FileSystem.get(conf);
             FileInputStream in = new FileInputStream(localFilePath);
             FSDataOutputStream out = fs.append(remotePath)) {
            byte[] data = new byte[1024];
            int read;
            while ((read = in.read(data)) > 0) {
                out.write(data, 0, read);
            }
        }
    }

    /**
     * Entry point: uploads the local file, or appends/overwrites when the
     * remote file already exists, depending on {@code choice}.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        // NOTE(review): newer Hadoop releases name this key "fs.defaultFS" - confirm
        // against the cluster version before changing it.
        conf.set("fs.default.name","hdfs://localhost:9000");
        String localFilePath = "/home/flyuz/text.txt";  // local path
        String remoteFilePath = "/text.txt";            // HDFS path
        String choice = "append";                       // append when the file exists
        // String choice = "overwrite";                 // overwrite when the file exists
        try {
            // Check whether the remote file already exists.
            boolean fileExists = HDFSApi.test(conf, remoteFilePath);
            if (fileExists) {
                System.out.println(remoteFilePath + " 已存在.");
            } else {
                System.out.println(remoteFilePath + " 不存在.");
            }
            // Act on the result.
            if (!fileExists) {
                // Remote file is missing: plain upload.
                HDFSApi.copyFromLocalFile(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " 已上传至 " + remoteFilePath);
            } else if (choice.equals("overwrite")) {
                HDFSApi.copyFromLocalFile(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " 已覆盖 " + remoteFilePath);
            } else if (choice.equals("append")) {
                HDFSApi.appendToFile(conf, localFilePath, remoteFilePath);
                System.out.println(localFilePath + " 已追加至 " + remoteFilePath);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
二:从HDFS中下载指定文件,如果本地文件与要下载的文件名称相同,则自动对下载的文件重命名;
package hadoopTest;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;

/**
 * Downloads a file from HDFS to the local disk, renaming the download
 * automatically when the local target name is already taken.
 */
public class HDFSApi {
    /**
     * Downloads a remote file to a local path. If the local path already
     * exists, the download is written to the first free name of the form
     * {@code <localFilePath>_0}, {@code <localFilePath>_1}, ...
     *
     * @param conf           Hadoop configuration used to obtain the file system
     * @param remoteFilePath path of the file to download
     * @param localFilePath  desired local destination path
     * @throws IOException if the download fails
     */
    public static void copyToLocal(Configuration conf, String remoteFilePath, String localFilePath) throws IOException {
        String targetPath = localFilePath;
        if (new File(localFilePath).exists()) {
            System.out.println(localFilePath + " 已存在.");
            // Advance the suffix until an unused name is found. The counter must
            // be incremented on every miss, otherwise an existing "<path>_0"
            // would make this loop spin forever.
            int suffix = 0;
            while (new File(localFilePath + "_" + suffix).exists()) {
                suffix++;
            }
            targetPath = localFilePath + "_" + suffix;
            System.out.println("将重新命名为: " + targetPath);
        }
        // Download; the file system handle is closed even if the copy throws.
        try (FileSystem fs = FileSystem.get(conf)) {
            fs.copyToLocalFile(new Path(remoteFilePath), new Path(targetPath));
        }
    }

    /**
     * Entry point: downloads the sample remote file to the sample local path.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:9000");
        String localFilePath = "/home/flyuz/text.txt";  // local path
        String remoteFilePath = "/text.txt";            // HDFS path
        try {
            HDFSApi.copyToLocal(conf, remoteFilePath, localFilePath);
            System.out.println("下载完成");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
三:将HDFS中指定文件的内容输出到终端中;
package hadoopTest;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;
import java.nio.charset.StandardCharsets;

/**
 * Prints the content of an HDFS file to standard output.
 */
public class HDFSApi {
    /**
     * Reads a remote file line by line and prints each line to standard output.
     *
     * @param conf           Hadoop configuration used to obtain the file system
     * @param remoteFilePath path of the file to print
     * @throws IOException if the file cannot be opened or read
     */
    public static void cat(Configuration conf, String remoteFilePath) throws IOException {
        Path remotePath = new Path(remoteFilePath);
        // try-with-resources closes reader, stream and file system (in that
        // order) even when reading fails part-way through.
        // Decode explicitly instead of relying on the platform default charset.
        // NOTE(review): assumes the remote file is UTF-8 text - confirm.
        try (FileSystem fs = FileSystem.get(conf);
             FSDataInputStream in = fs.open(remotePath);
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                System.out.println(line);
            }
        }
    }

    /**
     * Entry point: prints the sample remote file.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:9000");
        String remoteFilePath = "/text.txt";  // HDFS path

        try {
            System.out.println("读取文件: " + remoteFilePath);
            HDFSApi.cat(conf, remoteFilePath);
            System.out.println("\n读取完成");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
四:显示HDFS中指定的文件的读写权限、大小、创建时间、路径等信息;
package hadoopTest;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import java.io.*;
import java.text.SimpleDateFormat;
import java.util.Date;

/**
 * Prints path, permission, size and modification time of an HDFS path.
 */
public class HDFSApi {
    /**
     * Prints the status entries returned by {@code listStatus} for the given
     * path: path, permission, length and modification time of each entry.
     *
     * @param conf           Hadoop configuration used to obtain the file system
     * @param remoteFilePath path whose status entries are printed
     * @throws IOException if the status cannot be read
     */
    public static void ls(Configuration conf, String remoteFilePath) throws IOException {
        Path remotePath = new Path(remoteFilePath);
        // One formatter for the whole listing instead of one per entry.
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        try (FileSystem fs = FileSystem.get(conf)) {
            for (FileStatus status : fs.listStatus(remotePath)) {
                System.out.println("路径: " + status.getPath().toString());
                System.out.println("权限: " + status.getPermission().toString());
                System.out.println("大小: " + status.getLen());
                // getModificationTime() returns a timestamp; turn it into a
                // date before formatting.
                String date = format.format(new Date(status.getModificationTime()));
                System.out.println("时间: " + date);
            }
        }
    }

    /**
     * Entry point: prints the status of the sample remote file.
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "hdfs://localhost:9000");
        String remoteFilePath = "/text.txt";  // HDFS path
        try {
            System.out.println("读取文件信息: " + remoteFilePath);
            HDFSApi.ls(conf, remoteFilePath);
            System.out.println("\n读取完成");
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}