Hadoop Development with Eclipse ----- Learning the HDFS API
Environment: Ubuntu 8.04.4, Hadoop 1.0.2, Eclipse 3.7.2
Overview: this post records how to use the common HDFS APIs: uploading a file, creating a file, renaming, deleting, reading file contents, and so on.
I. Experiment steps
1. Start Hadoop and switch to the superuser:
gqy@localhost:/usr/local/hadoop-1.0.2$ su
root@localhost:/usr/local/hadoop-1.0.2# bin/hadoop namenode -format
root@localhost:/usr/local/hadoop-1.0.2# bin/start-all.sh
(Note: "namenode -format" is only needed on the very first start; it wipes any existing HDFS data, so skip it on later starts.)
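To confirm the daemons actually came up, the JDK's jps tool is handy; on a single-node Hadoop 1.x setup it should list roughly the following processes (PIDs omitted):
root@localhost:/usr/local/hadoop-1.0.2# jps
NameNode
DataNode
SecondaryNameNode
JobTracker
TaskTracker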
2. Open Eclipse and create a new project.
File-->New-->Other-->Map/Reduce Project
3. Create a new class and type in the code below. Run it with Run On Hadoop and watch the console output. You can also inspect the HDFS contents from a terminal with the shell commands, or, in Eclipse, right-click the DFS node and choose Disconnect, which refreshes the file system view.
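For example, after a run you can list and print the HDFS contents from a terminal (the paths here match the test program below):
root@localhost:/usr/local/hadoop-1.0.2# bin/hadoop fs -ls /
root@localhost:/usr/local/hadoop-1.0.2# bin/hadoop fs -cat /new2.cpp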
import java.io.FileInputStream;
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HadoopFileOperate {

    /*
     * upload a file from the local file system to HDFS
     */
    public void uploadLocalfileToHdfs(String src, String dst) throws IOException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        Path srcPath = new Path(src);
        Path dstPath = new Path(dst);
        hdfs.copyFromLocalFile(srcPath, dstPath);
        // print the target file system and list the destination directory
        System.out.println("Upload to " + conf.get("fs.default.name"));
        System.out.println("------------list files------------" + "\n");
        FileStatus[] fileStatus = hdfs.listStatus(dstPath);
        for (FileStatus file : fileStatus) {
            System.out.println(file.getPath());
        }
        hdfs.close();
    }
    /*
     * create a new file in HDFS and write the given bytes to it
     */
    public void createNewHdfsFile(String dst, byte[] content) throws IOException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        Path dstPath = new Path(dst);
        FSDataOutputStream outputStream = hdfs.create(dstPath);
        outputStream.write(content);
        outputStream.close();
        hdfs.close();
        System.out.println("success, create a new file in HDFS: " + dst);
    }
    /*
     * rename a file in HDFS
     */
    public void renameHdfsFile(String src, String newName) throws IOException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        Path srcPath = new Path(src);
        Path dstPath = new Path(newName);
        if (hdfs.rename(srcPath, dstPath)) {
            System.out.println("ok, file: " + src + " renamed to: " + newName);
        } else {
            System.out.println("error, file: " + src + " rename failed!");
        }
        hdfs.close();
    }
    /*
     * delete an HDFS file (recursive = false, so a directory must be empty)
     */
    public void deleteHdfsFile(String src) throws IOException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        Path srcPath = new Path(src);
        if (hdfs.delete(srcPath, false)) {
            System.out.println("ok, delete file: " + srcPath);
        } else {
            System.out.println("error, delete file: " + srcPath + " failed!");
        }
        hdfs.close();
    }
    /*
     * make a new directory in HDFS (creates missing parents, like mkdir -p)
     */
    public void mkdir(String dir) throws IOException {
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        Path dirPath = new Path(dir);
        if (hdfs.mkdirs(dirPath)) {
            System.out.println("ok, make dir: " + dir);
        } else {
            System.out.println("error, make dir: " + dir + " failed!");
        }
        hdfs.close();
    }
    /*
     * read the content of an HDFS file into a byte array
     */
    public byte[] readHdfsFile(String src) throws IOException {
        byte[] buffer = null;
        Configuration conf = new Configuration();
        FileSystem hdfs = FileSystem.get(conf);
        Path srcPath = new Path(src);
        if (hdfs.exists(srcPath)) {
            FSDataInputStream input = hdfs.open(srcPath);
            FileStatus state = hdfs.getFileStatus(srcPath);
            // getLen() returns a long; cast to int (fine for files under 2 GB)
            int length = (int) state.getLen();
            buffer = new byte[length];
            input.readFully(buffer);
            input.close();
        } else {
            System.out.println("error, file does not exist! Read failed!");
        }
        hdfs.close();
        return buffer;
    }
    /*
     * main: exercise each of the operations above
     */
    public static void main(String[] args) throws IOException {
        // ------------- test uploadLocalfile -----------
        String src = "/home/gqy/testFileOperate.txt";
        String dst = "/";
        HadoopFileOperate testFileOperate = new HadoopFileOperate();
        testFileOperate.uploadLocalfileToHdfs(src, dst);

        // ----------- test create HDFS file ------------
        FileInputStream file = new FileInputStream(src);
        byte[] content = new byte[file.available()];
        file.read(content); // fill the content array from the local file
        file.close();
        String newFileName = "/tmp/testFileOperate/newFile.txt";
        testFileOperate.createNewHdfsFile(newFileName, content);

        // ----------- test rename HDFS file ------------
        String rename = "/new2.cpp";
        testFileOperate.renameHdfsFile(newFileName, rename);

        // ---------- test make a new dir in HDFS -------
        String dir = "/tmp/testFileOperate/test";
        testFileOperate.mkdir(dir);

        // ----------- test delete HDFS file ------------
        // note: newFile.txt was renamed to /new2.cpp above, so this delete reports failure
        testFileOperate.deleteHdfsFile("/tmp/testFileOperate/newFile.txt");

        // ----------- test read HDFS file --------------
        byte[] readContent = testFileOperate.readHdfsFile(rename);
        if (readContent != null) {
            String contentString = new String(readContent);
            System.out.println("OK, read content: \n" + contentString);
        }
    }
}
II. Analysis of some errors
1. Why renaming an HDFS file fails:
(1) the specified src is not an HDFS file (the path does not exist in HDFS)
(2) a file with the new name already exists in HDFS
Conceptually the rename behaves as if the original file were deleted and a new one created; in HDFS it is actually a metadata-only operation on the NameNode, so no data blocks are copied or moved.
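A minimal sketch of a defensive rename that tests both failure causes before calling rename() (the helper name tryRename is mine, not part of the HDFS API):

/*
 * sketch: check both common rename failure causes up front
 */
public static boolean tryRename(FileSystem hdfs, String src, String dst) throws IOException {
    Path srcPath = new Path(src);
    Path dstPath = new Path(dst);
    if (!hdfs.exists(srcPath)) {
        System.out.println("rename failed: src does not exist in HDFS: " + src);
        return false;
    }
    if (hdfs.exists(dstPath)) {
        System.out.println("rename failed: dst already exists in HDFS: " + dst);
        return false;
    }
    return hdfs.rename(srcPath, dstPath);
}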
2. Delete failure (delete):
In hdfs.delete(src, flag), when src is a directory the flag must be true so that its contents are deleted recursively; if flag is false and the directory is not empty, an IOException is thrown.
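A wrapper can pick the flag automatically based on whether the path is a directory; a sketch, using the Hadoop 1.x FileStatus.isDir() accessor (the helper name deletePath is illustrative):

/*
 * sketch: delete a path, passing recursive = true only for directories
 */
public static boolean deletePath(FileSystem hdfs, String src) throws IOException {
    Path srcPath = new Path(src);
    if (!hdfs.exists(srcPath)) {
        return false; // nothing to delete
    }
    boolean recursive = hdfs.getFileStatus(srcPath).isDir(); // directories need recursive = true
    return hdfs.delete(srcPath, recursive);
}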
3. Upload failure:
The destination path must already show up in the listing command:
hadoop fs -ls /
If that listing only shows /home/gqy/hadoop, then an upload to the destination /home/gqy/tmp fails, while a destination such as /home/gqy/hadoop/test works.
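One way around this is to create the destination directory with mkdirs() before copying; a minimal sketch (the helper name safeUpload is mine):

/*
 * sketch: ensure the destination directory exists before uploading
 */
public static void safeUpload(FileSystem hdfs, String src, String dst) throws IOException {
    Path dstPath = new Path(dst);
    if (!hdfs.exists(dstPath)) {
        hdfs.mkdirs(dstPath); // create the missing destination directory first
    }
    hdfs.copyFromLocalFile(new Path(src), dstPath);
}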