Hadoop Part 1: Introduction to the HDFS API
HDFS is a highly fault-tolerant distributed file system. To keep data consistent, it adopts a "write once, read many" access model.
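All of the examples in this article share the same boilerplate: load the cluster address from core-site.xml into a Configuration and ask FileSystem.get() for a handle. Here is a minimal sketch of just that setup (the class name HdfsConnect is illustrative; the configuration path is the one used throughout this article):

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class HdfsConnect {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        // Without this line the client falls back to the local file system
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        System.out.println("Connected to " + conf.get("fs.default.name"));
        hdfs.close();
    }
}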
1. Uploading a local file
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CopeFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        // Local source file
        Path src = new Path("/home/ja/CADATA/SVD/prediction");
        // Destination path on HDFS
        Path dst = new Path("/");
        hdfs.copyFromLocalFile(src, dst);
        System.out.println("Upload to " + conf.get("fs.default.name"));
        // List the destination directory to confirm the upload
        FileStatus[] files = hdfs.listStatus(dst);
        for (FileStatus file : files) {
            System.out.println(file.getPath());
        }
        hdfs.close();
    }
}
A quick aside: Hadoop throws up all sorts of strange problems. Some books omit the addResource call that points at the configuration file, which leaves the client unable to find anything on HDFS, so watch out for that trap. Some books also drop the [] from the args array declaration, which is just sloppy.
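If you prefer not to depend on the configuration file path at all, the NameNode address can also be set directly on the Configuration object. A sketch; hdfs://localhost:9000 is a placeholder, so substitute your own NameNode URI:

Configuration conf = new Configuration();
// Placeholder address; use your cluster's actual fs.default.name value
conf.set("fs.default.name", "hdfs://localhost:9000");
FileSystem hdfs = FileSystem.get(conf);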
2. Creating an HDFS file
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        byte[] buff = "Hello Hadoop World!\n".getBytes();
        Path dfs = new Path("/Test");
        FSDataOutputStream outputstream = hdfs.create(dfs);
        outputstream.write(buff);
        // Close the stream, or the data may never be flushed to HDFS
        outputstream.close();
        hdfs.close();
    }
}
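To verify the write, the file can be read back with hdfs.open(), which returns an FSDataInputStream. A sketch, given the same conf and hdfs setup as above (it also needs java.io.BufferedReader and java.io.InputStreamReader):

FSDataInputStream in = hdfs.open(new Path("/Test"));
BufferedReader reader = new BufferedReader(new InputStreamReader(in));
String line;
while ((line = reader.readLine()) != null) {
    System.out.println(line);  // prints "Hello Hadoop World!"
}
reader.close();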
3. Creating an HDFS directory
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CreateDir {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        Path dfs = new Path("/TestDir");
        // mkdirs creates any missing parent directories as well
        hdfs.mkdirs(dfs);
        hdfs.close();
    }
}
4. Renaming an HDFS file
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class Rename {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        Path frpath = new Path("/Test");  // original name
        Path topath = new Path("/T");     // new name
        boolean isRename = hdfs.rename(frpath, topath);
        String result = isRename ? "succeeded" : "failed";
        System.out.println("Rename " + result);
        hdfs.close();
    }
}
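rename() also doubles as a move when the destination sits in a different directory. A one-line sketch, assuming the /TestDir directory from example 3 exists:

hdfs.rename(new Path("/T"), new Path("/TestDir/T"));  // moves /T into /TestDir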
5. Deleting an HDFS file
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class DeleteFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        Path delef = new Path("/prediction");
        // Second argument: whether to delete recursively (false for a single file)
        boolean isDelete = hdfs.delete(delef, false);
        System.out.println("Delete ? " + isDelete);
        hdfs.close();
    }
}
Deleting a directory works the same way; the sketch below shows the recursive case.
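For a non-empty directory, pass true as the second argument so the delete recurses into its contents. A sketch, given the same setup and assuming the /TestDir directory from example 3:

// true = recursive delete; required for non-empty directories
boolean isDeleted = hdfs.delete(new Path("/TestDir"), true);
System.out.println("Delete ? " + isDeleted);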
6. Checking whether an HDFS file exists
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CheckFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        Path findf = new Path("/usr/root/input/test.data");
        // exists works for both files and directories
        boolean isExist = hdfs.exists(findf);
        System.out.println("Exists ? " + isExist);
        hdfs.close();
    }
}
7. Getting the last modification time of an HDFS file
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class GetLTime {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        Path fPath = new Path("/T");
        FileStatus fileStatus = hdfs.getFileStatus(fPath);
        // Milliseconds since the Unix epoch
        long moditime = fileStatus.getModificationTime();
        System.out.println("Modification time: " + moditime);
        hdfs.close();
    }
}
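getModificationTime() returns milliseconds since the Unix epoch, so the raw long is hard to read. A sketch that formats it, given the moditime value from above (it also needs java.text.SimpleDateFormat and java.util.Date):

SimpleDateFormat fmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
System.out.println("Modification time: " + fmt.format(new Date(moditime)));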
8. Listing all files under an HDFS directory
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class ListALLFile {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        Path listf = new Path("/usr/root/");
        FileStatus[] status = hdfs.listStatus(listf);
        for (FileStatus s : status) {
            System.out.println(s.getPath().toString());
        }
        hdfs.close();
    }
}
9. Finding a file's block locations in the cluster
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.BlockLocation;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class FileLoc {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem hdfs = FileSystem.get(conf);
        Path fpath = new Path("/T");
        FileStatus status = hdfs.getFileStatus(fpath);
        // Ask the NameNode where each block of the file is stored
        BlockLocation[] blockcations = hdfs.getFileBlockLocations(status, 0, status.getLen());
        for (int i = 0; i < blockcations.length; i++) {
            String[] hosts = blockcations[i].getHosts();
            System.out.println("block_" + i + "_location:" + hosts[0]);
        }
        hdfs.close();
    }
}
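Each block of an HDFS file normally has several replicas (three by default), so getHosts() usually returns more than one entry. A variant of the loop above that prints every replica's location rather than just the first:

for (int i = 0; i < blockcations.length; i++) {
    for (String host : blockcations[i].getHosts()) {
        System.out.println("block_" + i + "_replica_on:" + host);
    }
}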
10. Getting the names of all DataNodes in the cluster
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hdfs.DistributedFileSystem;
import org.apache.hadoop.hdfs.protocol.DatanodeInfo;

public class GetList {
    public static void main(String[] args) throws IOException {
        Configuration conf = new Configuration();
        conf.addResource(new Path("/usr/hadoop-1.0.3/conf/core-site.xml"));
        FileSystem fs = FileSystem.get(conf);
        // getDataNodeStats is only available on DistributedFileSystem
        DistributedFileSystem hdfs = (DistributedFileSystem) fs;
        DatanodeInfo[] dataNodeStats = hdfs.getDataNodeStats();
        for (int i = 0; i < dataNodeStats.length; i++) {
            System.out.println("DataNode_" + i + "_Name:" + dataNodeStats[i].getHostName());
        }
        hdfs.close();
    }
}