HDFS文件读写操作(基础基础超基础)
环境
- OS: Ubuntu 16.04 64-Bit
- JDK: 1.7.0_80 64-Bit
- Hadoop: 2.6.5
原理
《权威指南》有两张图,下次po上来好好聊一下
实测
读操作
- 创建在
hadoop
目录下myclass
(放.java
和.class
文件)和input
目录 - 在
input
目录下建立quangle.txt
文件,并写入内容 - 将本地文件上传到
hdfs
的相应文件夹(笔者此处为/class4
)中:
hadoop fs -copyFromLocal quangle.txt /class4/quangle.txt
- 配置
hadoop-env.sh
文件,添加HADOOP_CLASSPATH
变量指向myclass
- 在
myclass
中建立FileSystemCat.java
代码文件: - 编译代码
javac -classpath ../share/hadoop/common/hadoop-common-2.6.5.jar FileSystemCat.java
- 由编译代码读HDFS文件
hadoop FileSystemCat /class4/quangle.txt
import java.io.InputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.io.IOUtils;
public class FileSystemCat {
public static void main(String[] args) throws Exception {
String uri = args[0];
Configuration conf = new Configuration();
FileSystem fs = FileSystem.get(URI.create(uri), conf);
InputStream in = null;
try {
in = fs.open(new Path(uri));
IOUtils.copyBytes(in, System.out, 4096, false);
} finally {
IOUtils.closeStream(in);
}
}
}
写操作
本地文件读入HDFS中
步骤几乎与写操作一致,主要看如何调用API
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.net.URI;
// 以下调用到API均在hadoop-common-2.6.5.jar中
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
public class LocalFile2Hdfs {
public static void main(String[] args) throws Exception {
String local = args[0]; // 源文件地址
String uri = args[1]; // 目标文件位置参数
FileInputStream in = null;
OutputStream out = null;
Configuration conf = new Configuration();
try {
// 获取读入文件数据
in = new FileInputStream(new File(local));
// 获取目标文件信息
FileSystem fs = FileSystem.get(URI.create(uri), conf);
out = fs.create(new Path(uri), new Progressable() {
// 显示进度,每次将64KB数据包写入datanode后打印一次
public void progress() {
System.out.println("*");
}
});
in.skip(100);
byte[] buffer = new byte[20];
// 读去字符到buffer,再写入Path中
int bytesRead = in.read(buffer);
if(bytesRead >= 0) {
out.write(buffer, 0, bytesRead);
}
} finally {
IOUtils.closeStream(in);
IOUtils.closeStream(out);
}
}
}
从HDFS上读文件并写入本地
代码如下:
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.OutputStream;
import java.net.URI;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
public class Hdfs2LocalFile {
public static void main(String[] args) throws Exception {
String uri = args[0];
String local = args[1];
FSDataInputStream in = null;
OutputStream out = null;
Configuration conf = new Configuration();
try {
FileSystem fs = FileSystem.get(URI.create(uri), conf);
in = fs.open(new Path(uri));
out = new FileOutputStream(local);
byte[] buffer = new byte[20];
in.skip(100);
int bytesRead = in.read(buffer);
if (bytesRead >= 0) {
out.write(buffer, 0, bytesRead);
}
} finally {
IOUtils.closeStream(in);
IOUtils.closeStream(out);
}
}
}