通过流的方式访问hdfs
1.需求
通过流的方式访问hdfs,在mapreduce底层实现中会有直接应用。
2.环境配置
(1)hadoop运行在本机,HDFS的访问地址为hdfs://127.0.0.1:9900(与下文代码中的地址一致)
(2)pom文件代码如下
<dependencies>
    <!-- Hadoop client libraries (provides the HDFS FileSystem API used below) -->
    <dependency>
        <groupId>org.apache.hadoop</groupId>
        <artifactId>hadoop-client</artifactId>
        <version>2.7.3</version>
    </dependency>
    <!-- JUnit 4, used to run each demo method as a test -->
    <dependency>
        <groupId>junit</groupId>
        <artifactId>junit</artifactId>
        <version>4.11</version>
    </dependency>
</dependencies>
3.使用hdfs的javaAPI操作hdfs的代码
(1)初始化环境
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.*;
import java.net.URI;
import java.net.URISyntaxException;
/**
 * Demonstrates stream-based access to HDFS through the Hadoop {@code FileSystem} API.
 *
 * <p>A client is created before every test in {@link #init()} and released after every
 * test in {@link #cleanup()}.
 */
public class hadoopStreamApiDemo {
    /** HDFS client handle; assigned in {@link #init()} before each test runs. */
    FileSystem fs = null;

    /**
     * Obtains the file system client used by the tests.
     *
     * @throws IOException          if the file system cannot be obtained
     * @throws URISyntaxException   if the HDFS address is not a valid URI
     * @throws InterruptedException if obtaining the client is interrupted
     */
    @Before
    public void init() throws IOException, URISyntaxException, InterruptedException {
        Configuration configuration = new Configuration();
        // The hdfs:// URI selects HDFS as the file system; "hadoop" is the user name
        // passed to FileSystem.get for the client instance.
        fs = FileSystem.get(new URI("hdfs://127.0.0.1:9900"), configuration, "hadoop");
    }

    /**
     * Closes the client created in {@link #init()} so that it is not leaked between tests.
     *
     * @throws IOException if closing the file system fails
     */
    @After
    public void cleanup() throws IOException {
        if (fs != null) {
            fs.close();
            fs = null;
        }
    }
}
(2)使用流的方式上传文件
/**
 * Uploads a local file to the HDFS path {@code /xx/aa} by copying between streams.
 * An existing {@code /xx/aa} is overwritten.
 *
 * @throws IOException if the local file cannot be read or the HDFS write fails
 */
@Test
public void testUpload() throws IOException {
    // Open the local source first so that a missing local file does not create or
    // truncate /xx/aa. The 3-argument copyBytes leaves both streams open, so
    // try-with-resources closes them; closing the output stream completes the HDFS write.
    try (FileInputStream inputStream = new FileInputStream("[本地路径]");
         FSDataOutputStream outputStream = fs.create(new Path("/xx/aa"), true)) {
        // Copy the local file to HDFS using a 4096-byte buffer.
        IOUtils.copyBytes(inputStream, outputStream, 4096);
    }
}
(3)使用流的方式下载文件
/**
 * Downloads the HDFS file {@code /xx/aa} to a local file by copying between streams.
 *
 * @throws IOException if the HDFS file cannot be read or the local file cannot be written
 */
@Test
public void testDownLoadFileToLocal() throws IOException {
    // The 3-argument copyBytes leaves both streams open, so try-with-resources is
    // used to release the HDFS stream and the local file handle.
    try (FSDataInputStream inputStream = fs.open(new Path("/xx/aa"));
         FileOutputStream outputStream = new FileOutputStream(new File("[本地路径]"))) {
        // Transfer the HDFS input stream to the local file using a 4096-byte buffer.
        IOUtils.copyBytes(inputStream, outputStream, 4096);
    }
}
(4)读取指定长度的数据(下面的代码只拷贝文件的前19个字节;若要从某个偏移量开始读取,需先调用in.seek(offset))
/**
 * Copies the first 19 bytes of the HDFS file {@code /xx/aa} into a local file.
 *
 * @throws IOException if the HDFS file cannot be read or the local file cannot be written
 */
@Test
public void testTandomAccess() throws IOException {
    final Path source = new Path("/xx/aa");
    final FSDataInputStream hdfsIn = fs.open(source);
    final File target = new File("[本地路径]");
    final FileOutputStream localOut = new FileOutputStream(target);
    // Limit the copy to 19 bytes; the trailing 'true' asks copyBytes to close both streams.
    final long byteCount = 19L;
    IOUtils.copyBytes(hdfsIn, localOut, byteCount, true);
}
(5)将hdfs上文件内容打印到屏幕上
/**
 * Prints the content of the HDFS file {@code /xx/aa} to the screen ({@code System.out}).
 *
 * @throws IOException if the HDFS file cannot be opened or read
 */
@Test
public void testCat() throws IOException {
    // Only the HDFS stream is closed here; System.out is deliberately left open
    // because the rest of the process still writes to it.
    try (FSDataInputStream in = fs.open(new Path("/xx/aa"))) {
        IOUtils.copyBytes(in, System.out, 1024);
    }
}
4.github链接
https://github.com/gulu2016/STBigData/hadoopStreamApiDemo.java