|NO.Z.00019|——————————|BigDataEnd|——|Hadoop&HDFS.V04|——|Hadoop.v04|HDFS文件上传下载|文件类型判断|
一、上传文件
### --- 上传文件
~~~ [HDFS之API客户端上传下载文件]
~~~ [HDFS之API客户端文件详情及文件类型判断]
~~~ [HDFS分布式文件系统]
### --- 编写源代码
@Test
public void testCopyFromLocalFile() throws IOException, InterruptedException, URISyntaxException {
    // 1. Build the configuration. A value set in code has the highest
    //    priority, overriding hdfs-site.xml on the classpath and the
    //    server-side defaults.
    Configuration configuration = new Configuration();
    configuration.set("dfs.replication", "2");
    // 2. try-with-resources guarantees the client is closed even when the
    //    upload throws (the original leaked the handle on failure).
    try (FileSystem fs = FileSystem.get(new URI("hdfs://linux121:9000"), configuration, "root")) {
        // Upload: src is the local path, dst is the HDFS path.
        fs.copyFromLocalFile(new Path("e:/yanqi.txt"), new Path("/yanqi.txt"));
    }
    System.out.println("end");
}
### --- 将hdfs-site.xml拷贝到项目的根目录下
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
### --- 参数优先级
~~~ 参数优先级排序:(1)代码中设置的值 >(2)用户自定义配置文件 >(3)服务器的默认配置
二、下载文件
@Test
public void testCopyToLocalFile() throws IOException, InterruptedException,
URISyntaxException{
    // 1. Obtain the file system client.
    Configuration configuration = new Configuration();
    // try-with-resources: close the client even if the download fails.
    try (FileSystem fs = FileSystem.get(new URI("hdfs://linux121:9000"), configuration, "root")) {
        // 2. copyToLocalFile(delSrc, src, dst, useRawLocalFileSystem):
        //    delSrc - whether to delete the source file on HDFS (false: keep it)
        //    src    - HDFS path to download
        //    dst    - local destination path
        //    useRawLocalFileSystem - true uses RawLocalFileSystem, which SKIPS
        //      the client-side checksum (no .crc file is written locally).
        //      The original comment claimed true "enables verification" - inverted.
        fs.copyToLocalFile(false, new Path("/yanqi.txt"), new Path("e:/yanqi_copy.txt"), true);
    }
}
三、删除文件/文件夹
@Test
public void testDelete() throws IOException, InterruptedException,
URISyntaxException{
    // 1. Obtain the file system client.
    Configuration configuration = new Configuration();
    // try-with-resources: close the client even if delete throws
    // (the original leaked the handle on failure).
    try (FileSystem fs = FileSystem.get(new URI("hdfs://linux121:9000"), configuration, "root")) {
        // 2. Delete the directory; the second argument enables recursive
        //    delete (required when the path is a non-empty directory).
        fs.delete(new Path("/api_test/"), true);
    }
}
四、查看文件名称、权限、长度、块信息
@Test
public void testListFiles() throws IOException, InterruptedException,
URISyntaxException{
    // 1. Obtain the file system client.
    Configuration configuration = new Configuration();
    // try-with-resources: the client is released even if iteration throws.
    try (FileSystem fs = FileSystem.get(new URI("hdfs://linux121:9000"), configuration, "root")) {
        // 2. Recursively iterate every FILE under the root
        //    (listFiles never returns directories).
        RemoteIterator<LocatedFileStatus> listFiles = fs.listFiles(new Path("/"), true);
        while (listFiles.hasNext()) {
            LocatedFileStatus status = listFiles.next();
            // File name
            System.out.println(status.getPath().getName());
            // Length in bytes
            System.out.println(status.getLen());
            // Permission bits
            System.out.println(status.getPermission());
            // Owning group
            System.out.println(status.getGroup());
            // Block locations: each block may be replicated on several hosts.
            BlockLocation[] blockLocations = status.getBlockLocations();
            for (BlockLocation blockLocation : blockLocations) {
                // Hosts holding a replica of this block
                for (String host : blockLocation.getHosts()) {
                    System.out.println(host);
                }
            }
            System.out.println("-----------华丽的分割线----------");
        }
    }
}
五、文件夹类型判断
@Test
public void testListStatus() throws IOException, InterruptedException,
URISyntaxException{
    // 1. Obtain the file system client.
    Configuration configuration = new Configuration();
    // try-with-resources: the client is released even if listing throws.
    try (FileSystem fs = FileSystem.get(new URI("hdfs://linux121:9000"),
            configuration, "root")) {
        // 2. listStatus is NON-recursive: only the direct children of "/".
        FileStatus[] listStatus = fs.listStatus(new Path("/"));
        for (FileStatus fileStatus : listStatus) {
            if (fileStatus.isFile()) {
                // Regular file
                System.out.println("f:"+fileStatus.getPath().getName());
            }else {
                // Directory
                System.out.println("d:"+fileStatus.getPath().getName());
            }
        }
    }
}
六、编程代码:上传文件
package com.yanqi.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
public class HdfsClientDemo {

    /** Shared HDFS client handle; created in init() and released in destory(). */
    FileSystem fs = null;

    /**
     * Acquires the HDFS client before every test.
     * Values set in code (dfs.replication=2) take the highest priority,
     * overriding hdfs-site.xml on the classpath and the server defaults.
     */
    @Before
    public void init() throws URISyntaxException, IOException, InterruptedException {
        // 1. Build the configuration for the Hadoop cluster.
        Configuration configuration = new Configuration();
        //configuration.set( "fs.defaultFS", "hdfs://linux121:9000" );
        configuration.set("dfs.replication", "2");
        // 2. Obtain the FileSystem client, connecting as user "root".
        fs = FileSystem.get(new URI("hdfs://linux121:9000"), configuration, "root");
    }

    /**
     * Releases the FileSystem handle (similar to closing a DB connection).
     * Method name ("destory") kept as-is for compatibility with the tutorial.
     */
    @After
    public void destory() throws IOException {
        // Guard: init() may have thrown before assigning fs, leaving it null;
        // the original would then fail with an NPE instead of the real error.
        if (fs != null) {
            fs.close();
        }
    }

    /** Creates a test directory on HDFS. */
    @Test
    public void testMkdirs() throws URISyntaxException, IOException, InterruptedException {
        fs.mkdirs(new Path("/api_test2"));
    }

    /**
     * Uploads a local file to HDFS.
     * src: local path; dst: HDFS path. The server default is 3 replicas;
     * the dfs.replication=2 set in init() overrides it for this upload.
     */
    @Test
    public void copyFormLocalToHdfs() throws IOException, InterruptedException, URISyntaxException {
        fs.copyFromLocalFile(new Path("e:/yanqi.txt"), new Path("/yanqi.txt"));
    }
}
### --- hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
二、编程代码:下载文件
package com.yanqi.hdfs;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
public class HdfsClientDemo {

    /** Shared HDFS client handle; created in init() and released in destory(). */
    FileSystem fs = null;

    /**
     * Acquires the HDFS client before every test.
     * Values set in code (dfs.replication=2) take the highest priority,
     * overriding hdfs-site.xml on the classpath and the server defaults.
     */
    @Before
    public void init() throws URISyntaxException, IOException, InterruptedException {
        // 1. Build the configuration for the Hadoop cluster.
        Configuration configuration = new Configuration();
        //configuration.set( "fs.defaultFS", "hdfs://linux121:9000" );
        configuration.set("dfs.replication", "2");
        // 2. Obtain the FileSystem client, connecting as user "root".
        fs = FileSystem.get(new URI("hdfs://linux121:9000"), configuration, "root");
    }

    /**
     * Releases the FileSystem handle (similar to closing a DB connection).
     * Method name ("destory") kept as-is for compatibility with the tutorial.
     */
    @After
    public void destory() throws IOException {
        // Guard: init() may have thrown before assigning fs, leaving it null;
        // the original would then fail with an NPE instead of the real error.
        if (fs != null) {
            fs.close();
        }
    }

    /** Creates a test directory on HDFS. */
    @Test
    public void testMkdirs() throws URISyntaxException, IOException, InterruptedException {
        fs.mkdirs(new Path("/api_test2"));
    }

    /**
     * Uploads a local file to HDFS.
     * src: local path; dst: HDFS path. The server default is 3 replicas;
     * the dfs.replication=2 set in init() overrides it for this upload.
     */
    @Test
    public void copyFormLocalToHdfs() throws IOException, InterruptedException, URISyntaxException {
        fs.copyFromLocalFile(new Path("e:/yanqi.txt"), new Path("/yanqi.txt"));
    }

    /**
     * Downloads a file from HDFS to the local disk.
     * delSrc=true DELETES the source file on HDFS after the copy;
     * useRawLocalFileSystem=true skips the client-side checksum
     * (no .crc file is written next to the local copy).
     */
    @Test
    public void testCopyToLocalFile() throws IOException, InterruptedException, URISyntaxException{
        fs.copyToLocalFile(true, new Path("/yanqi.txt"), new Path("e:/yanqi_copy.txt"), true);
    }
}
三、编程代码:删除文件或文件夹
package cn.yanqi.hdfs;
/*
通过API客户端删除文件或者文件夹
b:递归删除
*/
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
public class HdfsClientDemo06 {

    /** Shared HDFS client handle; created in init() and released in destory(). */
    FileSystem fs = null;

    /**
     * Acquires the HDFS client before every test.
     * Values set in code (dfs.replication=2) take the highest priority,
     * overriding hdfs-site.xml on the classpath and the server defaults.
     */
    @Before
    public void init() throws URISyntaxException, IOException, InterruptedException {
        // 1. Build the configuration for the Hadoop cluster.
        Configuration configuration = new Configuration();
        //configuration.set( "fs.defaultFS", "hdfs://linux121:9000" );
        configuration.set("dfs.replication", "2");
        // 2. Obtain the FileSystem client, connecting as user "root".
        fs = FileSystem.get(new URI("hdfs://linux121:9000"), configuration, "root");
    }

    /**
     * Releases the FileSystem handle (similar to closing a DB connection).
     * Method name ("destory") kept as-is for compatibility with the tutorial.
     */
    @After
    public void destory() throws IOException {
        // Guard: init() may have thrown before assigning fs, leaving it null;
        // the original would then fail with an NPE instead of the real error.
        if (fs != null) {
            fs.close();
        }
    }

    /** Creates a test directory on HDFS. */
    @Test
    public void testMkdirs() throws URISyntaxException, IOException, InterruptedException {
        fs.mkdirs(new Path("/api_test2"));
    }

    /**
     * Uploads a local file to HDFS.
     * src: local path; dst: HDFS path. The server default is 3 replicas;
     * the dfs.replication=2 set in init() overrides it for this upload.
     */
    @Test
    public void copyFormLocalToHdfs() throws IOException, InterruptedException, URISyntaxException {
        fs.copyFromLocalFile(new Path("e:/yanqi.txt"), new Path("/yanqi.txt"));
    }

    /**
     * Downloads a file from HDFS to the local disk.
     * delSrc=true DELETES the source file on HDFS after the copy;
     * useRawLocalFileSystem=true skips the client-side checksum (no .crc file).
     */
    @Test
    public void testCopyToLocalFile() throws IOException, InterruptedException, URISyntaxException{
        fs.copyToLocalFile(true, new Path("/yanqi.txt"), new Path("e:/yanqi_copy.txt"), true);
    }

    /** Deletes a file or directory; the boolean enables recursive delete. */
    @Test
    public void deleteFile() throws IOException, InterruptedException, URISyntaxException{
        fs.delete(new Path("/api_test2/"), true);
    }
}
四、编程代码:查看所有文件及文件夹的所有信息
package cn.yanqi.hdfs;
/*
遍历hdfs的根目录得到文件以及文件夹的信息:名称,权限,长度等
*/
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
public class HdfsClientDemo07 {

    /** Shared HDFS client handle; created in init() and released in destory(). */
    FileSystem fs = null;

    /**
     * Acquires the HDFS client before every test.
     * Values set in code (dfs.replication=2) take the highest priority,
     * overriding hdfs-site.xml on the classpath and the server defaults.
     */
    @Before
    public void init() throws URISyntaxException, IOException, InterruptedException {
        // 1. Build the configuration for the Hadoop cluster.
        Configuration configuration = new Configuration();
        //configuration.set( "fs.defaultFS", "hdfs://linux121:9000" );
        configuration.set("dfs.replication", "2");
        // 2. Obtain the FileSystem client, connecting as user "root".
        fs = FileSystem.get(new URI("hdfs://linux121:9000"), configuration, "root");
    }

    /**
     * Releases the FileSystem handle (similar to closing a DB connection).
     * Method name ("destory") kept as-is for compatibility with the tutorial.
     */
    @After
    public void destory() throws IOException {
        // Guard: init() may have thrown before assigning fs, leaving it null;
        // the original would then fail with an NPE instead of the real error.
        if (fs != null) {
            fs.close();
        }
    }

    /** Creates a test directory on HDFS. */
    @Test
    public void testMkdirs() throws URISyntaxException, IOException, InterruptedException {
        fs.mkdirs(new Path("/api_test2"));
    }

    /**
     * Uploads a local file to HDFS.
     * src: local path; dst: HDFS path. The server default is 3 replicas;
     * the dfs.replication=2 set in init() overrides it for this upload.
     */
    @Test
    public void copyFormLocalToHdfs() throws IOException, InterruptedException, URISyntaxException {
        fs.copyFromLocalFile(new Path("e:/yanqi.txt"), new Path("/yanqi.txt"));
    }

    /**
     * Downloads a file from HDFS to the local disk.
     * delSrc=true DELETES the source file on HDFS after the copy;
     * useRawLocalFileSystem=true skips the client-side checksum (no .crc file).
     */
    @Test
    public void testCopyToLocalFile() throws IOException, InterruptedException, URISyntaxException{
        fs.copyToLocalFile(true, new Path("/yanqi.txt"), new Path("e:/yanqi_copy.txt"), true);
    }

    /** Deletes a file or directory; the boolean enables recursive delete. */
    @Test
    public void deleteFile() throws IOException, InterruptedException, URISyntaxException{
        fs.delete(new Path("/api_test2/"), true);
    }

    /**
     * Recursively walks every FILE under the HDFS root and prints name,
     * length, permission, group, owner, and the hosts of each block.
     * listFiles never returns directories.
     */
    @Test
    public void ListFiles() throws IOException, InterruptedException, URISyntaxException{
        // Iterator over all file statuses below the given path.
        RemoteIterator<LocatedFileStatus> remoteIterator = fs.listFiles(new Path("/"), true);
        while (remoteIterator.hasNext()) {
            LocatedFileStatus filestatus = remoteIterator.next();
            final String fileName = filestatus.getPath().getName();
            final long len = filestatus.getLen();
            final FsPermission permission = filestatus.getPermission();
            final String group = filestatus.getGroup();
            final String owner = filestatus.getOwner();
            System.out.println(fileName + "\t" + len + "\t" + permission + "\t" + group + "\t" + owner);
            // Each block may be replicated on several datanodes; print every host.
            final BlockLocation[] blockLocations = filestatus.getBlockLocations();
            for (BlockLocation blockLocation : blockLocations) {
                for (String host : blockLocation.getHosts()) {
                    System.out.println("主机名称" + host);
                }
            }
            System.out.println("-----------华丽的分割线----------");
        }
    }
}
五、编程代码:文件夹类型判断
package cn.yanqi.hdfs;
/*
通过API判断文件还是文件夹
封装好的方法只能判断根目录下一层的判断;
递归判断自己可以去实现
*/
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.util.Progressable;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
public class HdfsClientDemo08 {

    /** Shared HDFS client handle; created in init() and released in destory(). */
    FileSystem fs = null;

    /**
     * Acquires the HDFS client before every test.
     * Values set in code (dfs.replication=2) take the highest priority,
     * overriding hdfs-site.xml on the classpath and the server defaults.
     */
    @Before
    public void init() throws URISyntaxException, IOException, InterruptedException {
        // 1. Build the configuration for the Hadoop cluster.
        Configuration configuration = new Configuration();
        //configuration.set( "fs.defaultFS", "hdfs://linux121:9000" );
        configuration.set("dfs.replication", "2");
        // 2. Obtain the FileSystem client, connecting as user "root".
        fs = FileSystem.get(new URI("hdfs://linux121:9000"), configuration, "root");
    }

    /**
     * Releases the FileSystem handle (similar to closing a DB connection).
     * Method name ("destory") kept as-is for compatibility with the tutorial.
     */
    @After
    public void destory() throws IOException {
        // Guard: init() may have thrown before assigning fs, leaving it null;
        // the original would then fail with an NPE instead of the real error.
        if (fs != null) {
            fs.close();
        }
    }

    /** Creates a test directory on HDFS. */
    @Test
    public void testMkdirs() throws URISyntaxException, IOException, InterruptedException {
        fs.mkdirs(new Path("/api_test2"));
    }

    /**
     * Uploads a local file to HDFS.
     * src: local path; dst: HDFS path. The server default is 3 replicas;
     * the dfs.replication=2 set in init() overrides it for this upload.
     */
    @Test
    public void copyFormLocalToHdfs() throws IOException, InterruptedException, URISyntaxException {
        fs.copyFromLocalFile(new Path("e:/yanqi.txt"), new Path("/yanqi.txt"));
    }

    /**
     * Downloads a file from HDFS to the local disk.
     * delSrc=true DELETES the source file on HDFS after the copy;
     * useRawLocalFileSystem=true skips the client-side checksum (no .crc file).
     */
    @Test
    public void testCopyToLocalFile() throws IOException, InterruptedException, URISyntaxException {
        fs.copyToLocalFile(true, new Path("/yanqi.txt"), new Path("e:/yanqi_copy.txt"), true);
    }

    /** Deletes a file or directory; the boolean enables recursive delete. */
    @Test
    public void deleteFile() throws IOException, InterruptedException, URISyntaxException {
        fs.delete(new Path("/api_test2/"), true);
    }

    /**
     * Recursively walks every FILE under the HDFS root and prints name,
     * length, permission, group, owner, and the hosts of each block.
     * listFiles never returns directories.
     */
    @Test
    public void ListFiles() throws IOException, InterruptedException, URISyntaxException {
        // Iterator over all file statuses below the given path.
        RemoteIterator<LocatedFileStatus> remoteIterator = fs.listFiles(new Path("/"), true);
        while (remoteIterator.hasNext()) {
            LocatedFileStatus filestatus = remoteIterator.next();
            final String fileName = filestatus.getPath().getName();
            final long len = filestatus.getLen();
            final FsPermission permission = filestatus.getPermission();
            final String group = filestatus.getGroup();
            final String owner = filestatus.getOwner();
            System.out.println(fileName + "\t" + len + "\t" + permission + "\t" + group + "\t" + owner);
            // Each block may be replicated on several datanodes; print every host.
            final BlockLocation[] blockLocations = filestatus.getBlockLocations();
            for (BlockLocation blockLocation : blockLocations) {
                for (String host : blockLocation.getHosts()) {
                    System.out.println("主机名称" + host);
                }
            }
            System.out.println("-----------华丽的分割线----------");
        }
    }

    /**
     * Distinguishes files from directories among the DIRECT children of "/".
     * listStatus is non-recursive; recurse yourself for deeper levels.
     */
    @Test
    public void isFile() throws IOException, InterruptedException, URISyntaxException {
        final FileStatus[] listStatus = fs.listStatus(new Path("/"));
        for (FileStatus fileStatus : listStatus) {
            if (fileStatus.isFile()) {
                System.out.println("文件:" + fileStatus.getPath().getName());
            } else {
                System.out.println("文件夹:" + fileStatus.getPath().getName());
            }
        }
    }
}
Walter Savage Landor:strove with none,for none was worth my strife.Nature I loved and, next to Nature, Art:I warm'd both hands before the fire of life.It sinks, and I am ready to depart
——W.S.Landor
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· 无需6万激活码!GitHub神秘组织3小时极速复刻Manus,手把手教你使用OpenManus搭建本
· Manus爆火,是硬核还是营销?
· 终于写完轮子一部分:tcp代理 了,记录一下
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通