这是 2018 年做 HDFS 项目时留下的笔记：使用 Scala 对 HDFS 进行简单操作。

点击查看代码

 package info.aoye.hadoop
 
import java.io.ByteArrayInputStream
import java.net.URI
 
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, FileUtil, Path}
import org.apache.commons.lang.StringUtils
import org.apache.zookeeper.common.IOUtils
 
import scala.collection.mutable.{ArrayBuffer, ListBuffer}
 
 
/**
  * Small collection of HDFS helper operations (create/delete directory,
  * delete file, list, upload, create, read, append) built on the Hadoop
  * `FileSystem` client API.
  *
  * Every operation opens its own `FileSystem` client as the "hdfs" user and
  * releases it in a `finally` block, so the client is closed even when the
  * operation throws.
  *
  * NOTE(review): this object extends `scala.App`, so the statements in the
  * body (including the directory listing below) run when the program starts.
  * If the methods are called from other code without running `main`,
  * `hdfsPath` may not be initialised yet -- confirm for the Scala version in use.
  *
  * @author duchaoqun
  * @since 2018-08-09
  */
object DemoHdfs1 extends scala.App {

  import java.io.{FileNotFoundException, IOException}
  import java.nio.charset.StandardCharsets

  import scala.util.control.NonFatal

  /** Base HDFS location that `createDir` / `deleteDir` resolve directory names against. */
  val hdfsPath = "hdfs://172.17.2.135:8020/user/"

  /** User every FileSystem client connects as. A constant, so it does not depend on initialisation order. */
  private final val HdfsUser = "hdfs"

  /**
    * Runs `body` with a FileSystem client for `location` and always closes the client afterwards.
    *
    * @param location URI string whose scheme/authority select the file system
    * @param conf     client configuration; a fresh default one when omitted
    * @param body     the work to perform with the client
    * @return whatever `body` returns
    */
  private def withFileSystem[A](location: String, conf: Configuration = new Configuration())(body: FileSystem => A): A = {
    val fileSystem = FileSystem.get(URI.create(location), conf, HdfsUser)
    try body(fileSystem)
    finally fileSystem.close()
  }

  /**
    * Joins a parent location and a child name with exactly one "/" between them,
    * so a missing trailing slash on the parent no longer puts the child into the
    * parent's parent directory.
    */
  private def resolve(parent: String, child: String): String =
    StringUtils.stripEnd(parent, "/") + "/" + StringUtils.stripStart(child, "/")

  /**
    * Creates a directory below [[hdfsPath]].
    *
    * @param dir directory name, relative to `hdfsPath`
    * @return true if the directory already exists or was created;
    *         false if `dir` is blank or `mkdirs` reported failure
    */
  def createDir(dir: String): Boolean =
    // Validate the caller's argument: the joined path always carries the
    // `hdfsPath` prefix and therefore can never be blank.
    if (StringUtils.isBlank(dir)) false
    else withFileSystem(hdfsPath) { fs =>
      val target = new Path(resolve(hdfsPath, dir))
      fs.exists(target) || fs.mkdirs(target)
    }


  /**
    * Deletes a directory below [[hdfsPath]], including its contents.
    *
    * @param dir directory name, relative to `hdfsPath`
    * @return true if something was deleted; false if `dir` is blank or
    *         `delete` reported that nothing was removed
    */
  def deleteDir(dir: String): Boolean =
    if (StringUtils.isBlank(dir)) false
    else withFileSystem(hdfsPath)(_.delete(new Path(resolve(hdfsPath, dir)), true))

  /**
    * Deletes a file on HDFS.
    *
    * @param filePath absolute HDFS path of the file
    * @return true if something was deleted; false if `filePath` is blank or
    *         `delete` reported that nothing was removed
    */
  def deleteFile(filePath: String): Boolean =
    if (StringUtils.isBlank(filePath)) false
    // recursive = true keeps the effective behaviour of the previous
    // deleteOnExit + close() combination, but now reports the real outcome.
    else withFileSystem(filePath)(_.delete(new Path(filePath), true))

  //deleteFile("hdfs://172.17.2.135:8020/user/test2.txt")

  /**
    * Lists the entries directly under an HDFS directory.
    *
    * @param dirPath absolute HDFS path of the directory
    * @return None if `dirPath` is blank, otherwise the full path of every entry
    */
  def listAll(dirPath: String): Option[List[String]] =
    if (StringUtils.isBlank(dirPath)) None
    else withFileSystem[Option[List[String]]](dirPath) { fs =>
      Some(fs.listStatus(new Path(dirPath)).map(_.getPath.toString).toList)
    }

  listAll("hdfs://172.17.2.135:8020/user/") match {
    case Some(s) => s.foreach(println(_))
    case None => println("Get Nothing!")
  }


  /**
    * Uploads a local file to HDFS.
    *
    * @param localFile local file location (absolute path)
    * @param hdfsFile  name to store the file under on HDFS
    * @param hdfsPath  HDFS directory to store the file in (shadows the object-level `hdfsPath`)
    * @return true once the copy has completed; false if any argument is blank
    */
  def uploadLocalToHDFS(localFile: String, hdfsFile: String, hdfsPath: String): Boolean =
    // Any single blank argument makes the request invalid, hence `||`.
    if (StringUtils.isBlank(localFile) || StringUtils.isBlank(hdfsFile) || StringUtils.isBlank(hdfsPath)) false
    else withFileSystem(hdfsPath) { fs =>
      fs.copyFromLocalFile(new Path(localFile), new Path(resolve(hdfsPath, hdfsFile)))
      true
    }

  //uploadLocalToHDFS("C:\\Users\\ducha\\Documents\\test1.txt", "test1.txt", "hdfs://172.17.2.135:8020/user/")


  /**
    * Creates a new file on HDFS with the given text content, encoded as UTF-8.
    *
    * @param fileName    file name
    * @param fileContent file content; null is written as an empty file
    * @param hdfsPath    HDFS directory to create the file in (shadows the object-level `hdfsPath`)
    * @return true once the content has been written; false if `fileName` or `hdfsPath` is blank
    */
  def createFile(fileName: String, fileContent: String, hdfsPath: String): Boolean =
    if (StringUtils.isBlank(fileName) || StringUtils.isBlank(hdfsPath)) false
    else withFileSystem(hdfsPath) { fs =>
      val out = fs.create(new Path(resolve(hdfsPath, fileName)))
      try out.write(Option(fileContent).getOrElse("").getBytes(StandardCharsets.UTF_8))
      finally out.close()
      true
    }

  //createFile("test3.txt", "测试数据!!", "hdfs://172.17.2.135:8020/user/")

  /**
    * Reads the whole content of an HDFS file into memory.
    *
    * @param filePath absolute HDFS path of the file
    * @return None if `filePath` is blank, otherwise the file's bytes
    * @throws java.io.FileNotFoundException if the file does not exist
    * @throws java.io.IOException           if the file is too large for a single byte array
    */
  def readFile(filePath: String): Option[Array[Byte]] =
    if (StringUtils.isBlank(filePath)) None
    else withFileSystem[Option[Array[Byte]]](filePath) { fs =>
      val file = new Path(filePath)
      if (!fs.exists(file)) {
        throw new FileNotFoundException(s"HDFS file does not exist: $filePath")
      }
      val length = fs.getFileStatus(file).getLen
      // A JVM array holds at most Int.MaxValue elements; a silent `.toInt`
      // would wrap around for larger files.
      if (length > Int.MaxValue) {
        throw new IOException(s"HDFS file is too large to read into memory ($length bytes): $filePath")
      }
      val bytes = new Array[Byte](length.toInt)
      val in = fs.open(file)
      try in.readFully(0, bytes)
      finally in.close()
      Some(bytes) // TODO: callers get raw bytes; decoding is left to them.
    }

  //println(readFile("hdfs://172.17.2.135:8020/user/test3.txt").get)


  /**
    * Appends text, encoded as UTF-8, to an existing HDFS file.
    *
    * @param filePath absolute HDFS path of the file
    * @param content  text to append
    * @return true if the content was appended or there was nothing to append;
    *         false if `filePath` is blank, the file does not exist, or the write failed
    */
  def append(filePath: String, content: String): Boolean =
    if (StringUtils.isBlank(filePath)) false
    else if (StringUtils.isEmpty(content)) true // nothing to write
    else {
      val configuration = new Configuration()
      // TODO: these two settings were added for appending on a single-datanode cluster.
      configuration.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER")
      configuration.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true")
      withFileSystem(filePath, configuration) { fs =>
        val target = new Path(filePath)
        if (!fs.exists(target)) {
          //createFile()
          false
        } else {
          try {
            val out = fs.append(target)
            try out.write(content.getBytes(StandardCharsets.UTF_8))
            finally out.close()
            true
          } catch {
            // Best effort, as before: log the failure, but report it to the caller.
            case NonFatal(e) =>
              e.printStackTrace()
              false
          }
        }
      }
    }
  //append("hdfs://172.17.2.135:8020/user/test3.txt","Dura!")

  // TODO: the user environment variable HADOOP_USER_NAME = hdfs is also used by FileSystem.
}

posted @ 2023-04-03 17:08 duchaoqun 阅读(16) 评论(0) 编辑收藏举报

刷新页面返回顶部

登录后才能查看或发表评论，立即登录或者逛逛博客园首页

相关博文：

· tomcat9 启动报错，扫描jar的时候出现的问题。

· CentOS 服务器，清理 journal 目录中的日志

· 12.19熟悉常用的HDFS操作1

· Java操作HDFS

· Hadoop（二）Hdfs基本操作

阅读排行：
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布：重大改进与新特性概览！
· AI与.NET技术实操系列（二）：开始使用ML.NET
· 单线程的Redis速度为什么快？

公告

昵称： duchaoqun
园龄： 6年7个月
粉丝： 2
关注： 0

+加关注

2025年3月

日

一

二

三

四

五

六

随笔分类 (144)

随笔档案 (161)

阅读排行榜

评论排行榜

1. sbt - sbt 2 wrong checksum(2)

平凡之路

关注业界, 关注互联网...

这是 2018 年做 HDFS 项目时留下的笔记：使用 Scala 对 HDFS 进行简单操作。

公告

我的标签

积分与排名

随笔分类 (144)

随笔档案 (161)

阅读排行榜

评论排行榜

最新评论

	package info.aoye.hadoop

	import java.io.ByteArrayInputStream
	import java.net.URI

	import org.apache.hadoop.conf.Configuration
	import org.apache.hadoop.fs.{FileSystem, FileUtil, Path}
	import org.apache.commons.lang.StringUtils
	import org.apache.zookeeper.common.IOUtils

	import scala.collection.mutable.{ArrayBuffer, ListBuffer}


	/**
	  * Small collection of HDFS helper operations (create/delete directory,
	  * delete file, list, upload, create, read, append) built on the Hadoop
	  * `FileSystem` client API.
	  *
	  * Every operation opens its own `FileSystem` client as the "hdfs" user and
	  * releases it in a `finally` block, so the client is closed even when the
	  * operation throws.
	  *
	  * NOTE(review): this object extends `scala.App`, so the statements in the
	  * body (including the directory listing below) run when the program starts.
	  * If the methods are called from other code without running `main`,
	  * `hdfsPath` may not be initialised yet -- confirm for the Scala version in use.
	  *
	  * @author duchaoqun
	  * @since 2018-08-09
	  */
	object DemoHdfs1 extends scala.App {

	  import java.io.{FileNotFoundException, IOException}
	  import java.nio.charset.StandardCharsets

	  import scala.util.control.NonFatal

	  /** Base HDFS location that `createDir` / `deleteDir` resolve directory names against. */
	  val hdfsPath = "hdfs://172.17.2.135:8020/user/"

	  /** User every FileSystem client connects as. A constant, so it does not depend on initialisation order. */
	  private final val HdfsUser = "hdfs"

	  /**
	    * Runs `body` with a FileSystem client for `location` and always closes the client afterwards.
	    *
	    * @param location URI string whose scheme/authority select the file system
	    * @param conf     client configuration; a fresh default one when omitted
	    * @param body     the work to perform with the client
	    * @return whatever `body` returns
	    */
	  private def withFileSystem[A](location: String, conf: Configuration = new Configuration())(body: FileSystem => A): A = {
	    val fileSystem = FileSystem.get(URI.create(location), conf, HdfsUser)
	    try body(fileSystem)
	    finally fileSystem.close()
	  }

	  /**
	    * Joins a parent location and a child name with exactly one "/" between them,
	    * so a missing trailing slash on the parent no longer puts the child into the
	    * parent's parent directory.
	    */
	  private def resolve(parent: String, child: String): String =
	    StringUtils.stripEnd(parent, "/") + "/" + StringUtils.stripStart(child, "/")

	  /**
	    * Creates a directory below [[hdfsPath]].
	    *
	    * @param dir directory name, relative to `hdfsPath`
	    * @return true if the directory already exists or was created;
	    *         false if `dir` is blank or `mkdirs` reported failure
	    */
	  def createDir(dir: String): Boolean =
	    // Validate the caller's argument: the joined path always carries the
	    // `hdfsPath` prefix and therefore can never be blank.
	    if (StringUtils.isBlank(dir)) false
	    else withFileSystem(hdfsPath) { fs =>
	      val target = new Path(resolve(hdfsPath, dir))
	      fs.exists(target) || fs.mkdirs(target)
	    }


	  /**
	    * Deletes a directory below [[hdfsPath]], including its contents.
	    *
	    * @param dir directory name, relative to `hdfsPath`
	    * @return true if something was deleted; false if `dir` is blank or
	    *         `delete` reported that nothing was removed
	    */
	  def deleteDir(dir: String): Boolean =
	    if (StringUtils.isBlank(dir)) false
	    else withFileSystem(hdfsPath)(_.delete(new Path(resolve(hdfsPath, dir)), true))

	  /**
	    * Deletes a file on HDFS.
	    *
	    * @param filePath absolute HDFS path of the file
	    * @return true if something was deleted; false if `filePath` is blank or
	    *         `delete` reported that nothing was removed
	    */
	  def deleteFile(filePath: String): Boolean =
	    if (StringUtils.isBlank(filePath)) false
	    // recursive = true keeps the effective behaviour of the previous
	    // deleteOnExit + close() combination, but now reports the real outcome.
	    else withFileSystem(filePath)(_.delete(new Path(filePath), true))

	  //deleteFile("hdfs://172.17.2.135:8020/user/test2.txt")

	  /**
	    * Lists the entries directly under an HDFS directory.
	    *
	    * @param dirPath absolute HDFS path of the directory
	    * @return None if `dirPath` is blank, otherwise the full path of every entry
	    */
	  def listAll(dirPath: String): Option[List[String]] =
	    if (StringUtils.isBlank(dirPath)) None
	    else withFileSystem[Option[List[String]]](dirPath) { fs =>
	      Some(fs.listStatus(new Path(dirPath)).map(_.getPath.toString).toList)
	    }

	  listAll("hdfs://172.17.2.135:8020/user/") match {
	    case Some(s) => s.foreach(println(_))
	    case None => println("Get Nothing!")
	  }


	  /**
	    * Uploads a local file to HDFS.
	    *
	    * @param localFile local file location (absolute path)
	    * @param hdfsFile  name to store the file under on HDFS
	    * @param hdfsPath  HDFS directory to store the file in (shadows the object-level `hdfsPath`)
	    * @return true once the copy has completed; false if any argument is blank
	    */
	  def uploadLocalToHDFS(localFile: String, hdfsFile: String, hdfsPath: String): Boolean =
	    // Any single blank argument makes the request invalid, hence `||`.
	    if (StringUtils.isBlank(localFile) || StringUtils.isBlank(hdfsFile) || StringUtils.isBlank(hdfsPath)) false
	    else withFileSystem(hdfsPath) { fs =>
	      fs.copyFromLocalFile(new Path(localFile), new Path(resolve(hdfsPath, hdfsFile)))
	      true
	    }

	  //uploadLocalToHDFS("C:\\Users\\ducha\\Documents\\test1.txt", "test1.txt", "hdfs://172.17.2.135:8020/user/")


	  /**
	    * Creates a new file on HDFS with the given text content, encoded as UTF-8.
	    *
	    * @param fileName    file name
	    * @param fileContent file content; null is written as an empty file
	    * @param hdfsPath    HDFS directory to create the file in (shadows the object-level `hdfsPath`)
	    * @return true once the content has been written; false if `fileName` or `hdfsPath` is blank
	    */
	  def createFile(fileName: String, fileContent: String, hdfsPath: String): Boolean =
	    if (StringUtils.isBlank(fileName) || StringUtils.isBlank(hdfsPath)) false
	    else withFileSystem(hdfsPath) { fs =>
	      val out = fs.create(new Path(resolve(hdfsPath, fileName)))
	      try out.write(Option(fileContent).getOrElse("").getBytes(StandardCharsets.UTF_8))
	      finally out.close()
	      true
	    }

	  //createFile("test3.txt", "测试数据!!", "hdfs://172.17.2.135:8020/user/")

	  /**
	    * Reads the whole content of an HDFS file into memory.
	    *
	    * @param filePath absolute HDFS path of the file
	    * @return None if `filePath` is blank, otherwise the file's bytes
	    * @throws java.io.FileNotFoundException if the file does not exist
	    * @throws java.io.IOException           if the file is too large for a single byte array
	    */
	  def readFile(filePath: String): Option[Array[Byte]] =
	    if (StringUtils.isBlank(filePath)) None
	    else withFileSystem[Option[Array[Byte]]](filePath) { fs =>
	      val file = new Path(filePath)
	      if (!fs.exists(file)) {
	        throw new FileNotFoundException(s"HDFS file does not exist: $filePath")
	      }
	      val length = fs.getFileStatus(file).getLen
	      // A JVM array holds at most Int.MaxValue elements; a silent `.toInt`
	      // would wrap around for larger files.
	      if (length > Int.MaxValue) {
	        throw new IOException(s"HDFS file is too large to read into memory ($length bytes): $filePath")
	      }
	      val bytes = new Array[Byte](length.toInt)
	      val in = fs.open(file)
	      try in.readFully(0, bytes)
	      finally in.close()
	      Some(bytes) // TODO: callers get raw bytes; decoding is left to them.
	    }

	  //println(readFile("hdfs://172.17.2.135:8020/user/test3.txt").get)


	  /**
	    * Appends text, encoded as UTF-8, to an existing HDFS file.
	    *
	    * @param filePath absolute HDFS path of the file
	    * @param content  text to append
	    * @return true if the content was appended or there was nothing to append;
	    *         false if `filePath` is blank, the file does not exist, or the write failed
	    */
	  def append(filePath: String, content: String): Boolean =
	    if (StringUtils.isBlank(filePath)) false
	    else if (StringUtils.isEmpty(content)) true // nothing to write
	    else {
	      val configuration = new Configuration()
	      // TODO: these two settings were added for appending on a single-datanode cluster.
	      configuration.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER")
	      configuration.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true")
	      withFileSystem(filePath, configuration) { fs =>
	        val target = new Path(filePath)
	        if (!fs.exists(target)) {
	          //createFile()
	          false
	        } else {
	          try {
	            val out = fs.append(target)
	            try out.write(content.getBytes(StandardCharsets.UTF_8))
	            finally out.close()
	            true
	          } catch {
	            // Best effort, as before: log the failure, but report it to the caller.
	            case NonFatal(e) =>
	              e.printStackTrace()
	              false
	          }
	        }
	      }
	    }
	  //append("hdfs://172.17.2.135:8020/user/test3.txt","Dura!")

	  // TODO: the user environment variable HADOOP_USER_NAME = hdfs is also used by FileSystem.
	}