使用Go语言操作HDFS
HDFS(Hadoop分布式文件系统)是Hadoop生态系统的一部分,它是一个可扩展的分布式文件系统,被设计用于在大规模数据集上运行的应用程序
安装相关package:
$ go get github.com/colinmarc/hdfs/v2
创建目录
命令:
$ hdfs dfs -mkdir <path>
示例代码:
package main
import (
"fmt"
"github.com/colinmarc/hdfs"
)
const address = "master:9000"
func main() {
client, err := hdfs.New(address)
if err != nil {
panic(err)
}
path := "/testdir"
err = client.MkdirAll(path, 0777) // 创建testdir目录
if err != nil {
panic(err)
}
fmt.Printf("Created directory: %s\n", path)
}
上传文件
命令:
$ hdfs dfs -put <localpath> <hdfspath>
示例代码:
package main
import (
"fmt"
"github.com/colinmarc/hdfs"
"io"
"os"
)
const address = "master:9000"
func main() {
client, err := hdfs.New(address)
if err != nil {
panic(err)
}
localPath := "./file.txt"
hdfsPath := "/testdir/file.txt"
// 打开本地文件
localFile, err := os.Open(localPath)
if err != nil {
panic(err)
}
defer localFile.Close()
// 创建HDFS文件
hdfsFile, err := client.Create(hdfsPath)
if err != nil {
panic(err)
}
defer hdfsFile.Close()
// 将本地文件复制到HDFS
_, err = io.Copy(hdfsFile, localFile)
if err != nil {
panic(err)
}
fmt.Printf("Uploaded file: %s\n", hdfsPath)
}
下载文件
$ hdfs dfs -get <hdfspath> <localpath>
示例代码:
package main
import (
"fmt"
"github.com/colinmarc/hdfs"
"io"
"os"
)
const address = "master:9000"
func main() {
client, err := hdfs.New(address)
if err != nil {
panic(err)
}
hdfsPath := "/test.txt"
localPath := "/home/ubuntu/workspace/hadoop/test.txt"
hdfsFile, err := client.Open(hdfsPath)
if err != nil {
panic(err)
}
defer hdfsFile.Close()
localFile, err := os.Create(localPath)
if err != nil {
panic(err)
}
defer localFile.Close()
_, err = io.Copy(localFile, hdfsFile)
if err != nil {
panic(err)
}
fmt.Printf("Downloaded file: %s\n", localPath)
}
查看文件列表
命令:
$ hdfs dfs -ls <path>
示例代码:
package main
import (
"fmt"
"github.com/colinmarc/hdfs"
)
const address = "master:9000"
func main() {
client, err := hdfs.New(address)
if err != nil {
panic(err)
}
hdfsPath := "/testdir"
files, err := client.ReadDir(hdfsPath)
if err != nil {
panic(err)
}
fmt.Printf("Files in %s:\n", hdfsPath)
for _, file := range files {
fmt.Printf("%s (size: %d)\n", file.Name(), file.Size())
}
}
删除文件
命令:
$ hdfs dfs -rm <path>
示例代码:
package main
import (
"fmt"
"github.com/colinmarc/hdfs"
)
const address = "master:9000"
func main() {
client, err := hdfs.New(address)
if err != nil {
panic(err)
}
hdfsPath := "/testdir"
err = client.Remove(hdfsPath)
if err != nil {
panic(err)
}
fmt.Printf("Deleted directory: %s\n", hdfsPath)
}