HDFS Lab on Ubuntu (English Version) [Big Data Processing Technology]
1. Experimental environment
Component | Version |
---|---|
OS | Ubuntu 20.04.4 LTS |
JDK | 1.8.0_144 |
Hadoop | 2.7.2 |
Experiment Steps
2. Use HDFS commands for the following tasks:
2.1 Given a directory in HDFS, output the following information for all files in the directory: read and write permissions, file size, and creation time
Command:
./bin/hdfs dfs -ls ./input/
Note that -ls prints each file's permissions, replication factor, owner, group, size, and modification time; HDFS does not expose a separate creation time, so the modification time stands in for it here.
2.2 Display the content of a file in HDFS
Command:
./bin/hdfs dfs -cat input/core-site.xml
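For a large file, printing everything with -cat may be impractical; the standard -tail option shows only the last kilobyte of the file:
./bin/hdfs dfs -tail input/core-site.xml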
2.3 Copy a text file from local file system to HDFS; if the file already exists in HDFS, append the content to the end of the existing file
Create the file hello.txt in the directory /home/hadoop and enter some sample text into it
Upload the hello.txt file to HDFS
./bin/hdfs dfs -mkdir ./lab2
./bin/hdfs dfs -put /home/hadoop/hello.txt lab2
./bin/hdfs dfs -appendToFile ~/hello.txt ./lab2/hello.txt
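The -put and -appendToFile commands above run unconditionally; a small sketch that actually branches on whether the file already exists (using the standard -test -e check and the same lab2 paths) could look like this:
if ./bin/hdfs dfs -test -e lab2/hello.txt; then
    ./bin/hdfs dfs -appendToFile ~/hello.txt lab2/hello.txt
else
    ./bin/hdfs dfs -put ~/hello.txt lab2/hello.txt
fi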
2.4 Copy a file from HDFS to local file system
Command:
./bin/hdfs dfs -copyToLocal lab2/hello.txt ~/下载/
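The -get command is essentially equivalent to -copyToLocal and can be used interchangeably here (~/下载/ is the Downloads directory on a Chinese-locale Ubuntu system):
./bin/hdfs dfs -get lab2/hello.txt ~/下载/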
2.5 Delete a file in HDFS
Command:
./bin/hdfs dfs -rm lab2/hello.txt
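If the target is a directory rather than a file, -rm alone fails; the recursive flag removes the directory and its contents (shown purely as an illustration, since it deletes the whole lab2 directory):
./bin/hdfs dfs -rm -r lab2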
3. Write a Java program using Hadoop Java API for the following tasks:
3.1 Install the Java IDE IntelliJ IDEA
Command:
sudo snap install intellij-idea-community --classic
3.2 Configure the project environment
(1) Create a project
(2) Add the jar packages needed by the project
The following jar packages need to be added to the Java project:
(1) All jar packages in the directory "/usr/local/hadoop/share/hadoop/common", excluding the jdiff, lib, sources, and webapps subdirectories;
(2) All jar packages in the directory "/usr/local/hadoop/share/hadoop/common/lib";
(3) All jar packages in the directory "/usr/local/hadoop/share/hadoop/hdfs", excluding the jdiff, lib, sources, and webapps subdirectories;
(4) All jar packages in the directory "/usr/local/hadoop/share/hadoop/hdfs/lib".
If you still get "Error: java: package org.apache.xxxxxxx does not exist" when running the project after adding the required jar packages, see the Problems and Solutions section.
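As a quick sanity check, the set of jars described above can be listed from a terminal (a rough sketch, assuming the default /usr/local/hadoop installation path):
find /usr/local/hadoop/share/hadoop/common /usr/local/hadoop/share/hadoop/hdfs \
    -name "*.jar" | grep -vE "jdiff|sources|webapps"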
3.3 Copy a text file from local file system to HDFS; if the file already exists in HDFS, let the user specify whether to append the content to the end of the existing file, or overwrite the original file
3.3.1 Create class UploadMyFile
3.3.2 Enter the following code into the class
import java.io.*;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;

/**
 * Copy a text file from local file system to HDFS; if the file already exists in HDFS,
 * let the user specify whether to append the content to the end of the existing file,
 * or overwrite the original file.
 */
public class UploadMyFile {
    Configuration conf = null;

    public UploadMyFile() {
        this.conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        // These two settings let append() succeed on a pseudo-distributed
        // (single-DataNode) cluster, where no replacement DataNode is available.
        conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
        conf.set("dfs.client.block.write.replace-datanode-on-failure.enable", "true");
    }

    public void upload(String localPath, String hdfsPath) throws IOException {
        Path outPath = new Path(hdfsPath);
        int uploadMode = 0; // 0 = upload, 1 = overwrite, 2 = append
        FileSystem fs = FileSystem.get(conf);
        boolean fileExist = fs.exists(outPath);
        if (fileExist) {
            Scanner sc = new Scanner(System.in);
            System.out.println("The file already exists in HDFS; enter y to overwrite it, or n to append to it");
            String choice = sc.next();
            if (choice.equals("y") || choice.equals("Y")) {
                uploadMode = 1;
            } else {
                uploadMode = 2;
            }
        }
        FileInputStream inputStream = new FileInputStream(localPath);
        FSDataOutputStream outputStream;
        if (uploadMode == 0 || uploadMode == 1) {
            // Upload or overwrite: create() truncates any existing file
            outputStream = fs.create(outPath);
        } else {
            // Append to the end of the existing file
            outputStream = fs.append(outPath);
        }
        byte[] data = new byte[1024];
        int read = -1;
        while ((read = inputStream.read(data)) > 0) {
            outputStream.write(data, 0, read);
        }
        inputStream.close();
        outputStream.close();
        if (uploadMode == 0) {
            System.out.println("File uploaded successfully");
        } else if (uploadMode == 1) {
            System.out.println("File overwritten successfully");
        } else {
            System.out.println("Content appended successfully");
        }
    }

    public static void main(String[] args) throws IOException {
        UploadMyFile model = new UploadMyFile();
        Scanner sc = new Scanner(System.in);
        System.out.println("Enter the path of the file to upload: ");
        String localPath = sc.next();
        System.out.println("Enter the path where the file should be saved: ");
        String hdfsPath = sc.next();
        model.upload(localPath, hdfsPath);
    }
}
3.3.3 Check the contents of the file to be uploaded
3.3.4 Run the main method of class UploadMyFile
3.3.5 Enter the following file paths into the console
/home/hadoop/hello.txt
hdfs://localhost:9000/user/hadoop/lab2/hello.txt
3.3.6 Overwrite Mode
Enter the following into the console
/home/hadoop/hello.txt
hdfs://localhost:9000/user/hadoop/lab2/hello.txt
y
3.3.7 Append Mode
Enter the following into the console
/home/hadoop/hello.txt
hdfs://localhost:9000/user/hadoop/lab2/hello.txt
n
3.4 Copy a file from HDFS to the local file system; if a file with the same name already exists locally, rename the downloaded file automatically
3.4.1 Create class DownloadMyFile
3.4.2 Enter the following code into the class
import java.io.*;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;

/**
 * Copy a file from HDFS to the local file system; if a file with the same
 * name already exists locally, rename the downloaded file automatically.
 */
public class DownloadMyFile {
    Configuration conf = null;

    public DownloadMyFile() {
        this.conf = new Configuration();
        conf.set("fs.defaultFS", "hdfs://localhost:9000");
        conf.set("fs.hdfs.impl", "org.apache.hadoop.hdfs.DistributedFileSystem");
        conf.set("dfs.client.block.write.replace-datanode-on-failure.policy", "NEVER");
    }

    public void download(String localPath, String hdfsPath) throws IOException {
        Path inPath = new Path(hdfsPath);
        String newLocalPath = localPath;
        FileSystem fs = FileSystem.get(conf);
        File f = new File(localPath);
        // Split the local path into base name and extension so the numeric
        // suffix can be inserted before the extension (a path without an
        // extension simply gets the suffix appended at the end).
        int dotIndex = localPath.lastIndexOf(".");
        String base = (dotIndex == -1) ? localPath : localPath.substring(0, dotIndex);
        String ext = (dotIndex == -1) ? "" : localPath.substring(dotIndex);
        int fileId = 0;
        while (f.exists()) {
            fileId += 1;
            newLocalPath = base + "_" + fileId + ext;
            f = new File(newLocalPath);
        }
        Path outPath = new Path(newLocalPath);
        // Note: copyToLocalFile also writes a hidden .crc checksum file
        // next to the downloaded file.
        fs.copyToLocalFile(inPath, outPath);
        System.out.println("File downloaded successfully; it was saved to: " + f);
    }

    public static void main(String[] args) throws IOException {
        DownloadMyFile model = new DownloadMyFile();
        Scanner sc = new Scanner(System.in);
        System.out.println("Enter the HDFS path of the file to download: ");
        String hdfsPath = sc.next();
        System.out.println("Enter the path where the file should be saved: ");
        String localPath = sc.next();
        model.download(localPath, hdfsPath);
    }
}
3.4.3 Run the main method of class DownloadMyFile
3.4.4 Enter the following file paths into the console
hdfs://localhost:9000/user/hadoop/lab2/hello.txt
/home/hadoop/hello.txt
4. Problems and Solutions
4.1 Getting "Error: java: package org.apache.xxxxxxx does not exist" when running the project
- Add the jar packages in the mapreduce folder and the yarn folder (also under /usr/local/hadoop/share/hadoop/) as well.
- Alternatively, use a Maven project and let Maven manage the Hadoop dependencies, as sketched below.
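A minimal sketch of the Maven route, assuming Hadoop 2.7.2 as listed in the environment table: declaring the hadoop-client artifact in pom.xml pulls in the common, hdfs, mapreduce, and yarn client jars transitively, so no jars need to be added by hand.
<dependency>
    <groupId>org.apache.hadoop</groupId>
    <artifactId>hadoop-client</artifactId>
    <version>2.7.2</version>
</dependency>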