Loading

Ubuntu配置HDFS实验(英文版)【大数据处理技术】

1. Experimental environment

Version
OS Ubuntu 20.04.4 LTS
JDK 1.8.0_144
Hadoop 2.7.2

image.png

Experiment Steps

2. Use HDFS commands for the following tasks:

2.1 Given a directory in HDFS, output the following information of all files in the directory: read and write permissions, file size and creation time

Commands:

./bin/hdfs dfs -ls ./input/

image.png

2.2 Display the content of a file in HDFS

Command

./bin/hdfs dfs -cat input/core-site.xml

image.png

2.3 Copy a text file from local file system to HDFS; if the file already exists in HDFS, append the content to the end of the existing file

Create file hello.txt in the directory /home/hadoop, and enter the following into the file
image.png
Upload the hello.txt file to HDFS

./bin/hdfs dfs -mkdir ./lab2  
./bin/hdfs dfs -put /home/hadoop/hello.txt lab2 
./bin/hdfs dfs -appendToFile ~/hello.txt ./lab2/hello.txt

image.png

2.4 Copy a file from HDFS to local file system

Command:

./bin/hdfs dfs -copyToLocal  lab2/hello.txt ~/下载/

image.png

2.5 Delete a file in HDFS

Command:

./bin/hdfs dfs -rm lab2/hello.txt

image.png

3. Write a Java program using Hadoop Java API for the following tasks:

3.1 Install Java IDE IntelliJ IDEA

command

sudo snap install intellij-idea-community --classic

3.2 Configure project environment

(1) Create project
image.png
(2) Add jar packages needed for the project
The following jar packages need to be added to the Java project:
(1) All jar packages in the directory "/usr/local/hadoop/share/hadoop/common", excluding the directories jdiff, lib, sources and webapps
(2)All jar packages in the directory "/usr/local/hadoop/share/hadoop/common/lib"
(3) All jar packages in the directory "/usr/local/hadoop/share/hadoop/hdfs", excluding the directories jdiff, lib, sources and webapps;
(4)All jar packages in the directory "/usr/local/hadoop/share/hadoop/hdfs/lib".
image.png
image.png
If you still get "Error: java: package org.apache.xxxxxxx does not exist" when running the project after adding the required jar packages, see Problems and Solutions section.

3.3 Copy a text file from local file system to HDFS; if the file already exists in HDFS, let the user specify whether to append the content to the end of the existing file, or overwrite the original file

3.3.1 Create class UploadMyFile

image.png
image.png
image.png

3.3.2 Enter the following code into the class

import java.io.*;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;


/**
 *  Copy a text file from local file system to HDFS; if the file already exists in HDFS,
 *  let the user specify whether to append the content to the end of the existing file,
 *  or overwrite the original file.
 */
public class UploadMyFile {

    // Upload modes chosen when the target file already exists in HDFS.
    private static final int MODE_CREATE = 0;    // target does not exist yet
    private static final int MODE_OVERWRITE = 1; // user chose "y"
    private static final int MODE_APPEND = 2;    // user chose anything else

    Configuration conf = null;

    public UploadMyFile() {
        this.conf = new Configuration();
        conf.set("fs.defaultFS","hdfs://localhost:9000");
        conf.set("fs.hdfs.impl","org.apache.hadoop.hdfs.DistributedFileSystem");
        // On a single-node cluster append() cannot find a replacement datanode
        // when one "fails"; NEVER avoids the resulting client-side exception.
        conf.set("dfs.client.block.write.replace-datanode-on-failure.policy","NEVER");
        // FIX: the original key was misspelled ("...-onfailure.enable", missing a
        // hyphen), so the HDFS client silently ignored the setting.
        conf.set("dfs.client.block.write.replace-datanode-on-failure.enable","true");
    }

    /**
     * Uploads a local file to HDFS. When the destination already exists, asks the
     * user on stdin whether to overwrite (y/Y) or append (anything else).
     *
     * @param localPath path of the local file to read
     * @param hdfsPath  destination path in HDFS
     * @throws IOException if the local file cannot be read or the HDFS write fails
     */
    public void upload(String localPath, String hdfsPath) throws IOException {
        Path outPath = new Path(hdfsPath);

        int uploadMode = MODE_CREATE;

        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(outPath)) {
            Scanner sc = new Scanner(System.in);
            System.out.println("待上传文件在HDFS中存在,输入y选择覆盖文件,输入n选择追加");
            String choice = sc.next();
            uploadMode = choice.equalsIgnoreCase("y") ? MODE_OVERWRITE : MODE_APPEND;
        }

        // try-with-resources guarantees both streams are closed even when the
        // copy loop throws (the original leaked them on failure).
        try (FileInputStream inputStream = new FileInputStream(localPath);
             FSDataOutputStream outputStream = (uploadMode == MODE_APPEND)
                     ? fs.append(outPath)   // append to the existing file
                     : fs.create(outPath)) { // create new / overwrite existing
            byte[] data = new byte[4096];
            int read;
            while ((read = inputStream.read(data)) > 0) {
                outputStream.write(data, 0, read);
            }
        }

        if (uploadMode == MODE_CREATE) {
            System.out.println("上传文件成功");
        } else if (uploadMode == MODE_OVERWRITE) {
            System.out.println("覆盖文件成功");
        } else {
            System.out.println("追加文件成功");
        }
    }

    /** Reads the local source path and the HDFS target path from stdin, then uploads. */
    public static void main(String[] args) throws IOException {
        UploadMyFile model = new UploadMyFile();
        Scanner sc = new Scanner(System.in);
        System.out.println("输入需要上传文件的路径: ");
        String localPath = sc.next();
        System.out.println("输入文件的保存路径: ");
        String hdfsPath = sc.next();
        model.upload(localPath, hdfsPath);
    }
}

3.3.3 Check the contents of the file to be uploaded

image.png

3.3.4 Run the main method of class UploadMyFile

image.png

3.3.5 Enter the following file path into console

/home/hadoop/hello.txt
hdfs://localhost:9000/user/hadoop/lab2/hello.txt

image.png
image.png

3.3.6 Overwrite Mode

image.png
Enter the following file path into console

/home/hadoop/hello.txt
hdfs://localhost:9000/user/hadoop/lab2/hello.txt
y

image.png
image.png

3.3.7 Append Mode

Enter the following file path into console

/home/hadoop/hello.txt
hdfs://localhost:9000/user/hadoop/lab2/hello.txt
n

image.png
image.png

3.4 Copy a file from HDFS to local file system; if there is a file with the same name in local file system, then rename it (the new file) automatically

3.4.1 Create class DownloadMyFile

image.png

3.4.2 Enter the following code into the class

import java.io.*;
import java.nio.file.Files;
import java.util.Scanner;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.*;

/**
 * Copy a file from HDFS to the local file system; if a file with the same name
 * already exists locally, automatically rename the downloaded file by inserting
 * "_N_" before the extension (e.g. hello.txt -> hello_1_.txt).
 */
public class DownloadMyFile {

    Configuration conf = null;

    public DownloadMyFile() {
        this.conf = new Configuration();
        conf.set("fs.defaultFS","hdfs://localhost:9000");
        conf.set("fs.hdfs.impl","org.apache.hadoop.hdfs.DistributedFileSystem");
        conf.set("dfs.client.block.write.replace-datanode-on-failure.policy","NEVER");
    }

    /**
     * Downloads an HDFS file to the local file system, choosing a fresh local
     * name if {@code localPath} already exists.
     *
     * @param localPath desired local destination path
     * @param hdfsPath  source path in HDFS
     * @throws IOException if the HDFS read or local write fails
     */
    public void download(String localPath, String hdfsPath) throws IOException {
        Path inPath = new Path(hdfsPath);
        FileSystem fs = FileSystem.get(conf);

        // Split localPath into base + extension once, up front.
        // FIX: the original called localPath.substring(localPath.lastIndexOf("."))
        // unconditionally, which throws StringIndexOutOfBoundsException when the
        // file name has no dot, and splits at the wrong place when a directory
        // component contains a dot (e.g. "/home/a.b/file"). Only treat a dot as
        // an extension separator when it appears after the last path separator.
        int slash = localPath.lastIndexOf(File.separatorChar);
        int dot = localPath.lastIndexOf('.');
        String base = (dot > slash) ? localPath.substring(0, dot) : localPath;
        String ext  = (dot > slash) ? localPath.substring(dot) : "";

        String newLocalPath = localPath;
        File f = new File(localPath);
        int fileId = 0;
        // Keep incrementing the numeric suffix until the name is unused.
        while (f.exists()) {
            fileId += 1;
            newLocalPath = base + "_" + fileId + "_" + ext;
            f = new File(newLocalPath);
        }

        fs.copyToLocalFile(inPath, new Path(newLocalPath));

        System.out.println("下载文件成功,文件保存路径为: " + f);
    }

    /** Reads the HDFS source path and the local target path from stdin, then downloads. */
    public static void main(String[] args) throws IOException {
        DownloadMyFile model = new DownloadMyFile();
        Scanner sc = new Scanner(System.in);
        System.out.println("输入需要下载文件的路径: ");
        String hdfsPath = sc.next();
        System.out.println("输入文件的保存路径: ");
        String localPath = sc.next();
        model.download(localPath, hdfsPath);
    }
}

3.4.3 Run the main method of class DownloadMyFile

image.png

3.4.4 Enter the following file path into console

hdfs://localhost:9000/user/hadoop/lab2/hello.txt
/home/hadoop/hello.txt

image.png
image.png
image.png
image.png

4.Problems and Solutions

4.1 Get "Error: java: package org.apache.xxxxxxx does not exist" when running the project

  1. Add the packages in the mapreduce folder and the yarn folder.
  2. Use Maven Project.
posted @ 2023-08-09 08:01  LateSpring  阅读(356)  评论(0编辑  收藏  举报