使用 C# 调用 Hadoop HttpFS

使用 C# 调用 Hadoop HttpFS

HttpFS 是 Hadoop 提供的 RESTful Web API(接口与 WebHDFS REST API 兼容)。Java 可以直接调用 Hadoop 的 API,其它语言则要通过 WebHDFS/HttpFS 的 REST 接口调用。貌似 Azure 有对应的 API?我也不懂,就自己写了。

开启 HttpFS 步骤

参考链接:https://juejin.cn/post/7015007338830495780

怎么用这玩意儿可以去看看官方文档,但是写的真的太烂了,没案例,没讲解,网上基本也是用 Java 直接调用的 Hadoop API ,我只能用 Postman 慢慢试,只写了几个常用的,完整代码在下面
(都整分布式了,确实应该直接用 Java ,但是写 C# 实在是太爽了)

完整 C# 代码

IConfiguration 读取 appsettings.json
List a Directory、Iteratively List a Directory、List a File 这三个接口我没有写,因为暂时用不上,而且它们也挺简单的,如果用上了会补上代码。

/// <summary>
/// Static helper wrapping a small subset of the Hadoop HttpFS (WebHDFS-compatible) REST API.
/// The host name, HttpFS port and user name are read from the "Hadoop" section of appsettings.json.
/// </summary>
public class HadoopHelper
{
    private static IConfiguration _configuration = new ConfigurationBuilder().AddJsonFile("appsettings.json").Build();

    // A single HttpClient is shared by all calls. Creating one per request leaves
    // sockets lingering and can exhaust them under load; HttpClient is safe for
    // concurrent requests.
    private static readonly HttpClient _httpClient = new HttpClient();

    /// <summary>
    /// Percent-encodes every segment of a slash-separated path while keeping the
    /// '/' separators, so characters such as '?', '&amp;', '#' or '+' inside a name
    /// cannot change the structure of the request URL.
    /// </summary>
    /// <param name="path">Slash-separated path; null or empty yields an empty string.</param>
    /// <returns>The encoded path.</returns>
    private static string EscapePath(string path)
    {
        if (string.IsNullOrEmpty(path))
        {
            return string.Empty;
        }

        string[] segments = path.Split('/');
        for (int i = 0; i < segments.Length; i++)
        {
            segments[i] = Uri.EscapeDataString(segments[i]);
        }

        return string.Join("/", segments);
    }

    /// <summary>
    /// Builds "http://{host}:{port}/webhdfs/v1/{path}/{name}?user.name={user}&amp;op={operation}".
    /// </summary>
    /// <param name="path">Directory containing the target, without leading or trailing '/'.</param>
    /// <param name="name">File or folder name.</param>
    /// <param name="operation">Value of the "op" query parameter, e.g. "MKDIRS".</param>
    /// <returns>The request URL.</returns>
    private static string BuildRequestUrl(string path, string name, string operation)
    {
        string hostName = _configuration["Hadoop:HostName"];
        string httpfsPort = _configuration["Hadoop:HttpfsPort"];
        string username = _configuration["Hadoop:Username"];

        StringBuilder url = new StringBuilder();
        url.Append("http://").Append(hostName).Append(":").Append(httpfsPort);
        url.Append("/webhdfs/v1/").Append(EscapePath(path)).Append("/").Append(EscapePath(name));
        url.Append("?user.name=").Append(Uri.EscapeDataString(username ?? string.Empty));
        url.Append("&op=").Append(operation);

        return url.ToString();
    }

    /// <summary>
    /// Sends a body-less request and interprets a {"boolean": ...} JSON answer.
    /// </summary>
    /// <param name="method">HTTP method to use.</param>
    /// <param name="requestUrl">Fully built request URL.</param>
    /// <returns>
    /// false when the status is not 200 OK or the "boolean" member is missing or not a
    /// boolean; otherwise the value of that member. Network errors and malformed JSON
    /// propagate as exceptions.
    /// </returns>
    private static async Task<bool> SendForBooleanAsync(HttpMethod method, string requestUrl)
    {
        using (HttpRequestMessage request = new HttpRequestMessage(method, requestUrl))
        using (HttpResponseMessage response = await _httpClient.SendAsync(request))
        {
            if (response.StatusCode != HttpStatusCode.OK)
            {
                return false;
            }

            string responseBody = await response.Content.ReadAsStringAsync();

            // Guard against a body without the "boolean" member instead of
            // dereferencing null.
            var flag = JObject.Parse(responseBody)["boolean"];
            bool parsed;
            return flag != null && bool.TryParse(flag.ToString(), out parsed) && parsed;
        }
    }

    /// <summary>
    /// Sends a request carrying <paramref name="content"/> and compares the response status.
    /// </summary>
    /// <param name="method">HTTP method to use.</param>
    /// <param name="requestUrl">Fully built request URL.</param>
    /// <param name="content">Request body; disposed together with the request.</param>
    /// <param name="expectedStatus">Status code that means success.</param>
    /// <returns>true when the response status equals <paramref name="expectedStatus"/>; false otherwise or when sending throws.</returns>
    private static async Task<bool> SendContentAsync(HttpMethod method, string requestUrl, HttpContent content, HttpStatusCode expectedStatus)
    {
        try
        {
            using (HttpRequestMessage request = new HttpRequestMessage(method, requestUrl))
            {
                request.Content = content;

                using (HttpResponseMessage response = await _httpClient.SendAsync(request))
                {
                    return response.StatusCode == expectedStatus;
                }
            }
        }
        catch (Exception)
        {
            // The public write methods document "false means the write failed",
            // so transport errors are reported as false rather than thrown.
            return false;
        }
    }

    /// <summary>
    /// Checks whether a file exists in the given directory (op=GETFILESTATUS).
    /// </summary>
    /// <param name="fileName">File name to look for, including its extension.</param>
    /// <param name="path">Directory containing the file, without leading or trailing '/'.</param>
    /// <returns>
    /// false only when the server answers 404 Not Found; true for every other status
    /// and also when the request itself fails.
    /// </returns>
    public static async Task<bool> IsExistenceAsync(string fileName, string path)
    {
        string requestUrl = BuildRequestUrl(path, fileName, "GETFILESTATUS");

        try
        {
            using (HttpResponseMessage response = await _httpClient.GetAsync(requestUrl))
            {
                return response.StatusCode != HttpStatusCode.NotFound;
            }
        }
        catch (Exception)
        {
            // Kept from the original: an unreachable server is reported as
            // "exists" rather than "missing".
            // NOTE(review): presumably so callers do not overwrite on a transient error — confirm.
            return true;
        }
    }

    /// <summary>
    /// Creates a folder inside the given directory (op=MKDIRS).
    /// </summary>
    /// <param name="folderName">Name of the folder to create.</param>
    /// <param name="path">Parent directory, without leading or trailing '/'.</param>
    /// <returns>
    /// The boolean reported by the server; false when the status is not 200 OK.
    /// The original author notes that creating an already existing folder also yields true.
    /// </returns>
    public static async Task<bool> CreateFolderAsync(string folderName, string path)
    {
        string requestUrl = BuildRequestUrl(path, folderName, "MKDIRS");
        return await SendForBooleanAsync(HttpMethod.Put, requestUrl);
    }

    /// <summary>
    /// Renames a file or folder inside the given directory (op=RENAME).
    /// </summary>
    /// <param name="oldFileOrFolderName">Current name, including the extension for files.</param>
    /// <param name="newFileOrFolderName">New name, including the extension for files.</param>
    /// <param name="path">Directory containing the item, without leading or trailing '/'.</param>
    /// <returns>
    /// The boolean reported by the server; false when the status is not 200 OK
    /// (per the original author: e.g. the item does not exist or both names are equal).
    /// </returns>
    public static async Task<bool> RenameFileOrFolderAsync(string oldFileOrFolderName, string newFileOrFolderName, string path)
    {
        // The destination is an absolute HDFS path passed as a query value, so it is
        // encoded the same way as the path part of the URL.
        string requestUrl = BuildRequestUrl(path, oldFileOrFolderName, "RENAME")
            + "&destination=/" + EscapePath(path) + "/" + EscapePath(newFileOrFolderName);

        return await SendForBooleanAsync(HttpMethod.Put, requestUrl);
    }

    /// <summary>
    /// Deletes a file or folder inside the given directory (op=DELETE).
    /// </summary>
    /// <param name="fileOrFolderName">Name of the item, including the extension for files.</param>
    /// <param name="path">Directory containing the item.</param>
    /// <returns>The boolean reported by the server; false when the status is not 200 OK.</returns>
    public static async Task<bool> DeleteFileOrFolderAsync(string fileOrFolderName, string path)
    {
        string requestUrl = BuildRequestUrl(path, fileOrFolderName, "DELETE");
        return await SendForBooleanAsync(HttpMethod.Delete, requestUrl);
    }

    /// <summary>
    /// Downloads a file from the given directory (op=OPEN). The whole file is buffered
    /// in memory, so this is only suitable for small files.
    /// </summary>
    /// <param name="fileName">File name, including its extension.</param>
    /// <param name="path">Directory containing the file, without leading or trailing '/'.</param>
    /// <returns>The file content as a byte array.</returns>
    public static async Task<byte[]> OpenAndReadFileAsync(string fileName, string path)
    {
        string requestUrl = BuildRequestUrl(path, fileName, "OPEN");

        // One request only: the original issued an extra GET whose response was
        // discarded, downloading the file twice.
        return await _httpClient.GetByteArrayAsync(requestUrl);
    }

    /// <summary>
    /// Creates a file and writes a JSON string into it (op=CREATE).
    /// </summary>
    /// <param name="fileName">File name, including its extension.</param>
    /// <param name="path">Directory for the file, without leading or trailing '/'.</param>
    /// <param name="message">JSON text to write.</param>
    /// <returns>true when the server answers 201 Created; false otherwise or when the request fails.</returns>
    /// <remarks>
    /// NOTE(review): the request does not send an "overwrite" parameter, so whether an
    /// existing file is replaced depends on the server default — confirm.
    /// </remarks>
    public static async Task<bool> CreateAndWriteJsonFileAsync(string fileName, string path, string message)
    {
        string requestUrl = BuildRequestUrl(path, fileName, "CREATE");

        StringContent stringContent = new StringContent(message);
        // Uploads must be sent as application/octet-stream, so the default content type is replaced.
        stringContent.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream");

        return await SendContentAsync(HttpMethod.Put, requestUrl, stringContent, HttpStatusCode.Created);
    }

    /// <summary>
    /// Appends JSON text to an existing file (op=APPEND).
    /// </summary>
    /// <param name="fileName">File name, including its extension.</param>
    /// <param name="path">Directory containing the file, without leading or trailing '/'.</param>
    /// <param name="message">Text to append.</param>
    /// <returns>true when the server answers 200 OK; false otherwise or when the request fails.</returns>
    public static async Task<bool> AppendWriteJsonFileAsync(string fileName, string path, string message)
    {
        string requestUrl = BuildRequestUrl(path, fileName, "APPEND");

        StringContent stringContent = new StringContent(message);
        // Uploads must be sent as application/octet-stream, so the default content type is replaced.
        stringContent.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream");

        // APPEND answers 200 OK on success. The original compared against 201 Created
        // and therefore reported every successful append as a failure.
        return await SendContentAsync(HttpMethod.Post, requestUrl, stringContent, HttpStatusCode.OK);
    }

    /// <summary>
    /// Creates a file and uploads PNG image bytes into it (op=CREATE). The whole image
    /// is sent from memory, so this is only suitable for small files.
    /// </summary>
    /// <param name="fileName">File name, including its extension.</param>
    /// <param name="path">Directory for the file, without leading or trailing '/'.</param>
    /// <param name="bytes">Image data.</param>
    /// <returns>true when the server answers 201 Created; false otherwise or when the request fails.</returns>
    /// <remarks>
    /// NOTE(review): the request does not send an "overwrite" parameter, so whether an
    /// existing file is replaced depends on the server default — confirm.
    /// </remarks>
    public static async Task<bool> CreateAndWritePngFileAsync(string fileName, string path, byte[] bytes)
    {
        string requestUrl = BuildRequestUrl(path, fileName, "CREATE");

        ByteArrayContent byteArrayContent = new ByteArrayContent(bytes);
        // Uploads must be sent as application/octet-stream.
        byteArrayContent.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream");

        return await SendContentAsync(HttpMethod.Put, requestUrl, byteArrayContent, HttpStatusCode.Created);
    }
}

WebHDFS REST API

Status of a File/Directory

查看文件/文件夹的状态,GET请求,返回 404就是文件/文件夹不存在,文件/文件夹存在则返回具体信息的 Json 字符串

http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETFILESTATUS
/// <summary>
/// Checks whether a file exists in the given directory (op=GETFILESTATUS).
/// </summary>
/// <param name="fileName">File name to look for, including its extension.</param>
/// <param name="path">Directory containing the file, without leading or trailing '/'.</param>
/// <returns>
/// false only when the server answers 404 Not Found; true for every other status
/// and also when the request itself fails.
/// </returns>
public async static Task<bool> IsExistenceAsync(string fileName, string path)
{
    string hostName = _configuration["Hadoop:HostName"];
    string httpfsPort = _configuration["Hadoop:HttpfsPort"];
    string username = _configuration["Hadoop:Username"];

    string requestUrl = $"http://{hostName}:{httpfsPort}/webhdfs/v1/{path}/{fileName}?user.name={username}&op=GETFILESTATUS";

    try
    {
        // Dispose the client and the response deterministically. Long-running code
        // should prefer one shared HttpClient instead of one per call.
        using (HttpClient httpClient = new HttpClient())
        using (HttpResponseMessage response = await httpClient.GetAsync(requestUrl))
        {
            // The status code alone answers the question; the JSON body
            // (FileStatus or RemoteException) does not need to be parsed.
            return response.StatusCode != HttpStatusCode.NotFound;
        }
    }
    catch
    {
        // Kept from the original: a failed request is reported as "exists".
        return true;
    }
}

Make a Directory

创建文件夹,PUT请求,这个返回值是一个 bool 的 Json 字符串

http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=MKDIRS
                    [&permission=<OCTAL>]
/// <summary>
/// Creates a folder inside the given directory (op=MKDIRS).
/// </summary>
/// <param name="folderName">Name of the folder to create.</param>
/// <param name="path">Parent directory, without leading or trailing '/'.</param>
/// <returns>The boolean reported by the server; false when the status is not 200 OK.</returns>
public async static Task<bool> CreateFolderAsync(string folderName, string path)
{
    string hostName = _configuration["Hadoop:HostName"];
    string httpfsPort = _configuration["Hadoop:HttpfsPort"];
    string username = _configuration["Hadoop:Username"];

    string requestUrl = $"http://{hostName}:{httpfsPort}/webhdfs/v1/{path}/{folderName}?user.name={username}&op=MKDIRS";

    // Dispose the client and the response deterministically. Long-running code
    // should prefer one shared HttpClient instead of one per call.
    using (HttpClient httpClient = new HttpClient())
    using (HttpResponseMessage httpResponseMessage = await httpClient.PutAsync(requestUrl, null))
    {
        if (httpResponseMessage.StatusCode != HttpStatusCode.OK)
        {
            return false;
        }

        string responseBody = await httpResponseMessage.Content.ReadAsStringAsync();

        // The answer has the shape {"boolean": true|false}; a missing or
        // non-boolean member is treated as failure instead of throwing.
        var flag = JObject.Parse(responseBody)["boolean"];
        bool parsed;
        return flag != null && bool.TryParse(flag.ToString(), out parsed) && parsed;
    }
}

Rename a File/Directory

重命名文件/文件夹,PUT请求,这个返回值是一个 bool 的 Json 字符串

http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=RENAME&destination=<PATH>
/// <summary>
/// Renames a file or folder inside the given directory (op=RENAME).
/// </summary>
/// <param name="oldFileOrFolderName">Current name, including the extension for files.</param>
/// <param name="newFileOrFolderName">New name, including the extension for files.</param>
/// <param name="path">Directory containing the item, without leading or trailing '/'.</param>
/// <returns>The boolean reported by the server; false when the status is not 200 OK.</returns>
public async static Task<bool> RenameFileOrFolderAsync(string oldFileOrFolderName, string newFileOrFolderName, string path)
{
    string hostName = _configuration["Hadoop:HostName"];
    string httpfsPort = _configuration["Hadoop:HttpfsPort"];
    string username = _configuration["Hadoop:Username"];

    // The destination is an absolute HDFS path, hence the leading '/'.
    string requestUrl = $"http://{hostName}:{httpfsPort}/webhdfs/v1/{path}/{oldFileOrFolderName}?user.name={username}&op=RENAME&destination=/{path}/{newFileOrFolderName}";

    // Dispose the client and the response deterministically. Long-running code
    // should prefer one shared HttpClient instead of one per call.
    using (HttpClient httpClient = new HttpClient())
    using (HttpResponseMessage httpResponseMessage = await httpClient.PutAsync(requestUrl, null))
    {
        if (httpResponseMessage.StatusCode != HttpStatusCode.OK)
        {
            return false;
        }

        string responseBody = await httpResponseMessage.Content.ReadAsStringAsync();

        // The answer has the shape {"boolean": true|false}; a missing or
        // non-boolean member is treated as failure instead of throwing.
        var flag = JObject.Parse(responseBody)["boolean"];
        bool parsed;
        return flag != null && bool.TryParse(flag.ToString(), out parsed) && parsed;
    }
}

Delete a File/Directory

删除文件/文件夹,DELETE请求,这个返回值是一个 bool 的 Json 字符串

http://<host>:<port>/webhdfs/v1/<path>?op=DELETE
                    [&recursive=<true |false>]
/// <summary>
/// Deletes a file or folder inside the given directory (op=DELETE).
/// </summary>
/// <param name="fileOrFolderName">Name of the item, including the extension for files.</param>
/// <param name="path">Directory containing the item.</param>
/// <returns>The boolean reported by the server; false when the status is not 200 OK.</returns>
public async static Task<bool> DeleteFileOrFolderAsync(string fileOrFolderName, string path)
{
    string hostName = _configuration["Hadoop:HostName"];
    string httpfsPort = _configuration["Hadoop:HttpfsPort"];
    string username = _configuration["Hadoop:Username"];

    string requestUrl = $"http://{hostName}:{httpfsPort}/webhdfs/v1/{path}/{fileOrFolderName}?user.name={username}&op=DELETE";

    // Dispose the client and the response deterministically. Long-running code
    // should prefer one shared HttpClient instead of one per call.
    using (HttpClient httpClient = new HttpClient())
    using (HttpResponseMessage httpResponseMessage = await httpClient.DeleteAsync(requestUrl))
    {
        if (httpResponseMessage.StatusCode != HttpStatusCode.OK)
        {
            return false;
        }

        string responseBody = await httpResponseMessage.Content.ReadAsStringAsync();

        // The answer has the shape {"boolean": true|false}; a missing or
        // non-boolean member is treated as failure instead of throwing.
        var flag = JObject.Parse(responseBody)["boolean"];
        bool parsed;
        return flag != null && bool.TryParse(flag.ToString(), out parsed) && parsed;
    }
}

Open and Read a File

打开并且读取文件,GET请求,返回值可能是一些文件的信息,但是我不在乎

http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=OPEN
                    [&offset=<LONG>][&length=<LONG>][&buffersize=<INT>][&noredirect=<true|false>]
/// <summary>
/// Downloads a file from the given directory (op=OPEN). The whole file is buffered
/// in memory, so this is only suitable for small files.
/// </summary>
/// <param name="fileName">File name, including its extension.</param>
/// <param name="path">Directory containing the file, without leading or trailing '/'.</param>
/// <returns>The file content as a byte array.</returns>
public async static Task<byte[]> OpenAndReadFileAsync(string fileName, string path)
{
    string hostName = _configuration["Hadoop:HostName"];
    string httpfsPort = _configuration["Hadoop:HttpfsPort"];
    string username = _configuration["Hadoop:Username"];

    string requestUrl = $"http://{hostName}:{httpfsPort}/webhdfs/v1/{path}/{fileName}?user.name={username}&op=OPEN";

    // A single request is enough: the original issued an additional GetAsync whose
    // response was never used, so every file was downloaded twice.
    using (HttpClient httpClient = new HttpClient())
    {
        return await httpClient.GetByteArrayAsync(requestUrl);
    }
}

Create and Write to a File 和 Append to a File

创建并写入文件,PUT请求

http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=CREATE
                  [&overwrite=<true |false>][&blocksize=<LONG>][&replication=<SHORT>]
                  [&permission=<OCTAL>][&buffersize=<INT>][&noredirect=<true|false>]

追加写入文件,POST请求

http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=APPEND
                    [&buffersize=<INT>][&noredirect=<true|false>]

这两个的代码就比较多样了,就稍微讲讲。它们有一个 noredirect 字段,用来控制是否自动重定向:如果为 true,服务器不会自动重定向,需要自己发送两次请求,第二次请求的链接(即重定向之后的链接)多了一个字段 data=true,就是发送数据用的;noredirect 为 false 时只要发送一次请求就可以了,因为会自动重定向。

使用 C# 调用 Hadoop HttpFS

posted @ 2021-11-02 19:44  .NET好耶  阅读(772)  评论(0编辑  收藏  举报