使用 C# 调用 Hadoop HttpFS
使用 C# 调用 Hadoop HttpFS
HttpFS 是 Hadoop 的 RESTful Web API,Java 可以直接调用 Hadoop 的 API,其它语言则要通过 WebHDFS 调用,貌似 Azure 有对应的 API?我也不懂,就自己写了
开启 HttpFS 步骤
怎么用这玩意儿可以去看看官方文档,但是写的真的太烂了,没案例,没讲解,网上基本也是用 Java 直接调用的 Hadoop API ,我只能用 Postman 慢慢试,只写了几个常用的,完整代码在下面
(都整分布式了,确实应该直接用 Java ,但是写 C# 实在是太爽了)
完整 C# 代码
IConfiguration
读取 appsettings.json
List a Directory
、Iteratively List a Directory
、List a File
,这三个我没有写,因为暂时用不上,而且这几个也挺简单的,如果用上了会补上代码
/// <summary>
/// Static helpers that call a subset of the WebHDFS REST API exposed by Hadoop HttpFS.
/// The host name, HttpFS port and user name are read from the "Hadoop:HostName",
/// "Hadoop:HttpfsPort" and "Hadoop:Username" keys of appsettings.json.
/// </summary>
public class HadoopHelper
{
    private static readonly IConfiguration _configuration = new ConfigurationBuilder().AddJsonFile("appsettings.json").Build();

    // A single shared client: creating (and never disposing) one HttpClient per call
    // leaks handlers and can exhaust sockets under load.
    private static readonly HttpClient _httpClient = new HttpClient();

    /// <summary>
    /// Builds "http://{host}:{port}/webhdfs/v1/{path}/{name}?user.name={user}&amp;op={operation}".
    /// </summary>
    /// <param name="path">Directory part, without leading or trailing "/".</param>
    /// <param name="name">File or folder name appended after the directory.</param>
    /// <param name="operation">WebHDFS operation name, e.g. GETFILESTATUS.</param>
    /// <returns>The request URL. Values are inserted as given; no URL escaping is applied.</returns>
    private static string BuildRequestUrl(string path, string name, string operation)
    {
        string hostName = _configuration["Hadoop:HostName"];
        string httpfsPort = _configuration["Hadoop:HttpfsPort"];
        string username = _configuration["Hadoop:Username"];
        return $"http://{hostName}:{httpfsPort}/webhdfs/v1/{path}/{name}?user.name={username}&op={operation}";
    }

    /// <summary>
    /// Interprets the response of an operation that answers with a JSON body of the form {"boolean": true}.
    /// </summary>
    /// <param name="response">The response to inspect; it is not disposed here.</param>
    /// <returns>
    /// false when the status is not 200 or the "boolean" field is missing or unparsable;
    /// otherwise the value of the "boolean" field.
    /// </returns>
    private static async Task<bool> ReadBooleanResultAsync(HttpResponseMessage response)
    {
        if (response.StatusCode != HttpStatusCode.OK)
        {
            return false;
        }

        string responseBody = await response.Content.ReadAsStringAsync();
        // Null-conditional: a body without the "boolean" field yields false instead of a NullReferenceException.
        var value = JObject.Parse(responseBody)["boolean"]?.ToString();
        return bool.TryParse(value, out bool result) && result;
    }

    /// <summary>
    /// Sends <paramref name="content"/> as application/octet-stream and compares the response status
    /// with <paramref name="successStatus"/>.
    /// </summary>
    /// <param name="method">HTTP method to use (PUT for CREATE, POST for APPEND).</param>
    /// <param name="requestUrl">Fully built request URL.</param>
    /// <param name="content">Request body; it is disposed together with the request.</param>
    /// <param name="successStatus">Status code that counts as success.</param>
    /// <returns>true when the server answered with <paramref name="successStatus"/>; false otherwise or on any exception.</returns>
    private static async Task<bool> UploadAsync(HttpMethod method, string requestUrl, HttpContent content, HttpStatusCode successStatus)
    {
        // Uploads must be sent with this content type (per the original author's note).
        content.Headers.ContentType = new MediaTypeHeaderValue("application/octet-stream");
        try
        {
            using (HttpRequestMessage request = new HttpRequestMessage(method, requestUrl) { Content = content })
            using (HttpResponseMessage response = await _httpClient.SendAsync(request))
            {
                return response.StatusCode == successStatus;
            }
        }
        catch (Exception)
        {
            // The public contract reports every failure as false.
            return false;
        }
    }

    /// <summary>
    /// Checks whether a file exists in the given directory (GETFILESTATUS).
    /// </summary>
    /// <param name="fileName">Name of the file to look for, including its extension.</param>
    /// <param name="path">Directory containing the file, without leading or trailing "/".</param>
    /// <returns>
    /// false when the server answers 404; true for every other status code.
    /// NOTE(review): true is also returned when the request itself throws (e.g. server unreachable);
    /// this is the original behaviour and is kept for existing callers - confirm it is intended.
    /// </returns>
    public static async Task<bool> IsExistenceAsync(string fileName, string path)
    {
        string requestUrl = BuildRequestUrl(path, fileName, "GETFILESTATUS");
        try
        {
            using (HttpResponseMessage response = await _httpClient.GetAsync(requestUrl))
            {
                // Only the status code is inspected; the JSON body is not parsed.
                return response.StatusCode != HttpStatusCode.NotFound;
            }
        }
        catch
        {
            return true;
        }
    }

    /// <summary>
    /// Creates a folder inside the given directory (MKDIRS).
    /// </summary>
    /// <param name="folderName">Name of the folder to create.</param>
    /// <param name="path">Parent directory, without leading or trailing "/".</param>
    /// <returns>
    /// The "boolean" value returned by the server, or false when the status is not 200.
    /// Per the original author's note, creating an already existing folder also yields true,
    /// so false does not identify a specific error.
    /// </returns>
    public static async Task<bool> CreateFolderAsync(string folderName, string path)
    {
        string requestUrl = BuildRequestUrl(path, folderName, "MKDIRS");
        using (HttpResponseMessage response = await _httpClient.PutAsync(requestUrl, null))
        {
            return await ReadBooleanResultAsync(response);
        }
    }

    /// <summary>
    /// Renames a file or folder within the same directory (RENAME).
    /// </summary>
    /// <param name="oldFileOrFolderName">Current name, including extension.</param>
    /// <param name="newFileOrFolderName">New name, including extension.</param>
    /// <param name="path">Directory containing the item, without leading or trailing "/".</param>
    /// <returns>
    /// The "boolean" value returned by the server, or false when the status is not 200.
    /// Per the original author's note, false may mean the item does not exist or both names are equal.
    /// </returns>
    public static async Task<bool> RenameFileOrFolderAsync(string oldFileOrFolderName, string newFileOrFolderName, string path)
    {
        // The destination is an absolute HDFS path, hence the leading "/".
        string requestUrl = BuildRequestUrl(path, oldFileOrFolderName, "RENAME")
            + $"&destination=/{path}/{newFileOrFolderName}";
        using (HttpResponseMessage response = await _httpClient.PutAsync(requestUrl, null))
        {
            return await ReadBooleanResultAsync(response);
        }
    }

    /// <summary>
    /// Deletes a file or folder (DELETE).
    /// </summary>
    /// <param name="fileOrFolderName">Name of the item to delete, including extension.</param>
    /// <param name="path">Directory containing the item.</param>
    /// <returns>
    /// The "boolean" value returned by the server, or false when the status is not 200.
    /// Per the original author's note, false may mean the item does not exist.
    /// </returns>
    public static async Task<bool> DeleteFileOrFolderAsync(string fileOrFolderName, string path)
    {
        string requestUrl = BuildRequestUrl(path, fileOrFolderName, "DELETE");
        using (HttpResponseMessage response = await _httpClient.DeleteAsync(requestUrl))
        {
            return await ReadBooleanResultAsync(response);
        }
    }

    /// <summary>
    /// Reads a whole file into memory (OPEN). Intended for small files only.
    /// </summary>
    /// <param name="fileName">Name of the file, including extension.</param>
    /// <param name="path">Directory containing the file, without leading or trailing "/".</param>
    /// <returns>The file content as a byte array.</returns>
    /// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
    public static async Task<byte[]> OpenAndReadFileAsync(string fileName, string path)
    {
        string requestUrl = BuildRequestUrl(path, fileName, "OPEN");
        // A single request: the former extra GetAsync call downloaded the file a second time
        // and its response was never used.
        return await _httpClient.GetByteArrayAsync(requestUrl);
    }

    /// <summary>
    /// Creates a JSON file and writes <paramref name="message"/> to it (CREATE).
    /// NOTE(review): no "overwrite" query parameter is sent, so the server default applies -
    /// confirm whether an existing file is really overwritten.
    /// </summary>
    /// <param name="fileName">Name of the file, including extension.</param>
    /// <param name="path">Directory for the file, without leading or trailing "/".</param>
    /// <param name="message">JSON text to write.</param>
    /// <returns>true when the server answers 201 Created; false otherwise or on any request exception.</returns>
    public static async Task<bool> CreateAndWriteJsonFileAsync(string fileName, string path, string message)
    {
        string requestUrl = BuildRequestUrl(path, fileName, "CREATE");
        StringContent stringContent = new StringContent(message);
        return await UploadAsync(HttpMethod.Put, requestUrl, stringContent, HttpStatusCode.Created);
    }

    /// <summary>
    /// Appends JSON text to an existing file (APPEND).
    /// </summary>
    /// <param name="fileName">Name of the file, including extension.</param>
    /// <param name="path">Directory containing the file, without leading or trailing "/".</param>
    /// <param name="message">Text to append.</param>
    /// <returns>true when the server answers 200 OK; false otherwise or on any request exception.</returns>
    public static async Task<bool> AppendWriteJsonFileAsync(string fileName, string path, string message)
    {
        string requestUrl = BuildRequestUrl(path, fileName, "APPEND");
        StringContent stringContent = new StringContent(message);
        // A successful APPEND answers 200 OK (not 201 Created, which only CREATE returns).
        return await UploadAsync(HttpMethod.Post, requestUrl, stringContent, HttpStatusCode.OK);
    }

    /// <summary>
    /// Uploads a PNG image as a new file (CREATE). Intended for small files only.
    /// NOTE(review): no "overwrite" query parameter is sent, so the server default applies -
    /// confirm whether an existing file is really overwritten.
    /// </summary>
    /// <param name="fileName">Name of the file, including extension.</param>
    /// <param name="path">Directory for the file, without leading or trailing "/".</param>
    /// <param name="bytes">Image data.</param>
    /// <returns>true when the server answers 201 Created; false otherwise or on any request exception.</returns>
    public static async Task<bool> CreateAndWritePngFileAsync(string fileName, string path, byte[] bytes)
    {
        string requestUrl = BuildRequestUrl(path, fileName, "CREATE");
        ByteArrayContent byteArrayContent = new ByteArrayContent(bytes);
        return await UploadAsync(HttpMethod.Put, requestUrl, byteArrayContent, HttpStatusCode.Created);
    }
}
WebHDFS REST API
Status of a File/Directory
查看文件/文件夹的状态,GET
请求,返回 404
就是文件/文件夹不存在,文件/文件夹存在则返回具体信息的 Json 字符串
http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=GETFILESTATUS
/// <summary>
/// Checks whether a file exists in the given directory (GETFILESTATUS).
/// </summary>
/// <param name="fileName">Name of the file to look for, including its extension.</param>
/// <param name="path">Directory containing the file, without leading or trailing "/".</param>
/// <returns>
/// false when the server answers 404; true for every other status code.
/// NOTE(review): true is also returned when the request itself throws (e.g. server unreachable);
/// this is the original behaviour - confirm it is intended.
/// </returns>
public static async Task<bool> IsExistenceAsync(string fileName, string path)
{
    string hostName = _configuration["Hadoop:HostName"];
    string httpfsPort = _configuration["Hadoop:HttpfsPort"];
    string username = _configuration["Hadoop:Username"];
    string requestUrl = $"http://{hostName}:{httpfsPort}/webhdfs/v1/{path}/{fileName}?user.name={username}&op=GETFILESTATUS";
    try
    {
        // Dispose both the client and the response so the connection is released;
        // a long-lived shared HttpClient would be preferable in production code.
        using (HttpClient httpClient = new HttpClient())
        using (HttpResponseMessage response = await httpClient.GetAsync(requestUrl))
        {
            // Only the status code is inspected; the JSON body is not parsed.
            return response.StatusCode != HttpStatusCode.NotFound;
        }
    }
    catch
    {
        return true;
    }
}
Make a Directory
创建文件夹,PUT
请求,这个返回值是一个 bool
的 Json 字符串
http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=MKDIRS
[&permission=<OCTAL>]
/// <summary>
/// Creates a folder inside the given directory (MKDIRS).
/// </summary>
/// <param name="folderName">Name of the folder to create.</param>
/// <param name="path">Parent directory, without leading or trailing "/".</param>
/// <returns>
/// The "boolean" value of the JSON response, or false when the status is not 200
/// or the field is missing or unparsable.
/// </returns>
public static async Task<bool> CreateFolderAsync(string folderName, string path)
{
    string hostName = _configuration["Hadoop:HostName"];
    string httpfsPort = _configuration["Hadoop:HttpfsPort"];
    string username = _configuration["Hadoop:Username"];
    string requestUrl = $"http://{hostName}:{httpfsPort}/webhdfs/v1/{path}/{folderName}?user.name={username}&op=MKDIRS";

    // Dispose both the client and the response so the connection is released;
    // a long-lived shared HttpClient would be preferable in production code.
    using (HttpClient httpClient = new HttpClient())
    using (HttpResponseMessage response = await httpClient.PutAsync(requestUrl, null))
    {
        if (response.StatusCode != HttpStatusCode.OK)
        {
            return false;
        }

        string responseBody = await response.Content.ReadAsStringAsync();
        // Null-conditional: a body without the "boolean" field yields false instead of a NullReferenceException.
        var value = JObject.Parse(responseBody)["boolean"]?.ToString();
        return bool.TryParse(value, out bool result) && result;
    }
}
Rename a File/Directory
重命名文件/文件夹,PUT
请求,这个返回值是一个 bool
的 Json 字符串
http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=RENAME&destination=<PATH>
/// <summary>
/// Renames a file or folder within the same directory (RENAME).
/// </summary>
/// <param name="oldFileOrFolderName">Current name, including extension.</param>
/// <param name="newFileOrFolderName">New name, including extension.</param>
/// <param name="path">Directory containing the item, without leading or trailing "/".</param>
/// <returns>
/// The "boolean" value of the JSON response, or false when the status is not 200
/// or the field is missing or unparsable.
/// </returns>
public static async Task<bool> RenameFileOrFolderAsync(string oldFileOrFolderName, string newFileOrFolderName, string path)
{
    string hostName = _configuration["Hadoop:HostName"];
    string httpfsPort = _configuration["Hadoop:HttpfsPort"];
    string username = _configuration["Hadoop:Username"];
    // The destination is an absolute HDFS path, hence the leading "/".
    string requestUrl = $"http://{hostName}:{httpfsPort}/webhdfs/v1/{path}/{oldFileOrFolderName}?user.name={username}&op=RENAME&destination=/{path}/{newFileOrFolderName}";

    // Dispose both the client and the response so the connection is released;
    // a long-lived shared HttpClient would be preferable in production code.
    using (HttpClient httpClient = new HttpClient())
    using (HttpResponseMessage response = await httpClient.PutAsync(requestUrl, null))
    {
        if (response.StatusCode != HttpStatusCode.OK)
        {
            return false;
        }

        string responseBody = await response.Content.ReadAsStringAsync();
        // Null-conditional: a body without the "boolean" field yields false instead of a NullReferenceException.
        var value = JObject.Parse(responseBody)["boolean"]?.ToString();
        return bool.TryParse(value, out bool result) && result;
    }
}
Delete a File/Directory
删除文件/文件夹,DELETE
请求,这个返回值是一个 bool
的 Json 字符串
http://<host>:<port>/webhdfs/v1/<path>?op=DELETE
[&recursive=<true |false>]
/// <summary>
/// Deletes a file or folder (DELETE).
/// </summary>
/// <param name="fileOrFolderName">Name of the item to delete, including extension.</param>
/// <param name="path">Directory containing the item.</param>
/// <returns>
/// The "boolean" value of the JSON response, or false when the status is not 200
/// or the field is missing or unparsable.
/// </returns>
public static async Task<bool> DeleteFileOrFolderAsync(string fileOrFolderName, string path)
{
    string hostName = _configuration["Hadoop:HostName"];
    string httpfsPort = _configuration["Hadoop:HttpfsPort"];
    string username = _configuration["Hadoop:Username"];
    string requestUrl = $"http://{hostName}:{httpfsPort}/webhdfs/v1/{path}/{fileOrFolderName}?user.name={username}&op=DELETE";

    // Dispose both the client and the response so the connection is released;
    // a long-lived shared HttpClient would be preferable in production code.
    using (HttpClient httpClient = new HttpClient())
    using (HttpResponseMessage response = await httpClient.DeleteAsync(requestUrl))
    {
        if (response.StatusCode != HttpStatusCode.OK)
        {
            return false;
        }

        string responseBody = await response.Content.ReadAsStringAsync();
        // Null-conditional: a body without the "boolean" field yields false instead of a NullReferenceException.
        var value = JObject.Parse(responseBody)["boolean"]?.ToString();
        return bool.TryParse(value, out bool result) && result;
    }
}
Open and Read a File
打开并且读取文件,GET
请求,返回值可能是一些文件的信息,但是我不在乎
http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=OPEN
[&offset=<LONG>][&length=<LONG>][&buffersize=<INT>][&noredirect=<true|false>]
/// <summary>
/// Reads a whole file into memory (OPEN). Intended for small files only.
/// </summary>
/// <param name="fileName">Name of the file, including extension.</param>
/// <param name="path">Directory containing the file, without leading or trailing "/".</param>
/// <returns>The file content as a byte array.</returns>
/// <exception cref="HttpRequestException">The server answered with a non-success status code.</exception>
public static async Task<byte[]> OpenAndReadFileAsync(string fileName, string path)
{
    string hostName = _configuration["Hadoop:HostName"];
    string httpfsPort = _configuration["Hadoop:HttpfsPort"];
    string username = _configuration["Hadoop:Username"];
    string requestUrl = $"http://{hostName}:{httpfsPort}/webhdfs/v1/{path}/{fileName}?user.name={username}&op=OPEN";

    // A single request: the former extra GetAsync call downloaded the file a second time
    // and its response was never used. The client is disposed once the bytes are read;
    // a long-lived shared HttpClient would be preferable in production code.
    using (HttpClient httpClient = new HttpClient())
    {
        return await httpClient.GetByteArrayAsync(requestUrl);
    }
}
Create and Write to a File 和 Append to a File
创建并写入文件,PUT
请求
http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=CREATE
[&overwrite=<true |false>][&blocksize=<LONG>][&replication=<SHORT>]
[&permission=<OCTAL>][&buffersize=<INT>][&noredirect=<true|false>]
追加写入文件,POST
请求
http://<HOST>:<PORT>/webhdfs/v1/<PATH>?op=APPEND
[&buffersize=<INT>][&noredirect=<true|false>]
这两个的代码就比较多样了,就稍微讲讲,它们有一个 noredirect
字段,用来控制是否自动重定向:如果为 true
就不会自动重定向,需要自己发送两次请求,重定向之后的链接多了一个字段 data=true
,就是发送数据用的;noredirect
为 false
则只要一次就可以了,因为会自动重定向