C# Hadoop学习笔记(一)—环境安装
一、安装环境
1,前期准备:官网下载“NuGet Package Manager”,按自己已有的VS环境下载对应版本;
2,利用NuGet下载Hadoop For .NET SDK,地址“http://hadoopsdk.codeplex.com/”
3,安装。
4,安装Windows Azure HDInsight(微软基于Windows Azure的Hadoop发行版),目前是预览版本。
5,参照网址“http://blogs.msdn.com/b/data_otaku/archive/2013/08/14/hadoop-for-net-developers.aspx”,系统学习API。
二、测试DEMO
- using System;
- using System.Collections.Generic;
- using System.Linq;
- using System.Text;
- using System.Threading.Tasks;
- using Microsoft.Hadoop;
- using Microsoft.Hadoop.MapReduce;
- using Microsoft.Hadoop.WebClient.WebHCatClient;
- using System.Diagnostics;
- using System.IO;
- using System.IO.MemoryMappedFiles;
- namespace HadoopConsol
- {
- /// <summary>
- /// Console demo that computes the count and sum of even and odd integers twice:
- /// once as a Hadoop MapReduce job and once as a plain local loop over a text file,
- /// printing the elapsed milliseconds of each phase.
- /// </summary>
- class Program
- {
-     /// <summary>
-     /// Runs the Hadoop job, then the local computation, prints the timings and finally
-     /// blocks on <see cref="Console.Read"/> so the console output stays visible.
-     /// </summary>
-     /// <param name="args">Command-line arguments; not used.</param>
-     static void Main(string[] args)
-     {
-         Stopwatch sw = new Stopwatch();
-         long hadoopTime = 0;
-         long normalTime = 0;
-         sw.Start();
-         //start hadoop
-         Console.WriteLine(" Hadoop Process Strating ....");
-         #region Hadoop time
-         // --- phase 1: configure the job and connect to the cluster ---
-         Console.WriteLine(" Hadoop Connect Strating ....");
-         //establish job configuration
-         HadoopJobConfiguration myConfig = new HadoopJobConfiguration();
-         myConfig.InputPath = "/demo/simple/in";
-         myConfig.OutputFolder = "/demo/simple/out";
-         //connect to cluster
-         Uri myUri = new Uri("http://localhost");
-         string userName = "hadoop";
-         string passWord = null;
-         IHadoop myCluster = Hadoop.Connect(myUri, userName, passWord);
-         // read the stopwatch once so the accumulated and the printed value agree
-         long connectMs = sw.ElapsedMilliseconds;
-         hadoopTime += connectMs;
-         Console.WriteLine(" Hadoop Connect End.");
-         Console.WriteLine(" Hadoop Connect time:" + connectMs);
-         // --- phase 2: execute the MapReduce job ---
-         sw.Reset();
-         sw.Start();
-         Console.WriteLine(" Hadoop MapReduce Strating ....");
-         MapReduceResult jobResult =
-             myCluster.MapReduceJob.Execute<MySimpleMapper, MySimpleReducer>(myConfig);
-         long mapReduceMs = sw.ElapsedMilliseconds;
-         hadoopTime += mapReduceMs;
-         Console.WriteLine(" Hadoop MapReduce End.");
-         Console.WriteLine(" Hadoop MapReduce Time:" + mapReduceMs);
-         // --- phase 3: report the job's exit code ---
-         sw.Reset();
-         sw.Start();
-         Console.WriteLine(" Hadoop Endprocess Strating ....");
-         int exitCode = jobResult.Info.ExitCode;
-         // an exit code of 0 is reported as success, anything else as failure
-         string exitStatus = exitCode + " (" + (exitCode == 0 ? "Success" : "Failure") + ")";
-         Console.WriteLine();
-         Console.Write("Exit Code = " + exitStatus);
-         Console.WriteLine(" Hadoop Endprocess End.");
-         long exitMs = sw.ElapsedMilliseconds;
-         hadoopTime += exitMs;
-         Console.WriteLine(" Hadoop Exit Time:" + exitMs);
-         Console.WriteLine(" Hadoop Process All Time:" + hadoopTime);
-         #endregion
-         #region Normal time
-         //start Normal
-         Console.WriteLine(" Normal Process Strating ....");
-         sw.Reset();
-         sw.Start();
-         SummarizeLocalFile(@"c:\TEMP\integers.txt");
-         Console.WriteLine(" Normal Process End.");
-         long normalMs = sw.ElapsedMilliseconds;
-         normalTime += normalMs;
-         Console.WriteLine(" Normal Exit Time:" + normalMs);
-         Console.WriteLine(" Normal Process All Time:" + normalTime);
-         #endregion
-         sw.Stop();
-         Console.Read();
-     }
-     /// <summary>
-     /// Reads <paramref name="path"/> line by line, parses every line as an integer and
-     /// prints the count and sum of the even and of the odd values to the console.
-     /// </summary>
-     /// <param name="path">Path of a text file expected to hold one integer per line.</param>
-     /// <exception cref="FormatException">A line is not a valid integer.</exception>
-     private static void SummarizeLocalFile(string path)
-     {
-         int evenCount = 0;
-         int oddCount = 0;
-         // sums are 64-bit so that adding many Int32 values cannot silently wrap around
-         long evenSum = 0;
-         long oddSum = 0;
-         // the using block closes the file handle even when a line fails to parse
-         using (StreamReader reader = new StreamReader(path))
-         {
-             string line;
-             while ((line = reader.ReadLine()) != null)
-             {
-                 // parse once and reuse the value for both the parity test and the sum
-                 int value = Int32.Parse(line);
-                 if (value % 2 == 0)
-                 {
-                     evenCount++;
-                     evenSum += value;
-                 }
-                 else
-                 {
-                     oddCount++;
-                     oddSum += value;
-                 }
-             }
-         }
-         Console.WriteLine("even:" + "\t" + evenCount + "\t" + evenSum);
-         Console.WriteLine("odd:" + "\t" + oddCount + "\t" + oddSum);
-     }
- }
- /// <summary>
- /// Mapper that labels every input integer with its parity.
- /// </summary>
- public class MySimpleMapper : MapperBase
- {
-     /// <summary>
-     /// Parses one input line as an integer and emits it under the key "even" or "odd".
-     /// </summary>
-     /// <param name="inputLine">Text of one input record; handed to int.Parse unchanged.</param>
-     /// <param name="context">Context through which the key/value pair is emitted.</param>
-     public override void Map(string inputLine, MapperContext context)
-     {
-         int number = int.Parse(inputLine);
-         // any non-zero remainder (-1 for negative odd numbers included) means odd
-         string parity;
-         if (number % 2 != 0)
-         {
-             parity = "odd";
-         }
-         else
-         {
-             parity = "even";
-         }
-         // the emitted value is the parsed integer formatted back to text
-         context.EmitKeyValue(parity, number.ToString());
-     }
- }
- /// <summary>
- /// Reducer that counts and sums the integer values received for one key.
- /// </summary>
- public class MySimpleReducer : ReducerCombinerBase
- {
-     /// <summary>
-     /// Emits, for <paramref name="key"/>, the number of values and their sum separated by a tab.
-     /// </summary>
-     /// <param name="key">Key shared by all <paramref name="values"/>.</param>
-     /// <param name="values">Values for the key; each one is parsed with int.Parse.</param>
-     /// <param name="context">Context through which the result pair is emitted.</param>
-     /// <exception cref="FormatException">A value is not a valid integer.</exception>
-     public override void Reduce(
-         string key, IEnumerable<string> values, ReducerCombinerContext context
-     )
-     {
-         int myCount = 0;
-         // 64-bit accumulator: an Int32 sum would silently wrap once the total
-         // exceeds Int32.MaxValue, because C# arithmetic is unchecked by default
-         long mySum = 0;
-         // single pass over the values, counting and summing together
-         foreach (string value in values)
-         {
-             mySum += int.Parse(value);
-             myCount++;
-         }
-         //output results
-         context.EmitKeyValue(key, myCount + "\t" + mySum);
-     }
- }
- }