Hadoop练习:有一个日志文件visitlog.txt,其中记录了用户访问网站的日期和访问的网站地址信息,每行一条记录。1) 将不同访问日期的访问记录分配给不同的reduce task(假设只有3个不同日期),而且结果要按照网站网址的字典序降序排序
题目描述:
有一个日志文件visitlog.txt,其中记录了用户访问网站的日期和访问的网站地址信息,每行一条记录。要求编写mapreduce程序完成以下功能:
1、 将不同访问日期的访问记录分配给不同的reduce task(假设只有3个不同日期),而且结果要按照网站网址的字典序降序排序
2、 以1)的结果作为输入,按日期统计访问量最高的3个网站
3、 以1)的结果作为输入,统计所有日期中访问量最高的3个网站
以下代码有缺失请自己补充完整及改正!!!
题目1:
Driver驱动类:
public class Driver implements Tool {
Configuration conf = null;
@Override
public int run(String[ strings) throws Exception {
Job job = Job.getInstance(conf),
job.setJarByClas(Driver.class);
job.setPartitinerClass(MyPartition.class);
job.setNumReduceTasks(3);
job.setGroupingComparatorClass(Mygroup.class);
job.setMapperClass(Map.class)
jb.stMapOutputKeyClass(Bean.class);
job.setMapOutputValueClass(Bean.class);
job.setReducerClass(Reduce.class);
job.setOutputKClass(Text.class).
job.setOutputValueClass(NullWritable.class);
FileInputFormat.setnputPaths(job, new Path("E:/input/"));
FileOutputFormat.setOtputPath(job; new Path("E:/output"))
boolean result = job.waitForCompletion(true);
return resut ? 0 : 1;
}
@Override
public void stConf(Configuration configuration) {
conf = configuration!
}
@Override
public Configuration getConf( {
return cof;
}
public static void main(String[] args) {
Configuration conf = new Configuration();
try {
int run = TolRunner.run(conf, new Driver(), args);
System.exit(run);
} catch (Exception e) {
e.printStackTrace();
}
}
}
Bean类:
public class Bean impements WritableComparable<Bean> {
String date = "";
String url = "";
public Bean() {
}
public Ben(String date, String url) {
this.date = date
this.url = url,
}
@Override
public void write(DataOutput out) throws IOException {
ont.writeUTdate);
out.writeUTF(url)
}
@Override
public void readFields(DataInput in) throws IOException {
this.date = in.readUTF();
this.url = n.readUTF();
}
@Override
public String toString() {
return url;
}
@Override
public int compareTo(Bean o) {
if(date.equals(o.date)){
return -url.compareTo(o.url);
}else{
return date.comareTo(o.date);
}
}
}
Map类:
public class Map extends Mapper<LongWritable, Text, Bean, Bean> {
@Override
protected void mapLongWritable key, Text value, Context context) throws IOException, InterruptedException {
String line = value.toString();
String[] words = line.split(" ");
Bean bean = new Bean(words[0], words[1]);
context.write(bean; bean);
}
}
Reduce类:
/**
 * Writes every url in the (already date-partitioned, url-descending)
 * group, one per output line; value carries the url.
 */
public class Reduce extends Reducer<Bean, Bean, Text, NullWritable> {
    @Override
    protected void reduce(Bean key, Iterable<Bean> values, Context context) throws IOException, InterruptedException {
        for (Bean value : values) {
            context.write(new Text(value.url), NullWritable.get());
        }
    }
}
Partition类:
public class MyPartition extends Partitioner<Bean, Bean> {
@Override
public int getPartitin(Bean bean, Ban bean2, int i) {
String text = bean.date;
if (text.endsWith("26")) {
return 0.
} else f (text.endsWith("27")) {
return 1;
} else
return 2;
}
}
Group类:
public class Mygroup extends WritableComparator {
public Mygrop() {
super(Bean.class, true);
}
@Override
public int compare(WritableComparable a, WritableComparable b) {
Bean aBean = (Bean) a;
Bean bBean = (Bean) b;
return -aBea.url.compareTo(bBean.url);
}
}
题目2:
Map类:
public class Mapp extends org.apache.hadop.mapreduce.Mapper<LongWritable, Text, LongWritable, Text> {
String name = "";
HashMap<String, Integer> hashMap = new HashMap<>();
@Override
protected void setup(Context context) throws IOException, InterruptedException {
FileSplit split = (FileSplit) context.getInputSplit();
name = split.getPath().getName();
}
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, nterruptedException {
if (name.equals("part-r-00000")) {
hashMap.put(value.toString() + 0 hashMap.getOrDefault(value.toString() + 0, 0) + 1);
} else if (name.equals("part-r-00001")) {
hashMap.put(value.toString() + 1, hashMap.getOrDefault(value.toString() + 1, 0) + 1);
} else {
hashMap.put(value.toString() + 2, hashMap.getOrDefault(value.toString() + 2, 0) + 1);
}
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
List<Map.Entr<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(hashMap.entrySet());
Collections.sort(lit, new Comparator<Map.Entry<String, Integer>>() {
@Override
public int compare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
return -Integer.cmpare(o1.getValue(), o2.getValue());
}
});
for (Map.Entry<Strng, Integer> stringIntegerEntry : list) {
context.write(new LongWritable(stringIntegerEntry.getValue()), new Text(stringIntegerEntry.getKey()));
}
}
}
Reduce类:
public class Reduce extends educer<LongWritable, Text, Text, NullWritable> {
int count = 0;
@Override
protected void reduce(LongWriable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
if (count < 3) {
for (Text value : values) {
if(count<3){
context.write(new Text(value.toString().substring(0,vale.toString().length()-1)), NullWritable.get());
count+;
}
}
}
}
Sort类:
/**
 * Sort comparator reversing LongWritable's natural order so larger
 * counts come first during the shuffle sort.
 */
public class Mysrt extends LongWritable.Comparator {
    @Override
    public int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
        // Negate the raw-bytes comparison => descending order.
        return -super.compare(b1, s1, l1, b2, s2, l2);
    }
}
Partition类:
/**
 * Routes each record back to the reduce task matching its original date
 * partition, using the digit the mapper appended to the url.
 */
public class MyPartition extends Partitioner<LongWritable, Text> {
    @Override
    public int getPartition(LongWritable longWritable, Text text, int i) {
        String fix = text.toString();
        if (fix.endsWith("0")) {
            return 0;
        } else if (fix.endsWith("1")) {
            return 1;
        } else {
            return 2;
        }
    }
}
题目3:
Driver驱动类:
public class Driver implemnts Tool {
Configuration conf = null;
@Override
public int run(String[] strings) throws Exception {
Job job = Job.geInstance(conf);
job.setJarByClass(Driver.class);
job.setMapperClass(Mapp.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(LongWritable.class);
job.setReducerClass(Reduce.class);
job.setOutputKeyClass(Text.class);
job.setOutputValuelass(NullWritable.class);
FileInputFormat.setInputPaths(job, new Path("E:/input/"));
FileOutputFormat.setOutputPath(job, new Path("E:/output"));
boolean result = job.waitForCompletion(true);
return result ? 0 : 1;
}
@Override
public void seConf(Configuration configuration) {
conf = configuration;
}
@Override
public Confiuration getConf() {
return conf;
}
public static void main(Sring[] args) {
Configuration conf = new Configuration(;
try {
int run = ToolRunner.run(conf, new Driver(), args);
System.exit(run);
} catch (Excepton e) {
e.printStackTrace();
}
}
}
Map类:
public class Mapp extends org.apache.hdoop.mapreduce.Mapper<LongWritable, Text, Text, LongWritable> {
@Override
protected void map(LonWritable key, Text value, Context context) throws IOException, InterruptedException {
context.write(value, new LongWritable(1));
}
}
Reduce类:
public clss Reduce extends Reducer<Text, LongWritable, Text, NullWritable> {
HashMap<String, Integer> hasMap = new HashMap<>();
@Override
protected void reduce(Text key, Iteable<LongWritable> values, Context context) throws IOException, InterruptedException {
int sum = 0;
for (LongWritable value : vales) {
sum += 1;
}
hashMap.put(key.toString(), sum);
}
@Override
protected void cleanup(Context context) throws IOException, InterruptedException {
List<Map.Etry<String, Integer>> list = new ArrayList<Map.Entry<String, Integer>>(hashMap.entrySet());
Collections.sort(list, new Comprator<Map.Entry<String, Integer>>() {
@Override
public int cmpare(Map.Entry<String, Integer> o1, Map.Entry<String, Integer> o2) {
return -Integer.compare(o1.getValue(), o2.getValue());
}
});
int count = 0;
for (Map.Entry<Strng, Integer> stringIntegerEntry : list) {
if (count++ < 3) {
context.write(new Text(String.valueOf(stringIntegerEntry.getKey())), NullWritble.get());
}
}
}