Hadoop 系列(四)自定义数据类
一:自定义数据类
1.为什么需要自定义数据类
上一篇文章我们写了一个 MapReduce 的 Top N 示例(https://www.cnblogs.com/wuxiaolong4/p/12733518.html)。从代码里可以看出,我们用 | 作为分隔符手动拼接字符串。这种做法不好:一旦疏忽就可能导致读取字段出错,也容易把自己搞晕。这时我们就需要自定义数据类,定义属于自己的实体类,有点像操作数据库时的 entity。
2.怎么定义自己的数据类
/**
 * Custom Hadoop value type carrying a day key plus the min/max temperature
 * observed for that day.
 */
class MinMaxtemperature implements WritableComparable<MinMaxtemperature> {

    // Day key, e.g. "20200401" (hour digits already stripped by the mapper).
    private String date = "";
    // Lowest temperature seen for the day.
    private int min = 0;
    // Highest temperature seen for the day.
    private int max = 0;

    /**
     * Orders records by date.
     *
     * The original returned only 1 or 0, which breaks the Comparable contract
     * (sgn(a.compareTo(b)) must equal -sgn(b.compareTo(a))) and would corrupt
     * any sort/merge that relies on it. Delegating to String.compareTo
     * restores a proper total order.
     */
    public int compareTo(MinMaxtemperature otherMinMaxtemperature) {
        return date.compareTo(otherMinMaxtemperature.getDate());
    }

    /**
     * Serializes ALL fields. The original omitted {@code date}, so the day
     * key was silently lost whenever the object crossed the wire.
     */
    public void write(DataOutput out) throws IOException {
        out.writeUTF(date);
        out.writeInt(max);
        out.writeInt(min);
    }

    /** Deserializes fields in the exact order written by {@link #write}. */
    public void readFields(DataInput in) throws IOException {
        date = in.readUTF();
        max = in.readInt();
        min = in.readInt();
    }

    public String getDate() {
        return date;
    }
    public void setDate(String date) {
        this.date = date;
    }
    public int getMin() {
        return min;
    }
    public void setMin(int min) {
        this.min = min;
    }
    public int getMax() {
        return max;
    }
    public void setMax(int max) {
        this.max = max;
    }

    @Override
    public String toString() {
        return "MinMaxtemperature{" +
        "date='" + date + '\'' +
        ", min=" + min +
        ", max=" + max +
        '}';
    }
}
3.怎么使用自己的类
数据:
2020040112 1 2020040113 3 2020040114 4 2020040115 5 2020040116 6 2020040117 7 2020040118 8 2020040119 9 2020040312 1 2020040313 3 2020040314 4 2020040315 5 2020040316 6 2020040317 7 2020040318 8 2020040319 9 2020040412 1 2020040413 3 2020040414 4 2020040415 5 2020040416 6 2020040417 7 2020040418 8 2020040419 9
代码1 :输出最高温度和最低温度:
package org.example;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
 * Custom Hadoop value type carrying a day key plus the min/max temperature
 * observed for that day.
 */
class MinMaxtemperature implements WritableComparable<MinMaxtemperature> {

    // Day key, e.g. "20200401" (hour digits already stripped by the mapper).
    private String date = "";
    // Lowest temperature seen for the day.
    private int min = 0;
    // Highest temperature seen for the day.
    private int max = 0;

    /** No-arg constructor required by Hadoop's Writable machinery. */
    public MinMaxtemperature() {
    }

    public MinMaxtemperature(String date, int max, int min) {
        this.date = date;
        this.min = min;
        this.max = max;
    }

    /**
     * Orders records by date.
     *
     * The original returned only 1 or 0, which breaks the Comparable contract
     * (sgn(a.compareTo(b)) must equal -sgn(b.compareTo(a))) and would corrupt
     * any sort/merge that relies on it. Delegating to String.compareTo
     * restores a proper total order.
     */
    public int compareTo(MinMaxtemperature otherMinMaxtemperature) {
        return date.compareTo(otherMinMaxtemperature.getDate());
    }

    /**
     * Serializes ALL fields. The original omitted {@code date}, so the day
     * key was silently lost between the map and reduce phases.
     */
    public void write(DataOutput out) throws IOException {
        out.writeUTF(date);
        out.writeInt(max);
        out.writeInt(min);
    }

    /** Deserializes fields in the exact order written by {@link #write}. */
    public void readFields(DataInput in) throws IOException {
        date = in.readUTF();
        max = in.readInt();
        min = in.readInt();
    }

    public String getDate() {
        return date;
    }
    public void setDate(String date) {
        this.date = date;
    }
    public int getMin() {
        return min;
    }
    public void setMin(int min) {
        this.min = min;
    }
    public int getMax() {
        return max;
    }
    public void setMax(int max) {
        this.max = max;
    }

    @Override
    public String toString() {
        return "MinMaxtemperature{" +
        "date='" + date + '\'' +
        ", min=" + min +
        ", max=" + max +
        '}';
    }
}
class WordcountMapper extends Mapper<LongWritable, Text, Text, MinMaxtemperature> {

    // Running extremes over every record this mapper sees; emitted once in cleanup().
    private MinMaxtemperature dataMap = new MinMaxtemperature();
    // BUG FIX: the original seeded min/max with 0, so with all-positive
    // temperatures (like the sample data, 1..9) the minimum never moved off 0,
    // and with all-negative ones the maximum never moved. Track whether a
    // record has been seen and let the first record initialize both extremes.
    private boolean seen = false;

    /**
     * Parses one "yyyyMMddHH temperature" line and folds its temperature into
     * the running min/max.
     */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String line[] = value.toString().split("\\ ");
        // Strip the trailing two hour digits to get the day key.
        String date = line[0].substring(0, line[0].length() - 2);
        int temperature = Integer.parseInt(line[1]);
        if (!seen) {
            // First record initializes both extremes (and the day key, which
            // the original left empty).
            dataMap.setDate(date);
            dataMap.setMax(temperature);
            dataMap.setMin(temperature);
            seen = true;
        } else {
            if (temperature > dataMap.getMax()) {
                dataMap.setMax(temperature);
            }
            if (temperature < dataMap.getMin()) {
                dataMap.setMin(temperature);
            }
        }
    }

    /** Emits the single accumulated (date, extremes) pair, if any input was seen. */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        if (seen) {
            context.write(new Text(dataMap.getDate()), dataMap);
        }
    }
}
class WordcountReducer extends Reducer<Text,MinMaxtemperature,Text,Text> {

    // Global extremes accumulated across ALL reduce groups; written once in cleanup().
    private MinMaxtemperature dataMap = new MinMaxtemperature();
    // BUG FIX: the original seeded min/max with 0, so a data set whose minima
    // are all positive could never lower the global minimum below 0. The
    // first incoming value now initializes both extremes.
    private boolean seen = false;

    /** Merges one group's partial extremes into the global running extremes. */
    @Override
    protected void reduce(Text key, Iterable<MinMaxtemperature> values, Context context) throws IOException, InterruptedException {
        String date = key.toString();
        System.out.println("reduce:"+date);
        for (MinMaxtemperature value : values) {
            if (!seen) {
                dataMap.setMax(value.getMax());
                dataMap.setMin(value.getMin());
                seen = true;
            } else {
                if (value.getMax() > dataMap.getMax()) {
                    dataMap.setMax(value.getMax());
                }
                if (value.getMin() < dataMap.getMin()) {
                    dataMap.setMin(value.getMin());
                }
            }
        }
    }

    /** Writes the single global "max:min" result line. */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        context.write(new Text("max:min"),new Text(dataMap.getMax()+":"+dataMap.getMin()));
    }
}
public class WordcountDriver {
    /**
     * Configures and runs the min/max-temperature job against the local file
     * system, clearing any stale output directory first.
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");

        // A leftover output directory from a previous run would make the job
        // fail fast, so remove it up front.
        String outputPath = "/software/java/data/output/";
        Path output = new Path(outputPath);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }

        Job job = Job.getInstance(conf);
        job.setJarByClass(WordcountDriver.class);

        // Map side: (date, MinMaxtemperature) pairs.
        job.setMapperClass(WordcountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MinMaxtemperature.class);

        // Reduce side: textual "max:min" output.
        job.setReducerClass(WordcountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path("/software/java/data/input/"));
        FileOutputFormat.setOutputPath(job, output);

        // Submit the job configuration and jar, then block until completion.
        job.waitForCompletion(true);
    }
}
代码2 分组内输出最高温度和最低温度:
package org.example;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
/**
 * Custom Hadoop value type carrying a day key plus the min/max temperature
 * observed for that day.
 */
class MinMaxtemperature implements WritableComparable<MinMaxtemperature> {

    // Day key, e.g. "20200401" (hour digits already stripped by the mapper).
    private String date = "";
    // Lowest temperature seen for the day.
    private int min = 0;
    // Highest temperature seen for the day.
    private int max = 0;

    /** No-arg constructor required by Hadoop's Writable machinery. */
    public MinMaxtemperature() {
    }

    public MinMaxtemperature(String date, int max, int min) {
        this.date = date;
        this.min = min;
        this.max = max;
    }

    /**
     * Orders records by date.
     *
     * The original returned only 1 or 0, which breaks the Comparable contract
     * (sgn(a.compareTo(b)) must equal -sgn(b.compareTo(a))) and would corrupt
     * any sort/merge that relies on it. Delegating to String.compareTo
     * restores a proper total order.
     */
    public int compareTo(MinMaxtemperature otherMinMaxtemperature) {
        return date.compareTo(otherMinMaxtemperature.getDate());
    }

    /**
     * Serializes ALL fields. The original omitted {@code date}, so the day
     * key was silently lost between the map and reduce phases.
     */
    public void write(DataOutput out) throws IOException {
        out.writeUTF(date);
        out.writeInt(max);
        out.writeInt(min);
    }

    /** Deserializes fields in the exact order written by {@link #write}. */
    public void readFields(DataInput in) throws IOException {
        date = in.readUTF();
        max = in.readInt();
        min = in.readInt();
    }

    public String getDate() {
        return date;
    }
    public void setDate(String date) {
        this.date = date;
    }
    public int getMin() {
        return min;
    }
    public void setMin(int min) {
        this.min = min;
    }
    public int getMax() {
        return max;
    }
    public void setMax(int max) {
        this.max = max;
    }

    @Override
    public String toString() {
        return "MinMaxtemperature{" +
        "date='" + date + '\'' +
        ", min=" + min +
        ", max=" + max +
        '}';
    }
}
class WordcountMapper extends Mapper<LongWritable, Text, Text, MinMaxtemperature> {

    // Per-day extremes accumulated locally (an in-mapper combine) and
    // flushed once in cleanup(), keyed by the day string.
    private Map<String,MinMaxtemperature> minmaxMap = new HashMap<String,MinMaxtemperature>();

    /** Folds one "yyyyMMddHH temperature" record into its day's running extremes. */
    @Override
    protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
        String[] fields = value.toString().split("\\ ");
        // Drop the trailing two hour digits to obtain the day key.
        String day = fields[0].substring(0, fields[0].length() - 2);
        int reading = Integer.parseInt(fields[1]);

        MinMaxtemperature entry = minmaxMap.get(day);
        if (entry == null) {
            // First record of the day seeds both extremes with its reading.
            minmaxMap.put(day, new MinMaxtemperature(day, reading, reading));
        } else {
            if (reading > entry.getMax()) {
                entry.setMax(reading);
            }
            if (reading < entry.getMin()) {
                entry.setMin(reading);
            }
        }
    }

    /** Emits one (day, extremes) pair per day seen by this mapper. */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (Map.Entry<String, MinMaxtemperature> entry : minmaxMap.entrySet()) {
            context.write(new Text(entry.getKey()), entry.getValue());
        }
    }
}
class WordcountReducer extends Reducer<Text,MinMaxtemperature,Text,Text> {

    // Per-day extremes merged across all reduce groups; written in cleanup().
    private Map<String,MinMaxtemperature> minmaxMap = new HashMap<String,MinMaxtemperature>();

    /**
     * Merges the partial per-mapper extremes for one day.
     *
     * BUG FIX: Hadoop reuses the SAME value instance for every element of
     * {@code values} (it only refills its fields via readFields), so the
     * original {@code minmaxMap.put(date, value)} stored an alias whose
     * contents were overwritten by the next value/group. We now store a
     * defensive copy instead.
     */
    @Override
    protected void reduce(Text key, Iterable<MinMaxtemperature> values, Context context) throws IOException, InterruptedException {
        String date = key.toString();
        System.out.println("reduce:"+date);
        for (MinMaxtemperature value : values ) {
            if(minmaxMap.containsKey(date)){
                MinMaxtemperature exists = minmaxMap.get(date);
                if(value.getMax() > exists.getMax()){
                    exists.setMax(value.getMax());
                }
                if(value.getMin() < exists.getMin()){
                    exists.setMin(value.getMin());
                }
            }
            else{
                // Copy the fields — never keep a reference to the reused iterator object.
                minmaxMap.put(date, new MinMaxtemperature(date, value.getMax(), value.getMin()));
            }
        }
    }

    /** Writes one "day -> extremes" line per day. */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        for (Map.Entry<String, MinMaxtemperature> data :minmaxMap.entrySet()) {
            context.write(new Text(data.getKey()),new Text(data.getValue().toString()));
        }
    }
}
public class WordcountDriver {
    /**
     * Configures and runs the per-day min/max-temperature job against the
     * local file system, clearing any stale output directory first.
     */
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        conf.set("fs.defaultFS", "file:///");

        // An existing output directory would abort the job immediately,
        // so delete any leftovers from a previous run.
        String outputPath = "/software/java/data/output/";
        Path output = new Path(outputPath);
        FileSystem fs = FileSystem.get(conf);
        if (fs.exists(output)) {
            fs.delete(output, true);
        }

        Job job = Job.getInstance(conf);
        job.setJarByClass(WordcountDriver.class);

        // Map side: (day, MinMaxtemperature) pairs.
        job.setMapperClass(WordcountMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(MinMaxtemperature.class);

        // Reduce side: one textual extremes line per day.
        job.setReducerClass(WordcountReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);

        FileInputFormat.setInputPaths(job, new Path("/software/java/data/input/"));
        FileOutputFormat.setOutputPath(job, output);

        // Submit the job configuration and jar, then block until completion.
        job.waitForCompletion(true);
    }
}
代码3和代码4就不写了,因为差不多。
搬砖多年终不得要领,遂载源码看之望得真经。
分类:
hadoop
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列(二):开始使用ML.NET
· 记一次.NET内存居高不下排查解决与启示
· 探究高空视频全景AR技术的实现原理
· 理解Rust引用及其生命周期标识(上)
· 浏览器原生「磁吸」效果!Anchor Positioning 锚点定位神器解析
· DeepSeek 开源周回顾「GitHub 热点速览」
· 物流快递公司核心技术能力-地址解析分单基础技术分享
· .NET 10首个预览版发布:重大改进与新特性概览!
· AI与.NET技术实操系列(二):开始使用ML.NET
· 单线程的Redis速度为什么快?