Maven依赖
源头
<dependencies>
<!-- First dependency set from these notes: Lombok plus four Flink artifacts, all pinned to 1.8.0. -->
<!-- Lombok: supplies the @Data annotation placed on the CSV row classes (AirportSat, CitySat, ...). -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.8</version>
</dependency>
<!-- Flink table planner (Scala 2.11 build). Presumably needed for the sqlQuery(...) calls in FlinkCsv.t5; confirm. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-planner_2.11</artifactId>
<version>1.8.0</version>
</dependency>
<!-- Java bridge for the Table API. Presumably the home of the BatchTableEnvironment imported by FlinkCsv; confirm. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-api-java-bridge_2.11</artifactId>
<version>1.8.0</version>
</dependency>
<!-- NOTE(review): this is the Scala streaming artifact, yet FlinkCsv only uses the Java batch API; confirm it is required. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>1.8.0</version>
</dependency>
<!-- Shared table module; this artifact id carries no Scala-version suffix. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table-common</artifactId>
<version>1.8.0</version>
</dependency>
</dependencies>
改版
<dependencies>
<!-- Second ("revised") dependency set: the separate table modules are replaced by the single flink-table_2.11 artifact. -->
<!-- Lombok: supplies the @Data annotation placed on the CSV row classes. -->
<dependency>
<groupId>org.projectlombok</groupId>
<artifactId>lombok</artifactId>
<version>1.18.8</version>
</dependency>
<!-- NOTE(review): flink-table_2.11 is pinned to 1.7.2 while flink-streaming-scala_2.11 below is 1.8.0. -->
<!-- Two different Flink releases are mixed on one classpath; confirm this is intentional. -->
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-table_2.11</artifactId>
<version>1.7.2</version>
</dependency>
<dependency>
<groupId>org.apache.flink</groupId>
<artifactId>flink-streaming-scala_2.11</artifactId>
<version>1.8.0</version>
</dependency>
</dependencies>
SQL语句
-- Scratch queries over the airport / city / country reference tables.
-- The number in a trailing comment is the result the author recorded when running the statement.

-- Per-table row counts, each followed by the column list noted by the author.
SELECT COUNT(*) FROM T13_REF_AIRPORT_SAT; -- 11008
-- Columns: HUB_ID IATA_CD NAME_CN NAME_EN
SELECT COUNT(*) FROM T13_REF_AIRPORT_CITY_LINK; -- 9676
-- Columns: *******LINK_ID AIRPORT_HUB_ID CITY_HUB_ID
SELECT COUNT(*) FROM T13_REF_CITY_SAT; -- 9624
-- Columns: HUB_ID CITY_CD NAME_CN NAME_EN
SELECT COUNT(*) FROM T13_REF_CITY_COUNTRY_LINK; -- 9062
-- Columns: *******LINK_ID COUNTRY_HUB_ID CITY_HUB_ID
SELECT COUNT(*) FROM T13_REF_COUNTRY_SAT; -- 356
-- Columns: HUB_ID COUNTRY_CD NAME_CN NAME_EN

-- Full join chain: airport -> airport/city link -> city -> city/country link -> country.
SELECT *
FROM T13_REF_AIRPORT_SAT X1
    INNER JOIN T13_REF_AIRPORT_CITY_LINK X2 ON X1.HUB_ID = X2.AIRPORT_HUB_ID
    INNER JOIN T13_REF_CITY_SAT X3 ON X2.CITY_HUB_ID = X3.HUB_ID
    INNER JOIN T13_REF_CITY_COUNTRY_LINK X4 ON X3.HUB_ID = X4.CITY_HUB_ID
    INNER JOIN T13_REF_COUNTRY_SAT X5 ON X4.COUNTRY_HUB_ID = X5.HUB_ID;

-- Number of rows produced by the join chain.
SELECT COUNT(*)
FROM T13_REF_AIRPORT_SAT X1
    INNER JOIN T13_REF_AIRPORT_CITY_LINK X2 ON X1.HUB_ID = X2.AIRPORT_HUB_ID
    INNER JOIN T13_REF_CITY_SAT X3 ON X2.CITY_HUB_ID = X3.HUB_ID
    INNER JOIN T13_REF_CITY_COUNTRY_LINK X4 ON X3.HUB_ID = X4.CITY_HUB_ID
    INNER JOIN T13_REF_COUNTRY_SAT X5 ON X4.COUNTRY_HUB_ID = X5.HUB_ID; -- 16759

-- Airports per country (grouped by the country's Chinese name), largest first.
SELECT
    X5.NAME_CN AS COUNTRY_CN_NAME,
    COUNT(X1.HUB_ID) AS COUNT_AIRPORT
FROM T13_REF_AIRPORT_SAT X1
    INNER JOIN T13_REF_AIRPORT_CITY_LINK X2 ON X1.HUB_ID = X2.AIRPORT_HUB_ID
    INNER JOIN T13_REF_CITY_SAT X3 ON X2.CITY_HUB_ID = X3.HUB_ID
    INNER JOIN T13_REF_CITY_COUNTRY_LINK X4 ON X3.HUB_ID = X4.CITY_HUB_ID
    INNER JOIN T13_REF_COUNTRY_SAT X5 ON X4.COUNTRY_HUB_ID = X5.HUB_ID
GROUP BY X5.NAME_CN
ORDER BY COUNT_AIRPORT DESC; -- 254

-- Airports per (country, city) combination, largest first.
SELECT
    X5.COUNTRY_CD,
    X5.NAME_CN AS COUNTRY_NAME_CN,
    X5.NAME_EN AS COUNTRY_NAME_EN,
    X3.CITY_CD,
    X3.NAME_CN AS CITY_CN_NAME,
    X3.NAME_EN AS CITY_EN_NAME,
    COUNT(X1.HUB_ID) AS COUNT_AIRPORT
FROM T13_REF_AIRPORT_SAT X1
    INNER JOIN T13_REF_AIRPORT_CITY_LINK X2 ON X1.HUB_ID = X2.AIRPORT_HUB_ID
    INNER JOIN T13_REF_CITY_SAT X3 ON X2.CITY_HUB_ID = X3.HUB_ID
    INNER JOIN T13_REF_CITY_COUNTRY_LINK X4 ON X3.HUB_ID = X4.CITY_HUB_ID
    INNER JOIN T13_REF_COUNTRY_SAT X5 ON X4.COUNTRY_HUB_ID = X5.HUB_ID
GROUP BY
    X5.COUNTRY_CD, X5.NAME_CN, X5.NAME_EN,
    X3.CITY_CD, X3.NAME_CN, X3.NAME_EN
ORDER BY COUNT_AIRPORT DESC; -- 13030

-- Same breakdown, restricted to cities whose English name is missing.
SELECT
    X5.COUNTRY_CD,
    X5.NAME_CN AS COUNTRY_NAME_CN,
    X5.NAME_EN AS COUNTRY_NAME_EN,
    X3.CITY_CD,
    X3.NAME_CN AS CITY_CN_NAME,
    X3.NAME_EN AS CITY_EN_NAME,
    COUNT(X1.HUB_ID) AS COUNT_AIRPORT
FROM T13_REF_AIRPORT_SAT X1
    INNER JOIN T13_REF_AIRPORT_CITY_LINK X2 ON X1.HUB_ID = X2.AIRPORT_HUB_ID
    INNER JOIN T13_REF_CITY_SAT X3 ON X2.CITY_HUB_ID = X3.HUB_ID
    INNER JOIN T13_REF_CITY_COUNTRY_LINK X4 ON X3.HUB_ID = X4.CITY_HUB_ID
    INNER JOIN T13_REF_COUNTRY_SAT X5 ON X4.COUNTRY_HUB_ID = X5.HUB_ID
WHERE X3.NAME_EN IS NULL
GROUP BY
    X5.COUNTRY_CD, X5.NAME_CN, X5.NAME_EN,
    X3.CITY_CD, X3.NAME_CN, X3.NAME_EN
ORDER BY COUNT_AIRPORT DESC;
-- Figures the author recorded next to each NULL condition:
-- COUNTRY_NAME_EN=NULL 19
-- CITY_CN_NAME=NULL 1
-- CITY_EN_NAME=NULL 1501
Airport_Sat
import lombok.Data;
/**
 * Row type for records read from {@code T13_REF_AIRPORT_SAT.csv}.
 *
 * <p>Accessors are generated by Lombok's {@code @Data}. The field keeps its snake_case name
 * because FlinkCsv refers to it by that exact string in {@code pojoType(...)}, in join keys
 * and in SQL text.
 */
@Data
public class AirportSat
{
/** Airport hub key; matched against {@code AirportCityLink.airport_hub_id} in the joins. */
private String hub_id;
}
Airport_City_Link
import lombok.Data;
/**
 * Row type for records read from {@code T13_REF_AIRPORT_CITY_LINK.csv}; links an airport to a city.
 *
 * <p>Accessors are generated by Lombok's {@code @Data}. Field names stay in snake_case because
 * FlinkCsv refers to them by those exact strings in {@code pojoType(...)}, join keys and SQL text.
 */
@Data
public class AirportCityLink
{
/** Key of the airport side; matched against {@code AirportSat.hub_id}. */
private String airport_hub_id;
/** Key of the city side; matched against {@code CitySat.hub_id}. */
private String city_hub_id;
}
City_Sat
import lombok.Data;
/**
 * Row type for records read from {@code T13_REF_CITY_SAT.csv}.
 *
 * <p>Accessors are generated by Lombok's {@code @Data}. Field names stay in snake_case because
 * FlinkCsv refers to them by those exact strings in {@code pojoType(...)}, join keys and SQL text.
 */
@Data
public class CitySat
{
/** City hub key; matched against the {@code city_hub_id} of both link row types. */
private String hub_id;
/** City code column. */
private String city_cd;
/** City name column {@code name_cn} (presumably the Chinese name; confirm against the data). */
private String name_cn;
/** City name column {@code name_en} (presumably the English name; confirm against the data). */
private String name_en;
}
City_Country_Link
import lombok.Data;
/**
 * Row type for records read from {@code T13_REF_CITY_COUNTRY_LINK.csv}; links a city to a country.
 *
 * <p>Accessors are generated by Lombok's {@code @Data}. Field names stay in snake_case because
 * FlinkCsv refers to them by those exact strings in {@code pojoType(...)}, join keys and SQL text.
 */
@Data
public class CityCountryLink
{
/** Key of the country side; matched against {@code CountrySat.hub_id}. */
private String country_hub_id;
/** Key of the city side; matched against {@code CitySat.hub_id}. */
private String city_hub_id;
}
Country_Sat
import lombok.Data;
/**
 * Row type for records read from {@code T13_REF_COUNTRY_SAT.csv}.
 *
 * <p>Accessors are generated by Lombok's {@code @Data}. Field names stay in snake_case because
 * FlinkCsv refers to them by those exact strings in {@code pojoType(...)}, join keys and SQL text.
 */
@Data
public class CountrySat
{
/** Country hub key; matched against {@code CityCountryLink.country_hub_id}. */
private String hub_id;
/** Country code column. */
private String country_cd;
/** Country name column {@code name_cn} (presumably the Chinese name; confirm against the data). */
private String name_cn;
/** Country name column {@code name_en} (presumably the English name; confirm against the data). */
private String name_en;
}
Flink_Csv
点击查看Flink_Csv代码
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.operators.MapOperator;
import org.apache.flink.api.java.operators.SortPartitionOperator;
import org.apache.flink.api.java.tuple.Tuple1;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple7;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.table.api.Table;
import org.apache.flink.table.api.java.BatchTableEnvironment;
import java.text.SimpleDateFormat;
import java.util.Date;
/**
 * Runs the same airport / city / country join over five CSV extracts twice and prints how long
 * each run took: once with hand-written DataSet joins ({@link #t4()}) and once with SQL on a
 * {@link BatchTableEnvironment} ({@link #t5()}).
 *
 * <p>Both variants read their inputs through the shared {@code read*} helpers, so they are
 * guaranteed to see identically configured sources.
 *
 * <p>Note: {@code count()} and {@code print()} on a DataSet are eager, so every such call below
 * runs the plan again; the final {@code env.execute(...)} runs the CSV sinks.
 */
public class FlinkCsv
{
    /** Total number of rows produced by the five-table join. */
    private static final String SQL_TOTAL = "select count(*) \n" +
            "\tfrom t13_ref_airport_sat x1,t13_ref_airport_city_link x2,\n" +
            "\tt13_ref_city_sat x3,t13_ref_city_country_link x4,t13_ref_country_sat x5\n" +
            "\twhere x1.hub_id=x2.airport_hub_id\n" +
            "\t\tand x2.city_hub_id=x3.hub_id\n" +
            "\t\tand x3.hub_id=x4.city_hub_id\n" +
            "\t\tand x4.country_hub_id=x5.hub_id";

    /** Airport count per country name, largest first. */
    private static final String SQL_BY_COUNTRY = "select x5.name_cn country_cn_name,count(x1.hub_id) count_airport\n" +
            "\tfrom t13_ref_airport_sat x1,t13_ref_airport_city_link x2,\n" +
            "\tt13_ref_city_sat x3,t13_ref_city_country_link x4,t13_ref_country_sat x5\n" +
            "\twhere x1.hub_id=x2.airport_hub_id\n" +
            "\t\tand x2.city_hub_id=x3.hub_id\n" +
            "\t\tand x3.hub_id=x4.city_hub_id\n" +
            "\t\tand x4.country_hub_id=x5.hub_id\n" +
            "\tgroup by x5.name_cn\n" +
            "\torder by count_airport desc";

    /** Airport count per (country, city) combination, largest first. */
    private static final String SQL_BY_COUNTRY_AND_CITY = "select \n" +
            "\tx5.country_cd,\n" +
            "\tx5.name_cn country_name_cn,\n" +
            "\tx5.name_en country_name_en,\n" +
            "\tx3.city_cd,\n" +
            "\tx3.name_cn city_cn_name,\n" +
            "\tx3.name_en city_en_name,\n" +
            "count(x1.hub_id) count_airport\n" +
            "\tfrom t13_ref_airport_sat x1,t13_ref_airport_city_link x2,t13_ref_city_sat x3,t13_ref_city_country_link x4,t13_ref_country_sat x5\n" +
            "\twhere x1.hub_id=x2.airport_hub_id\n" +
            "\t\tand x2.city_hub_id=x3.hub_id\n" +
            "\t\tand x3.hub_id=x4.city_hub_id\n" +
            "\t\tand x4.country_hub_id=x5.hub_id\n" +
            "\tgroup by x5.country_cd,x5.name_cn,x5.name_en,x3.city_cd,x3.name_cn,x3.name_en\n" +
            "\torder by count_airport desc";

    /**
     * Runs {@link #t4()} and then {@link #t5()}, printing the elapsed wall-clock milliseconds of
     * each run followed by the suffix {@code "u"} (t4) or {@code "d"} (t5).
     *
     * @param args unused
     * @throws Exception if either Flink job fails
     */
    public static void main(String[] args) throws Exception
    {
        long dataSetStart = System.currentTimeMillis();
        t4();
        System.out.println((System.currentTimeMillis() - dataSetStart) + "u");

        long sqlStart = System.currentTimeMillis();
        t5();
        System.out.println((System.currentTimeMillis() - sqlStart) + "d");
    }

    /**
     * SQL variant: registers the five CSV sources as tables, evaluates the three queries,
     * prints their results and row counts, and writes each result to a timestamped CSV file.
     *
     * @throws Exception if a Flink job fails
     */
    private static void t5() throws Exception
    {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        BatchTableEnvironment table_env = BatchTableEnvironment.getTableEnvironment(env);
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH-mm-ss SSS");

        table_env.registerTable("t13_ref_airport_sat", table_env.fromDataSet(readAirportSat(env)));
        table_env.registerTable("t13_ref_airport_city_link", table_env.fromDataSet(readAirportCityLink(env)));
        table_env.registerTable("t13_ref_city_sat", table_env.fromDataSet(readCitySat(env)));
        table_env.registerTable("t13_ref_city_country_link", table_env.fromDataSet(readCityCountryLink(env)));
        table_env.registerTable("t13_ref_country_sat", table_env.fromDataSet(readCountrySat(env)));

        DataSet<Tuple1<Long>> map = table_env.toDataSet(table_env.sqlQuery(SQL_TOTAL),
                TypeInformation.of(new TypeHint<Tuple1<Long>>()
                {
                }));
        map.print();

        DataSet<Tuple2<String, Long>> map_country = table_env.toDataSet(table_env.sqlQuery(SQL_BY_COUNTRY),
                TypeInformation.of(new TypeHint<Tuple2<String, Long>>()
                {
                }));
        System.out.println(map_country.count());
        map_country.print();

        DataSet<Tuple7<String, String, String, String, String, String, Long>> map_all = table_env.toDataSet(
                table_env.sqlQuery(SQL_BY_COUNTRY_AND_CITY),
                TypeInformation.of(new TypeHint<Tuple7<String, String, String, String, String, String, Long>>()
                {
                }));
        System.out.println(map_all.count());
        map_all.print();

        // The sinks are only declared here; the files are produced by env.execute(...) below.
        writeCsv(map, sdf, "map");
        System.out.println("T打印完成______map...");
        writeCsv(map_country, sdf, "map_country");
        System.out.println("T打印完成______map_country...");
        writeCsv(map_all, sdf, "map_all");
        System.out.println("T打印完成______map_all...");
        env.execute("Hello!@ Fuck...");
    }

    /**
     * DataSet variant: joins the five CSV sources by hand, prints the row counts of the raw join
     * and of the two aggregations, and writes all three data sets to timestamped CSV files.
     *
     * @throws Exception if a Flink job fails
     */
    private static void t4() throws Exception
    {
        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH-mm-ss SSS");

        DataSet<Tuple7<String, String, String, String, String, String, Long>> map = joinAirportsToCountries(env);
        System.out.println("总数量: " + map.count());

        // Keep only (country name_cn, 1) and sum the ones per country.
        DataSet<Tuple2<String, Long>> map_country = map
                .map(new MapFunction<Tuple7<String, String, String, String, String, String, Long>, Tuple2<String, Long>>()
                {
                    @Override
                    public Tuple2<String, Long> map(Tuple7<String, String, String, String, String, String, Long> t) throws Exception
                    {
                        return new Tuple2<>(t.f1, t.f6);
                    }
                }).groupBy(0).sum(1).sortPartition(1, Order.DESCENDING);
        System.out.println("国家分总数量: " + map_country.count());

        // Group on all six descriptive columns and sum the per-row airport count.
        DataSet<Tuple7<String, String, String, String, String, String, Long>> map_all = map
                .groupBy(0, 1, 2, 3, 4, 5).sum(6).sortPartition(6, Order.DESCENDING);
        System.out.println("全分总数量: " + map_all.count());

        // The sinks are only declared here; the files are produced by env.execute(...) below.
        writeCsv(map, sdf, "map");
        System.out.println("打印完成______map...");
        writeCsv(map_country, sdf, "map_country");
        System.out.println("打印完成______map_country...");
        writeCsv(map_all, sdf, "map_all");
        System.out.println("打印完成______map_all...");
        env.execute("Hello!@ Fuck...");
    }

    /**
     * Builds the airport -> airport/city link -> city -> city/country link -> country join and
     * flattens every joined row into
     * (country_cd, country name_cn, country name_en, city_cd, city name_cn, city name_en, 1).
     */
    private static DataSet<Tuple7<String, String, String, String, String, String, Long>> joinAirportsToCountries(
            ExecutionEnvironment env)
    {
        DataSet<AirportSat> data_airportsat = readAirportSat(env);
        DataSet<AirportCityLink> data_airportcitylink = readAirportCityLink(env);
        DataSet<CitySat> data_citysat = readCitySat(env);
        DataSet<CityCountryLink> data_citycountrylink = readCityCountryLink(env);
        DataSet<CountrySat> data_countrysat = readCountrySat(env);

        return data_airportsat
                .join(data_airportcitylink).where("hub_id").equalTo("airport_hub_id")
                .join(data_citysat).where(new KeySelector<Tuple2<AirportSat, AirportCityLink>, String>()
                {
                    @Override
                    public String getKey(Tuple2<AirportSat, AirportCityLink> t) throws Exception
                    {
                        // f1 is the airport/city link row.
                        return t.f1.getCity_hub_id();
                    }
                }).equalTo("hub_id")
                .join(data_citycountrylink).where(new KeySelector<Tuple2<Tuple2<AirportSat, AirportCityLink>, CitySat>, String>()
                {
                    @Override
                    public String getKey(Tuple2<Tuple2<AirportSat, AirportCityLink>, CitySat> t) throws Exception
                    {
                        // f1 is the city row.
                        return t.f1.getHub_id();
                    }
                }).equalTo("city_hub_id")
                .join(data_countrysat).where(new KeySelector<Tuple2<Tuple2<Tuple2<AirportSat, AirportCityLink>, CitySat>, CityCountryLink>, String>()
                {
                    @Override
                    public String getKey(Tuple2<Tuple2<Tuple2<AirportSat, AirportCityLink>, CitySat>, CityCountryLink> t) throws Exception
                    {
                        // f1 is the city/country link row.
                        return t.f1.getCountry_hub_id();
                    }
                }).equalTo("hub_id")
                .map(new MapFunction<Tuple2<Tuple2<Tuple2<Tuple2<AirportSat, AirportCityLink>, CitySat>, CityCountryLink>, CountrySat>,
                        Tuple7<String, String, String, String, String, String, Long>>()
                {
                    @Override
                    public Tuple7<String, String, String, String, String, String, Long> map(
                            Tuple2<Tuple2<Tuple2<Tuple2<AirportSat, AirportCityLink>, CitySat>, CityCountryLink>, CountrySat> t) throws Exception
                    {
                        // Nesting is ((((airport, airportCityLink), city), cityCountryLink), country).
                        CountrySat country = t.f1;
                        CitySat city = t.f0.f0.f1;
                        // Every joined row stands for one airport, hence the constant 1.
                        return new Tuple7<>(country.getCountry_cd(), country.getName_cn(), country.getName_en(),
                                city.getCity_cd(), city.getName_cn(), city.getName_en(), 1L);
                    }
                });
    }

    /** Reads only the first CSV column (hub_id) of the airport extract, skipping the header line. */
    private static DataSet<AirportSat> readAirportSat(ExecutionEnvironment env)
    {
        return env.readCsvFile("D:\\T13_REF_AIRPORT_SAT.csv")
                .fieldDelimiter(",").ignoreFirstLine().includeFields(true)
                .pojoType(AirportSat.class, "hub_id");
    }

    /** Reads columns 2 and 3 of the airport/city link extract, skipping the header line. */
    private static DataSet<AirportCityLink> readAirportCityLink(ExecutionEnvironment env)
    {
        return env.readCsvFile("D:\\T13_REF_AIRPORT_CITY_LINK.csv")
                .fieldDelimiter(",").ignoreFirstLine().includeFields(false, true, true)
                .pojoType(AirportCityLink.class, "airport_hub_id", "city_hub_id");
    }

    /** Reads the first four columns of the city extract, skipping the header line. */
    private static DataSet<CitySat> readCitySat(ExecutionEnvironment env)
    {
        return env.readCsvFile("D:\\T13_REF_CITY_SAT.csv")
                .fieldDelimiter(",").ignoreFirstLine().includeFields(true, true, true, true)
                .pojoType(CitySat.class, "hub_id", "city_cd", "name_cn", "name_en");
    }

    /** Reads columns 2 and 3 of the city/country link extract, skipping the header line. */
    private static DataSet<CityCountryLink> readCityCountryLink(ExecutionEnvironment env)
    {
        return env.readCsvFile("D:\\T13_REF_CITY_COUNTRY_LINK.csv")
                .fieldDelimiter(",").ignoreFirstLine().includeFields(false, true, true)
                .pojoType(CityCountryLink.class, "country_hub_id", "city_hub_id");
    }

    /** Reads columns 1, 2, 5 and 6 of the country extract, skipping the header line. */
    private static DataSet<CountrySat> readCountrySat(ExecutionEnvironment env)
    {
        return env.readCsvFile("D:\\T13_REF_COUNTRY_SAT.csv")
                .fieldDelimiter(",").ignoreFirstLine().includeFields(true, true, false, false, true, true)
                .pojoType(CountrySat.class, "hub_id", "country_cd", "name_cn", "name_en");
    }

    /**
     * Declares a single-file CSV sink for {@code data} at
     * {@code D:\Flink_CSV\<timestamp>______<name>.csv}, overwriting any existing file.
     * The timestamp is taken when this method is called.
     */
    private static void writeCsv(DataSet<?> data, SimpleDateFormat sdf, String name)
    {
        data.writeAsCsv("D:\\Flink_CSV\\" + sdf.format(new Date()) + "______" + name + ".csv",
                "\n", ",", FileSystem.WriteMode.OVERWRITE).setParallelism(1);
    }
}