一次可行性测试

Windows:

IDEA、Navicat Premium15

Linux:

Hadoop伪分布式集群、PostgreSQL(Timescale)、Hive、Sqoop、Presto

创建关系表

hadoop01:

新建demo数据库:
su postgres
psql
create database demo;
\c demo
create extension timescaledb;

image-20211110101126734

-- 用户表
-- id(主键)、姓名、性别、年龄、出生日期
create table person(
	id int primary key,
	name char(20) not null,
	sex char(20) not null,
	age int not null,
	birthday char(20) not null
	);

image-20211110142708532

-- 地区表
-- 邮编、省份、城市
create table address(
	zipcode int primary key,
	province char(20) not null,
	city char(20) not null
	);

image-20211110133518868

-- 订单表
-- 订单编号、商品、价格、邮编
create table orderID(
	noun char(20) primary key,
	goods char(20) not null,
	price int not null,
	zipcode int not null,
	foreign key(zipcode) references address(zipcode)
	);

image-20211110134219328

-- 记录表
-- time、id、订单编号、电话
create table record(
	time timestamptz not null,
	id int not null,
	noun char(20) not null,
	tel char(20) not null,
	foreign key(id) references person(id),
	foreign key(noun) references orderID(noun),
	primary key(time,noun)
	);
	
SELECT create_hypertable('record', 'time');

image-20211111103410534

在navicat中查看数据库与表是否创建完成

image-20211110140120241

插入数据

向用户表插入数据

public class Person {
    public static void main(String[] args) throws ClassNotFoundException, SQLException {

        Class.forName("org.postgresql.Driver");
        Connection conn = DriverManager.getConnection("jdbc:postgresql://192.168.115.130:5432/demo", "postgres", "201314");
        PreparedStatement st = null;
        for (int i = 1000; i < 10000; i++) {

            String a =getBirthday();
            String sql = "INSERT INTO person(id,name,sex,age,birthday) VALUES (?,?,?,?,?)";
            st = conn.prepareStatement(sql);        //预编译
            st.setInt(1, i);
            st.setString(2, getName());
            st.setString(3, getSex());
            st.setInt(4, getAge(a));
            st.setString(5, getBirthday());
            st.executeUpdate();

        }
        System.out.println("完成");
        st.close();
        conn.close();
    }

    //随机姓名
    public static String getName() {
        Random random = new Random();
        String[] Surname = "赵, 钱, 孙, 李, 周, 吴, 郑, 王, 冯, 陈, 褚, 卫, 蒋, 沈, 韩, 杨, 朱, 秦, 尤, 许".split(",");
        int index = random.nextInt(Surname.length - 1);
        String name = Surname[index]; // 获得一个随机的姓氏
        /* 从常用字中选取一个或两个字作为名 */
        if (random.nextBoolean()) {
            name += Person.getChinese() + Person.getChinese();
        } else {
            name += Person.getChinese();
        }
        return name;
    }
    public static String getChinese() {
        String str = null;
        int highPos, lowPos;
        Random random = new Random();
        highPos = (176 + Math.abs(random.nextInt(71)));// 区码,0xA0打头,从第16区开始,即0xB0=11*16=176,16~55一级汉字,56~87二级汉字
        random = new Random();
        lowPos = 161 + Math.abs(random.nextInt(94));// 位码,0xA0打头,范围第1~94列
        byte[] bArr = new byte[2];
        bArr[0] = (new Integer(highPos)).byteValue();
        bArr[1] = (new Integer(lowPos)).byteValue();
        try {
            str = new String(bArr, "GB2312"); // 区位码组合成汉字
        } catch (UnsupportedEncodingException e) {
            e.printStackTrace();
        }
        return str;
    }

    //随机性别
    public static String getSex() {
        int randNum = new Random().nextInt(2) + 1;
        return randNum == 1 ? "男" : "女";
    }

    //年龄
    public static Integer getAge(String birthday) {
        if ("".equals(birthday) || birthday == null) {
            return null;
        }
        SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
        String nowDate = sdf.format(new Date());
        String[] nowDates = nowDate.split("-");// 当前时间
        String[] dates = birthday.split("-");// 分割时间线 - 年[0],月[1],日[2]
        int age = Integer.parseInt(nowDates[0]) - Integer.parseInt(dates[0]);// 年龄默认为当前时间和生日相减
        if (dates.length >= 2) {// 根据月推算出年龄是否需要增加
            // 如果当前月份大于生日月份,岁数不变,否则加一
            Integer nowMonth = Integer.parseInt(nowDate.substring(5,7));
            Integer birthMonth = Integer.parseInt(birthday.substring(5,7));
            if (nowMonth < birthMonth)
                age++;
            if (!"dates.length >= 3 && nowMonth".equals(birthMonth)) {// 月份相同才计算对应年龄
                // 如果天数大于当前生日月份,岁数不变,否则加一
                Integer nowDay = Integer.parseInt(nowDates[2]);// 当前天数
                Integer birDay = Integer.parseInt(dates[2]);// 生日天数
                if (nowDay < birDay)
                    age++;
            }
        }
        return age;
    }

    //出生日期
    public static String getBirthday(){
        Random rand = new Random();
        SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd");
        Calendar cal = Calendar.getInstance();
        cal.set(1961, 0, 1);
        long start = cal.getTimeInMillis();
        cal.set(2001, 0, 1);
        long end = cal.getTimeInMillis();
        java.util.Date d = new java.util.Date(start + (long)(rand.nextDouble() * (end - start)));
        return format.format(d);
    }
}

image-20211110145251332

向地区表插入数据:

public class Address {
    public static void main(String[] args) throws ClassNotFoundException, SQLException {

        Class.forName("org.postgresql.Driver");
        Connection conn = DriverManager.getConnection("jdbc:postgresql://192.168.115.130:5432/demo", "postgres", "201314");
        PreparedStatement st = null;

        String[] zipcode = ("100000,100010,100032,100080,100020," +
                "300041,300171,300100,300202,300300," +
                "400010,400080,404000,408000,402260," +
                "430000,441000,434000,438000,442400," +
                "210000,214000,213000,225000,215000," +
                "510000,518000,519000,515000,512000," +
                "610000,643000,621000,629000,510400," +
                "310000,312000,314000,315000,325000," +
                "201900,200003,201800,201200,200040," +
                "140100,140200,140300,140400,140500").split(",");
        String[] provice = "北京市,天津市,重庆市,湖北省,江苏省,广东省,四川省,浙江省,上海市,山西省".split(",");
        String[] city = ("北京市,东城区,西城区,海淀区,朝阳区," +
                "和平区,河东区,南开区,河西区,东丽区," +
                "渝中区,大渡口,万州区,南川区,江津区," +
                "武汉市,襄樊市,荆州市,黄冈市,神农架区," +
                "南京市,无锡市,常州市,扬州市,苏州市," +
                "广州市,深圳市,珠海市,汕头市,韶关市," +
                "成都市,自贡市,绵阳市,遂宁市,攀枝花市," +
                "杭州市,绍兴市,嘉兴市,宁波市,温州市," +
                "宝山区,黄埔区,嘉定区,浦东新区,静安区," +
                "太原市,大同市,阳泉市,长治市,晋城市").split(",");

        for (int i = 0; i < 50; i++) {

            String sql = "INSERT INTO address(zipcode,province,city) VALUES (?,?,?)";
            st = conn.prepareStatement(sql);
            st.setInt(1, Integer.valueOf(zipcode[i]));
            st.setString(2, provice[i / 5]);
            st.setString(3, city[i]);
            st.executeUpdate();
        }
        System.out.println("完成");
        st.close();
        conn.close();
    }
}

image-20211111085724873

向订单表插入数据:

public class OrderID {

    public static void main(String[] args) throws ClassNotFoundException, SQLException {

        Class.forName("org.postgresql.Driver");
        Connection conn = DriverManager.getConnection("jdbc:postgresql://192.168.115.130:5432/demo", "postgres", "201314");
        PreparedStatement st = null;

        String[] zipcode = ("100000,100010,100032,100080,100020," +
                "300041,300171,300100,300202,300300," +
                "400010,400080,404000,408000,402260," +
                "430000,441000,434000,438000,442400," +
                "210000,214000,213000,225000,215000," +
                "510000,518000,519000,515000,512000," +
                "610000,643000,621000,629000,510400," +
                "310000,312000,314000,315000,325000," +
                "201900,200003,201800,201200,200040," +
                "140100,140200,140300,140400,140500").split(",");
        String[] g = "苹果,雪梨,西瓜,柠檬,金桔,榴莲,火龙果,龙眼,青蛇果,芒果,桂圆,菠萝,车厘子,山竹,草莓".split(",");
        int[] p = {4,2,1,4,3,6,5,5,9,7,10,2,25,16,12};

        for (int i = 10000; i < 100000; i++) {

            int randNum1 = new Random().nextInt(15);
            int randNum2 = new Random().nextInt(50);
            String sql = "INSERT INTO orderid(noun,goods,price,zipcode) VALUES (?,?,?,?)";
            st = conn.prepareStatement(sql);
            st.setString(1,"sf"+i);
            st.setString(2,g[randNum1]);
            st.setInt(3, p[randNum1]);
            st.setInt(4, Integer.valueOf(zipcode[randNum2]));
            st.executeUpdate();
        }
        System.out.println("完成");
        st.close();
        conn.close();
    }
}

image-20211111094031280

向记录表插入数据:

public class Record {
    public static void main(String[] args) throws ClassNotFoundException, SQLException, InterruptedException {

        Class.forName("org.postgresql.Driver");
        Connection conn = DriverManager.getConnection("jdbc:postgresql://192.168.115.130:5432/demo", "postgres", "201314");
        PreparedStatement st = null;

        int a[] = new int[10000];
        String b[] = new String[10000];
        for (int i = 0; i < 10000; i++) {
            a[i]=i;
            b[i] = getTel();
        }

        for (int i = 10000; i < 50000; i++) {

            Random random = new Random();
            int s = random.nextInt(9999)%(9000) + 1000;

            String sql = "INSERT INTO record(time,id,noun,tel) VALUES (NOW(),?,?,?)";
            st = conn.prepareStatement(sql);
            st.setInt(1,a[s]);
            st.setString(2,"sf"+i);
            st.setString(3, b[s]);
            st.executeUpdate();

            Thread.sleep(100);
        }
        System.out.println("完成");
        st.close();
        conn.close();
    }

    //随机电话号码
    private static String getTel() {
        String[] telFirst="134,135,136,137,138,139,150,151,152,157,158,159,130,131,132,155,156,133,153".split(",");
        int index=getNum(0,telFirst.length-1);
        String first=telFirst[index];
        String second=String.valueOf(getNum(1,888)+10000).substring(1);
        String third=String.valueOf(getNum(1,9100)+10000).substring(1);
        return first+second+third;
    }
    public static int getNum(int start,int end)
    {
        return (int)(Math.random()*(end-start+1)+start);
    }
}

image-20211111140945962

数据迁移

用sqoop将数据迁移到hdfs中

cd /usr/local/sqoop/bin
./sqoop list-tables --connect jdbc:postgresql://hadoop01:5432/demo --driver org.postgresql.Driver --username postgres --password 201314

image-20211112084747169

查看数据表中数据
./sqoop eval --connect jdbc:postgresql://hadoop01:5432/demo --driver org.postgresql.Driver --username postgres --password 201314 -e 'select * from person'
hdfs dfs -mkdir /user/demo
hdfs dfs -chmod 777 /user/demo
hdfs dfs -ls /user

image-20211112092137148

导入使用查询语句查询的数据到指定目录,并指定分隔符
./sqoop import --connect jdbc:postgresql://hadoop01:5432/demo --username postgres --password 201314 --query 'select * from person where $CONDITIONS and 1=1' --target-dir /user/demo/person --fields-terminated-by '\t' --m 1

image-20211112093801106

进入浏览器输入网址http://hadoop01:9870/explorer.html#/user/demo查看

image-20211112094350575

image-20211112094505319

image-20211112094546355

继续导入:
./sqoop import --connect jdbc:postgresql://hadoop01:5432/demo --username postgres --password 201314 --query 'select * from address where $CONDITIONS and 1=1' --target-dir /user/demo/address --fields-terminated-by '\t' --m 1

./sqoop import --connect jdbc:postgresql://hadoop01:5432/demo --username postgres --password 201314 --query 'select * from orderid where $CONDITIONS and 1=1' --target-dir /user/demo/orderid --fields-terminated-by '\t' --m 1

./sqoop import --connect jdbc:postgresql://hadoop01:5432/demo --username postgres --password 201314 --query 'select * from record where $CONDITIONS and 1=1' --target-dir /user/demo/record --fields-terminated-by '\t' --m 1

image-20211112095136934

image-20211112095217066

完成。

用sqoop将数据迁移到hive中

cp /usr/local/hive/lib/hive-common-3.1.2.jar /usr/local/sqoop/lib
./sqoop import --connect jdbc:postgresql://hadoop01:5432/demo --username postgres --password 201314 --table person --hive-import --hive-overwrite --lines-terminated-by "\n" --fields-terminated-by "\t" --m 1

进入浏览器输入网址查看:http://hadoop01:9870/explorer.html#/user/hive/warehouse/

image-20211112144143124

image-20211112144631388

在navicat中查看hive数据库

image-20211112143934954

继续导入:
./sqoop import --connect jdbc:postgresql://hadoop01:5432/demo --username postgres --password 201314 --table address --hive-import --hive-overwrite --lines-terminated-by "\n" --fields-terminated-by "\t" --m 1

./sqoop import --connect jdbc:postgresql://hadoop01:5432/demo --username postgres --password 201314 --table orderid --hive-import --hive-overwrite --lines-terminated-by "\n" --fields-terminated-by "\t" --m 1

./sqoop import --connect jdbc:postgresql://hadoop01:5432/demo --username postgres --password 201314 --table record --hive-import --hive-overwrite --lines-terminated-by "\n" --fields-terminated-by "\t" --m 1

image-20211112150835060

image-20211112160025427

完成。

查询数据

使用presto查询pgsql中的数据,idea连接presto查询pgsql中的数据

修改配置文件postgresql.properties
cd /usr/local/presto/etc/catalog
vim postgresql.properties
useUnicode=true&characterEncoding=utf8

image-20211116163445696

launcher start	//打开presto
presto --server hadoop01:8880 --catalog postgresql --schema demo	//连接pgsql

单表:

1.查询用户表的前10条记录

select * from postgresql.public.person limit 10;

image-20211116163857316

2.统计用户表中有多少条数据,其中男性多少,女性多少

select count(*) from postgresql.public.person;
select count(*) from postgresql.public.person where sex like '男%';
select count(*) from postgresql.public.person where sex like '女%';


select count(*) from person
union
select count(*) from person where sex like '男%'
union
select count(*) from person where sex like '女%';

image-20211117095510669

此处不能使用sex='男',查询结果为0

3.查询地区表中的所有省份

select distinct province from postgresql.public.address;

image-20211117100548598

4.查询订单表中价格高于10块的订单有多少条

select count(*) from postgresql.public.orderid where price>10;

image-20211117103342815

5.查询记录表中id为8888的订单

select * from postgresql.public.record where id=8888;

image-20211117104014625

多表:

6.查询id为8888的顾客的总消费

select record.noun,goods,price from postgresql.public.record,postgresql.public.orderid 
where orderid.noun=record.noun and record.id=8888;

image-20211117110231137

select sum(price) from postgresql.public.record,postgresql.public.orderid 
where orderid.noun=record.noun and record.id=8888;

image-20211117110513171

7.查询有多少人购买了西瓜

select count(*) from orderid where goods like '西瓜%';

image-20211117132225499

select count(distinct id) from postgresql.public.record,postgresql.public.orderid 
where orderid.noun=record.noun and orderid.goods like '西瓜%';

image-20211117132957130

8.所有购买西瓜的这些人中,有多少来自北京的男性

select distinct person.id,sex,province from
postgresql.public.record,postgresql.public.orderid,postgresql.public.person,postgresql.public.address
where orderid.noun=record.noun and person.id=record.id 
and address.zipcode=orderid.zipcode and orderid.goods like '西瓜%' 
and person.sex like '男%' and address.province like '北京%';

image-20211117134629049

统计人数
select count(distinct person.id) as count from
postgresql.public.record,postgresql.public.orderid,postgresql.public.person,postgresql.public.address
where orderid.noun=record.noun and person.id=record.id 
and address.zipcode=orderid.zipcode and orderid.goods like '西瓜%' 
and person.sex like '男%' and address.province like '北京%';

image-20211117134808840

9.重庆渝中区的人在10:30~10:40这个时间段里购买最多的商品是什么,购买了多少次

select goods,count(goods) from orderid,address
where orderid.zipcode=address.zipcode and address.province like '重庆%' and address.city like '渝中%' group by goods order by count(goods) desc;

image-20211117153536704

select goods,count(goods) from orderid,address,record
where orderid.zipcode=address.zipcode and address.province like '重庆%' and address.city like '渝中%'
and orderid.noun=record.noun and record.time Between '2021-11-15 10:30:00' AND '2021-11-15 10:40:00' 
group by goods order by count(goods) desc;

image-20211117153426852

select * from  postgresql.public.record where time > cast('2021-11-15 10:30:00' as timestamp) and time < cast('2021-11-15 10:40:00' as timestamp);

select * from  postgresql.public.record where time between cast('2021-11-15 10:30:00' as timestamp) and cast('2021-11-15 10:40:00' as timestamp);

select goods,count(goods) from postgresql.public.orderid,postgresql.public.address,postgresql.public.record
where orderid.zipcode=address.zipcode and address.province like '重庆%' and address.city like '渝中%'
and orderid.noun=record.noun and record.time between cast('2021-11-15 10:30:00' as timestamp) and cast('2021-11-15 10:40:00' as timestamp) 
group by goods order by count(orderid.goods) desc limit 1;

image-20211117161820176

10.查询id为1000的顾客的所有信息

select * from person,record,orderid,address where record.noun=orderid.noun and
address.zipcode=orderid.zipcode and person.id=record.id and person.id=1000;

image-20211117163502634

select * from postgresql.public.person,postgresql.public.record,postgresql.public.orderid,postgresql.public.address 
where record.noun=orderid.noun and address.zipcode=orderid.zipcode and person.id=record.id and person.id=1000;

image-20211117163715099

发现问题:单个顾客的不同订单所在地址不同。在设计表时,应该将邮编与用户绑定。

查询hive

hdfs dfsadmin -safemode leave	//关闭hdfs的安全模式
hive	//打开hive

image-20211118090639611

退出hive,使用presto进行连接查询
nohup hive --service metastore >/dev/null 2>&1 &	//启动hive的metastore服务
launcher start		//启动presto
presto --server hadoop01:8880 --catalog hive --schema default

image-20211118091958140

show schemas;
show tables from default;
select * from person limit 10;

image-20211118093721493

发现莫名其妙查不到数据,检查一番后,原因是hive里面的数据被清空了,所有重新导入了一次

再次查询:
1.查询用户表的前10条数据
select * from person limit 10;

image-20211119093611741

2.统计用户表中有多少条数据,其中男性多少,女性多少

select count(*) from person
union
select count(*) from person where sex like '男%'
union
select count(*) from person where sex like '女%';

image-20211119094557851

3.查询地区表中的所有省份

select distinct province from address;

image-20211119094749012

4.查询订单表中价格高于10块的订单有多少条

select count(*) from orderid where price>10;

image-20211119094835863

5.查询记录表中id为8888的订单

select * from record where id=8888;

image-20211119094928319

6.查询id为8888的顾客的总消费

select sum(price) from record,orderid where orderid.noun=record.noun and id=8888;

image-20211119095130679

7.查询有多少人购买了西瓜

select count(distinct id) from record,orderid where orderid.noun=record.noun and goods like '西瓜%';

image-20211119095301151

8.所有购买西瓜的这些人中,有多少来自北京的男性

select count(distinct person.id) as count from record,orderid,person,address
where orderid.noun=record.noun and person.id=record.id and address.zipcode=orderid.zipcode 
and goods like '西瓜%' and sex like '男%' and province like '北京%';

image-20211119095515618

9.重庆渝中区的人在10:30~10:40这个时间段里购买最多的商品是什么,购买了多少次

select goods,count(goods) from orderid,address,record
where orderid.zipcode=address.zipcode and province like '重庆%' and city like '渝中%'
and orderid.noun=record.noun and time between '2021-11-15 10:30:00' and '2021-11-15 10:40:00' 
group by goods order by count(orderid.goods) desc limit 1;

image-20211119095940647

10.查询id为1000的顾客的所有信息

select * from person,record,orderid,address where record.noun=orderid.noun and address.zipcode=orderid.zipcode and person.id=record.id and person.id=1000;

image-20211119100204470

可以看出在使用presto在查询hive的数据时,语句比查询postgresql要简洁一些,可以直接使用sql语句,同时在查询时间时不再需要使用cast()函数进行数据类型转换。

JDBC连接

连接postgresql:

image-20211118162706377

测试:
public class JdbcDemo01 {

    public static void main(String[] args) throws SQLException, ClassNotFoundException{
        Connection conn = null;
        //String url = "jdbc:postgresql://hadoop01:5432/demo";
        //String user = "postgres";
        //String pwd = "201314";
        //conn = DriverManager.getConnection(url, user, pwd);
        conn = DriverManager.getConnection("jdbc:postgresql://hadoop01:5432/demo", "postgres", "201314");

        Class.forName("org.postgresql.Driver");
        Statement st = conn.createStatement();
        ResultSet rs = st.executeQuery("select id,name from Person limit 10");

        while (rs.next()) {
            System.out.println("id="+rs.getString(1)+",name is "+rs.getString(2));
        }
        rs.close();
        st.close();
        conn.close();
    }
}

image-20211119090833369

连接presto:

image-20211118162814928

测试:
public class PrestoDemo01 {

    public static void main(String[] args) throws SQLException, ClassNotFoundException {
        Class.forName("com.facebook.presto.jdbc.PrestoDriver");
        Connection connection = DriverManager.getConnection("jdbc:presto://hadoop01:8880/postgres/demo", "root", null);
        Statement st = connection.createStatement();
        ResultSet rs = st.executeQuery("select id,name from postgresql.public.person limit 10");
        while (rs.next()) {
            System.out.println("id="+rs.getString(1)+",name is "+rs.getString(2));
        }
        rs.close();
        st.close();
        connection.close();
    }
    }

image-20211119090743737

posted @ 2021-11-22 08:34  萘汝  阅读(62)  评论(0编辑  收藏  举报
我发了疯似的祝你好!