SSB数据集

Hive建表语句

CREATE database ssb;
create table ssb.customer ( c_custkey integer,
c_name string not null,
c_address string not null,
c_city string not null,
c_nation string not null,
c_region string not null,
c_phone string not null,
c_mktsegment string not null)
partitioned by (pt string)
row format delimited fields terminated by ',' lines terminated by '\n' stored as orc;
 
 
 
create table ssb.dates ( d_datekey integer,
d_date string not null,
d_dayofweek string not null,
d_month string not null,
d_year integer not null,
d_yearmonthnum integer,
d_yearmonth string not null,
d_daynuminweek integer,
d_daynuminmonth integer,
d_daynuminyear integer,
d_monthnuminyear integer,
d_weeknuminyear integer,
d_sellingseason string not null,
d_lastdayinweekfl integer,
d_lastdayinmonthfl integer,
d_holidayfl integer,
d_weekdayfl integer)
partitioned by (pt string)
row format delimited fields terminated by ',' lines terminated by '\n' stored as orc;
 
 
 
create table ssb.part ( p_partkey integer,
p_name string not null,
p_mfgr string not null,
p_category string not null,
p_brand string not null,
p_color string not null,
p_type string not null,
p_size integer not null,
p_container string not null)
partitioned by (pt string)
row format delimited fields terminated by ',' lines terminated by '\n' stored as orc;
 
 
 
create table ssb.supplier ( s_suppkey integer,
s_name string not null,
s_address string not null,
s_city string not null,
s_nation string not null,
s_region string not null,
s_phone string not null)
partitioned by (pt string)
row format delimited fields terminated by ',' lines terminated by '\n' stored as orc;
 
 
 
create table ssb.lineorder ( lo_orderkey bigint,
lo_linenumber bigint,
lo_custkey integer not null,
lo_partkey integer not null,
lo_suppkey integer not null,
lo_orderdate integer not null,
lo_orderpriotity string not null,
lo_shippriotity integer,
lo_quantity bigint,
lo_extendedprice bigint,
lo_ordtotalprice bigint,
lo_discount bigint,
lo_revenue bigint,
lo_supplycost bigint,
lo_tax bigint,
lo_commitdate integer not null,
lo_shipmode string not null,
lo_date timestamp
)
partitioned by (pt string)
row format delimited fields terminated by ',' lines terminated by '\n' stored as orc;

Mysql建表语句

create table customer ( c_custkey     integer,
                            c_name        varchar(25) not null,
                            c_address     varchar(40) not null,
                            c_city        varchar(10) not null,
                            c_nation      varchar(15) not null,
                            c_region      varchar(12) not null,
                            c_phone       varchar(15) not null,
                            c_mktsegment  varchar(10) not null);
 
create table dates ( d_datekey          integer,
                         d_date             varchar(18) not null,
                         d_dayofweek        varchar(18) not null,
                         d_month            varchar(9) not null,
                         d_year             integer not null,
                         d_yearmonthnum     integer,
                         d_yearmonth        varchar(7) not null,
                         d_daynuminweek     integer,
                         d_daynuminmonth    integer,
                         d_daynuminyear     integer,
                         d_monthnuminyear   integer,
                         d_weeknuminyear    integer,
                         d_sellingseason    varchar(12) not null,
                         d_lastdayinweekfl  integer,
                         d_lastdayinmonthfl integer,
                         d_holidayfl        integer,
                         d_weekdayfl        integer);
                          
create table part  ( p_partkey     integer,
                         p_name        varchar(22) not null,
                         p_mfgr        varchar(6) not null,
                         p_category    varchar(7) not null,
                         p_brand       varchar(9) not null,
                         p_color       varchar(11) not null,
                         p_type        varchar(25) not null,
                         p_size        integer not null,
                         p_container   varchar(10) not null);
 
create table supplier ( s_suppkey     integer,
                            s_name        varchar(25) not null,
                            s_address     varchar(25) not null,
                            s_city        varchar(10) not null,
                            s_nation      varchar(15) not null,
                            s_region      varchar(12) not null,
                            s_phone       varchar(15) not null);
 
create table lineorder ( lo_orderkey       bigint,
                         lo_linenumber     bigint,
                         lo_custkey        integer not null,
                         lo_partkey        integer not null,
                         lo_suppkey        integer not null,
                         lo_orderdate      integer not null,
                         lo_orderpriotity  varchar(15) not null,
                         lo_shippriotity   integer,
                         lo_quantity       bigint,
                         lo_extendedprice  bigint,
                         lo_ordtotalprice  bigint,
                         lo_discount       bigint,
                         lo_revenue        bigint,
                         lo_supplycost     bigint,
                         lo_tax            bigint,
                         lo_commitdate     integer not null,
                         lo_shipmode       varchar(10) not null
                         ); 

*.tbl格式数据导入mysql数据表 (路径换成自己的本地路径)

set autocommit=off;
load data infile '/dbgen/lineorder.tbl' into table LINEORDER fields terminated by '|' lines terminated by '|';
commit;

load data infile '/dbgen/supplier.tbl' into table SUPPLIER fields terminated by '|' lines terminated by '|';
commit;

load data infile '/dbgen/customer.tbl' into table CUSTOMER fields terminated by '|' lines terminated by '|';
commit;

load data infile '/dbgen/part.tbl' into table PART fields terminated by '|' lines terminated by '|';
commit;

load data infile '/dbgen/date.tbl' into table DATES fields terminated by '|' lines terminated by '|';
commit;

 oracle建表语句

CREATE USER ssb IDENTIFIED BY ssb;

GRANT CREATE SESSION,
      CREATE TABLE,
      CREATE ANY DIRECTORY,
      UNLIMITED TABLESPACE
    TO ssb;

CREATE OR REPLACE DIRECTORY ssb_dir AS '/home/oracle/ssb';

GRANT READ, WRITE ON DIRECTORY ssb_dir TO ssb;

CREATE TABLE ssb.ext_lineorder
(
    lo_orderkey        INTEGER,
    lo_linenumber      NUMBER(1, 0),
    lo_custkey         INTEGER,
    lo_partkey         INTEGER,
    lo_suppkey         INTEGER,
    lo_orderdate       INTEGER,
    lo_orderpriority   CHAR(15),
    lo_shippriority    CHAR(1),
    lo_quantity        NUMBER(2, 0),
    lo_extendedprice   NUMBER,
    lo_ordtotalprice   NUMBER,
    lo_discount        NUMBER(2, 0),
    lo_revenue         NUMBER,
    lo_supplycost      NUMBER,
    --lo_ordsupplycost   NUMBER, -- this is mentioned in 2.2 Notes(c) but isn't in the layout or sample queries, so not needed?
    lo_tax             NUMBER(1, 0),
    lo_commitdate      INTEGER,
    lo_shipmode        CHAR(10)
)
ORGANIZATION EXTERNAL
    (TYPE oracle_loader
          DEFAULT DIRECTORY ssb_dir
              ACCESS PARAMETERS (
                  FIELDS
                      TERMINATED BY '|'
                  MISSING FIELD VALUES ARE NULL
              )
          LOCATION('lineorder.tbl*'))
          PARALLEL 4;

CREATE TABLE ssb.lineorder
(
    lo_orderkey        INTEGER NOT NULL,
    lo_linenumber      NUMBER(1, 0) NOT NULL,
    lo_custkey         INTEGER NOT NULL,
    lo_partkey         INTEGER NOT NULL,
    lo_suppkey         INTEGER NOT NULL,
    lo_orderdate       NUMBER(8,0) NOT NULL,
    lo_orderpriority   CHAR(15) NOT NULL,
    lo_shippriority    CHAR(1) NOT NULL,
    lo_quantity        NUMBER(2, 0) NOT NULL,
    lo_extendedprice   NUMBER NOT NULL,
    lo_ordtotalprice   NUMBER NOT NULL,
    lo_discount        NUMBER(2, 0) NOT NULL,
    lo_revenue         NUMBER NOT NULL,
    lo_supplycost      NUMBER NOT NULL,
    --lo_ordsupplycost   NUMBER not null, -- this is mentioned in 2.2 Notes(c) but isn't in the layout or sample queries, so not needed?
    lo_tax             NUMBER(1, 0) NOT NULL,
    lo_commitdate      NUMBER(8,0) NOT NULL,
    lo_shipmode        CHAR(10) NOT NULL
);

CREATE TABLE ssb.ext_part
(
    p_partkey     INTEGER,
    p_name        VARCHAR2(22),
    p_mfgr        CHAR(6),
    p_category    CHAR(7),
    p_brand1      CHAR(9),
    p_color       VARCHAR2(11),
    p_type        VARCHAR2(25),
    p_size        NUMBER(2, 0),
    p_container   CHAR(10)
)
ORGANIZATION EXTERNAL
    (TYPE oracle_loader
          DEFAULT DIRECTORY ssb_dir
              ACCESS PARAMETERS (
                  FIELDS
                      TERMINATED BY '|'
                  MISSING FIELD VALUES ARE NULL
              )
          LOCATION('part.tbl'));

CREATE TABLE ssb.part
(
    p_partkey     INTEGER NOT NULL,
    p_name        VARCHAR2(22) NOT NULL,
    p_mfgr        CHAR(6) NOT NULL,
    p_category    CHAR(7) NOT NULL,
    p_brand1      CHAR(9) NOT NULL,
    p_color       VARCHAR2(11) NOT NULL,
    p_type        VARCHAR2(25) NOT NULL,
    p_size        NUMBER(2, 0) NOT NULL,
    p_container   CHAR(10) NOT NULL
);

CREATE TABLE ssb.ext_supplier
(
    s_suppkey   INTEGER,
    s_name      CHAR(25),
    s_address   VARCHAR2(25),
    s_city      CHAR(10),
    s_nation    CHAR(15),
    s_region    CHAR(12),
    s_phone     CHAR(15)
)
ORGANIZATION EXTERNAL
    (TYPE oracle_loader
          DEFAULT DIRECTORY ssb_dir
              ACCESS PARAMETERS (
                  FIELDS
                      TERMINATED BY '|'
                  MISSING FIELD VALUES ARE NULL
              )
          LOCATION('supplier.tbl'));

CREATE TABLE ssb.supplier
(
    s_suppkey   INTEGER NOT NULL,
    s_name      CHAR(25) NOT NULL,
    s_address   VARCHAR2(25) NOT NULL,
    s_city      CHAR(10) NOT NULL,
    s_nation    CHAR(15) NOT NULL,
    s_region    CHAR(12) NOT NULL,
    s_phone     CHAR(15) NOT NULL
);

CREATE TABLE ssb.ext_customer
(
    c_custkey      INTEGER,
    c_name         VARCHAR2(25),
    c_address      VARCHAR2(25),
    c_city         CHAR(10),
    c_nation       CHAR(15),
    c_region       CHAR(12),
    c_phone        CHAR(15),
    c_mktsegment   CHAR(10)
)
ORGANIZATION EXTERNAL
    (TYPE oracle_loader
          DEFAULT DIRECTORY ssb_dir
              ACCESS PARAMETERS (
                  FIELDS
                      TERMINATED BY '|'
                  MISSING FIELD VALUES ARE NULL
              )
          LOCATION('customer.tbl'));

CREATE TABLE ssb.customer
(
    c_custkey      INTEGER NOT NULL,
    c_name         VARCHAR2(25) NOT NULL,
    c_address      VARCHAR2(25) NOT NULL,
    c_city         CHAR(10) NOT NULL,
    c_nation       CHAR(15) NOT NULL,
    c_region       CHAR(12) NOT NULL,
    c_phone        CHAR(15) NOT NULL,
    c_mktsegment   CHAR(10) NOT NULL
);

CREATE TABLE ssb.ext_date_dim
(
    d_datekey            NUMBER(8,0),
    d_date               CHAR(18),
    d_dayofweek          CHAR(9),    -- defined in Section 2.6 as Size 8, but Wednesday is 9 letters
    d_month              CHAR(9),
    d_year               NUMBER(4, 0),
    d_yearmonthnum       NUMBER(6, 0),
    d_yearmonth          CHAR(7),
    d_daynuminweek       NUMBER(1, 0),
    d_daynuminmonth      NUMBER(2, 0),
    d_daynuminyear       NUMBER(3, 0),
    d_monthnuminyear     NUMBER(2, 0),
    d_weeknuminyear      NUMBER(2, 0),
    d_sellingseason      CHAR(12),
    d_lastdayinweekfl    NUMBER(1, 0),
    d_lastdayinmonthfl   NUMBER(1, 0),
    d_holidayfl          NUMBER(1, 0),
    d_weekdayfl          NUMBER(1, 0)
)
ORGANIZATION EXTERNAL
    (TYPE oracle_loader
          DEFAULT DIRECTORY ssb_dir
              ACCESS PARAMETERS (
                  FIELDS
                      TERMINATED BY '|'
                  MISSING FIELD VALUES ARE NULL
              )
          LOCATION('date.tbl'));

CREATE TABLE ssb.date_dim
(
    d_datekey            NUMBER(8,0) NOT NULL,
    d_date               CHAR(18) NOT NULL,
    d_dayofweek          CHAR(9) NOT NULL,    -- defined in Section 2.6 as Size 8, but Wednesday is 9 letters
    d_month              CHAR(9) NOT NULL,
    d_year               NUMBER(4, 0) NOT NULL,
    d_yearmonthnum       NUMBER(6, 0) NOT NULL,
    d_yearmonth          CHAR(7) NOT NULL,
    d_daynuminweek       NUMBER(1, 0) NOT NULL,
    d_daynuminmonth      NUMBER(2, 0) NOT NULL,
    d_daynuminyear       NUMBER(3, 0) NOT NULL,
    d_monthnuminyear     NUMBER(2, 0) NOT NULL,
    d_weeknuminyear      NUMBER(2, 0) NOT NULL,
    d_sellingseason      CHAR(12) NOT NULL,
    d_lastdayinweekfl    NUMBER(1, 0) NOT NULL,
    d_lastdayinmonthfl   NUMBER(1, 0) NOT NULL,
    d_holidayfl          NUMBER(1, 0) NOT NULL,
    d_weekdayfl          NUMBER(1, 0) NOT NULL
);

 将数据导入oracle数据表

TRUNCATE TABLE ssb.lineorder;
TRUNCATE TABLE ssb.part;
TRUNCATE TABLE ssb.supplier;
TRUNCATE TABLE ssb.customer;
TRUNCATE TABLE ssb.date_dim;

ALTER TABLE ssb.lineorder PARALLEL 4;
ALTER SESSION ENABLE PARALLEL DML;

INSERT /*+ APPEND */ INTO  ssb.part      SELECT * FROM ssb.ext_part;
commit;
INSERT /*+ APPEND */ INTO  ssb.supplier  SELECT * FROM ssb.ext_supplier;
commit;
INSERT /*+ APPEND */ INTO  ssb.customer  SELECT * FROM ssb.ext_customer;
commit;
INSERT /*+ APPEND */ INTO  ssb.date_dim  SELECT * FROM ssb.ext_date_dim;
commit;
INSERT /*+ APPEND */ INTO  ssb.lineorder SELECT * FROM ssb.ext_lineorder;
commit;

最后,添加索引和约束  --这里需要修改,应为将*.tbl文件导入oracle

ALTER TABLE ssb.lineorder
    ADD CONSTRAINT pk_lineorder PRIMARY KEY(lo_orderkey, lo_linenumber);

ALTER TABLE ssb.part
    ADD CONSTRAINT pk_part PRIMARY KEY(p_partkey);

ALTER TABLE ssb.supplier
    ADD CONSTRAINT pk_supplier PRIMARY KEY(s_suppkey);

ALTER TABLE ssb.customer
    ADD CONSTRAINT pk_customer PRIMARY KEY(c_custkey);

ALTER TABLE ssb.date_dim
    ADD CONSTRAINT pk_date_dim PRIMARY KEY(d_datekey);

---

ALTER TABLE ssb.lineorder
    ADD CONSTRAINT fk_lineitem_customer FOREIGN KEY(lo_custkey) REFERENCES ssb.customer(c_custkey);

ALTER TABLE ssb.lineorder
    ADD CONSTRAINT fk_lineitem_part FOREIGN KEY(lo_partkey) REFERENCES ssb.part(p_partkey);

ALTER TABLE ssb.lineorder
    ADD CONSTRAINT fk_lineitem_supplier FOREIGN KEY(lo_suppkey) REFERENCES ssb.supplier(s_suppkey);

ALTER TABLE ssb.lineorder
    ADD CONSTRAINT fk_lineitem_orderdate FOREIGN KEY(lo_orderdate) REFERENCES ssb.date_dim(d_datekey);

ALTER TABLE ssb.lineorder
    ADD CONSTRAINT fk_lineitem_commitdate FOREIGN KEY(lo_commitdate) REFERENCES ssb.date_dim(d_datekey);

 ssb的视图查询语句

SELECT
  dwdate_hier.member_name as year,
  part_hier.member_name as part,
  customer_hier.c_region,
  customer_hier.member_name as customer,
  lo_quantity,
  lo_revenue
FROM  ssb.ssb_av
  HIERARCHIES (
    dwdate_hier,
    part_hier,
    customer_hier)
WHERE
  dwdate_hier.d_year = '1998'
  AND dwdate_hier.level_name = 'MONTH'
  AND part_hier.level_name = 'MANUFACTURER'
  AND customer_hier.c_region = 'AMERICA'
  AND customer_hier.level_name = 'NATION'
ORDER BY
  dwdate_hier.hier_order,
  part_hier.hier_order,
  customer_hier.hier_order;
 
SELECT
  dwdate_hier.member_name as time,
  part_hier.member_name as part,
  customer_hier.member_name as customer,
  supplier_hier.member_name as supplier,
  lo_quantity,
  lo_supplycost
FROM  ssb.ssb_av
  HIERARCHIES (
    dwdate_hier,
    part_hier,
    customer_hier,
    supplier_hier)
WHERE
  dwdate_hier.d_year = '1998'
  AND dwdate_hier.level_name = 'MONTH'
  AND part_hier.level_name = 'MANUFACTURER'
  AND customer_hier.c_region = 'AMERICA'
  AND customer_hier.c_nation = 'CANADA'
  AND customer_hier.level_name = 'CITY'
  AND supplier_hier.s_region = 'ASIA'
  AND supplier_hier.level_name = 'REGION'
ORDER BY
  dwdate_hier.hier_order,
  part_hier.hier_order,
  customer_hier.hier_order,
  supplier_hier.hier_order; 
 
SELECT
  dwdate_hier.member_name as year,
  part_hier.member_name as part,
  customer_hier.member_name as customer,
  supplier_hier.member_name as supplier,
  lo_quantity,
  lo_revenue,
  lo_supplycost  
FROM  ssb.ssb_av
  HIERARCHIES (
    dwdate_hier,
    part_hier,
    customer_hier,
    supplier_hier)
WHERE
  dwdate_hier.d_yearmonth = 'Apr1998'
  AND dwdate_hier.level_name = 'DAY'
  AND part_hier.level_name = 'MANUFACTURER'
  AND customer_hier.c_region = 'AMERICA'
  AND customer_hier.c_nation = 'CANADA'
  AND customer_hier.level_name = 'CITY'
  AND supplier_hier.level_name = 'REGION'
ORDER BY
  dwdate_hier.hier_order,
  part_hier.hier_order,
  customer_hier.hier_order,
  supplier_hier.hier_order;
  
SELECT
  dwdate_hier.member_name as year,
  part_hier.member_name as part,
  supplier_hier.member_name as supplier,
  lo_quantity,
  lo_extendedprice,
  lo_ordtotalprice,
  lo_revenue,
  lo_supplycost  
FROM  ssb.ssb_av
  HIERARCHIES (
    dwdate_hier,
    part_hier,
    supplier_hier)
WHERE
  dwdate_hier.level_name = 'YEAR'
  AND part_hier.level_name = 'MANUFACTURER'
  AND supplier_hier.level_name = 'SUPPLIER'
  AND supplier_hier.s_suppkey = '23997';  
  
SELECT
  dwdate_hier.member_name as time,
  part_hier.p_container,
  part_hier.member_name as part,
  lo_quantity,
  lo_extendedprice,
  lo_ordtotalprice,
  lo_revenue,
  lo_supplycost  
FROM  ssb.ssb_av
  HIERARCHIES (
    dwdate_hier,
    part_hier)
WHERE
  dwdate_hier.member_name = 'June 10, 1998     '
  AND dwdate_hier.level_name = 'DAY'
  AND part_hier.level_name = 'PART'
  AND part_hier.p_size = 32;

SELECT
  dwdate_hier.member_name as time,
  part_hier.member_name as part,
  part_hier.p_name,
  part_hier.p_color,
  lo_quantity,
  lo_revenue,
  lo_supplycost,
  lo_revenue - lo_supplycost as profit
FROM  ssb.ssb_av
  HIERARCHIES (
    dwdate_hier,
    part_hier)
WHERE
  dwdate_hier.d_yearmonth = 'Aug1996'
  AND dwdate_hier.d_dayofweek = 'Friday   '
  AND dwdate_hier.level_name = 'DAY'
  AND part_hier.level_name = 'PART'
  AND part_hier.p_color in ('ivory','coral')
ORDER BY
  dwdate_hier.hier_order,
  part_hier.hier_order;

 

posted @ 2022-08-11 10:05  一只艾米果  阅读(214)  评论(0编辑  收藏  举报