SSB数据集
Hive建表语句
CREATE database ssb; create table ssb.customer ( c_custkey integer, c_name string not null, c_address string not null, c_city string not null, c_nation string not null, c_region string not null, c_phone string not null, c_mktsegment string not null) partitioned by (pt string) row format delimited fields terminated by ',' lines terminated by '\n' stored as orc; create table ssb.dates ( d_datekey integer, d_date string not null, d_dayofweek string not null, d_month string not null, d_year integer not null, d_yearmonthnum integer, d_yearmonth string not null, d_daynuminweek integer, d_daynuminmonth integer, d_daynuminyear integer, d_monthnuminyear integer, d_weeknuminyear integer, d_sellingseason string not null, d_lastdayinweekfl integer, d_lastdayinmonthfl integer, d_holidayfl integer, d_weekdayfl integer) partitioned by (pt string) row format delimited fields terminated by ',' lines terminated by '\n' stored as orc; create table ssb.part ( p_partkey integer, p_name string not null, p_mfgr string not null, p_category string not null, p_brand string not null, p_color string not null, p_type string not null, p_size integer not null, p_container string not null) partitioned by (pt string) row format delimited fields terminated by ',' lines terminated by '\n' stored as orc; create table ssb.supplier ( s_suppkey integer, s_name string not null, s_address string not null, s_city string not null, s_nation string not null, s_region string not null, s_phone string not null) partitioned by (pt string) row format delimited fields terminated by ',' lines terminated by '\n' stored as orc; create table ssb.lineorder ( lo_orderkey bigint, lo_linenumber bigint, lo_custkey integer not null, lo_partkey integer not null, lo_suppkey integer not null, lo_orderdate integer not null, lo_orderpriotity string not null, lo_shippriotity integer, lo_quantity bigint, lo_extendedprice bigint, lo_ordtotalprice bigint, lo_discount bigint, lo_revenue bigint, lo_supplycost bigint, lo_tax bigint, lo_commitdate integer not null, lo_shipmode string not null, lo_date timestamp ) partitioned by (pt string) row format delimited fields terminated by ',' lines terminated by '\n' stored as orc;
Mysql建表语句
create table customer ( c_custkey integer, c_name varchar(25) not null, c_address varchar(40) not null, c_city varchar(10) not null, c_nation varchar(15) not null, c_region varchar(12) not null, c_phone varchar(15) not null, c_mktsegment varchar(10) not null); create table dates ( d_datekey integer, d_date varchar(18) not null, d_dayofweek varchar(18) not null, d_month varchar(9) not null, d_year integer not null, d_yearmonthnum integer, d_yearmonth varchar(7) not null, d_daynuminweek integer, d_daynuminmonth integer, d_daynuminyear integer, d_monthnuminyear integer, d_weeknuminyear integer, d_sellingseason varchar(12) not null, d_lastdayinweekfl integer, d_lastdayinmonthfl integer, d_holidayfl integer, d_weekdayfl integer); create table part ( p_partkey integer, p_name varchar(22) not null, p_mfgr varchar(6) not null, p_category varchar(7) not null, p_brand varchar(9) not null, p_color varchar(11) not null, p_type varchar(25) not null, p_size integer not null, p_container varchar(10) not null); create table supplier ( s_suppkey integer, s_name varchar(25) not null, s_address varchar(25) not null, s_city varchar(10) not null, s_nation varchar(15) not null, s_region varchar(12) not null, s_phone varchar(15) not null); create table lineorder ( lo_orderkey bigint, lo_linenumber bigint, lo_custkey integer not null, lo_partkey integer not null, lo_suppkey integer not null, lo_orderdate integer not null, lo_orderpriotity varchar(15) not null, lo_shippriotity integer, lo_quantity bigint, lo_extendedprice bigint, lo_ordtotalprice bigint, lo_discount bigint, lo_revenue bigint, lo_supplycost bigint, lo_tax bigint, lo_commitdate integer not null, lo_shipmode varchar(10) not null );
*.tbl格式数据导入mysql数据表 (路径换成自己的本地路径)
set autocommit=off; load data infile '/dbgen/lineorder.tbl' into table LINEORDER fields terminated by '|' lines terminated by '|'; commit; load data infile '/dbgen/supplier.tbl' into table SUPPLIER fields terminated by '|' lines terminated by '|'; commit; load data infile '/dbgen/customer.tbl' into table CUSTOMER fields terminated by '|' lines terminated by '|'; commit; load data infile '/dbgen/part.tbl' into table PART fields terminated by '|' lines terminated by '|'; commit; load data infile '/dbgen/date.tbl' into table DATES fields terminated by '|' lines terminated by '|'; commit;
oracle建表语句
CREATE USER ssb IDENTIFIED BY ssb; GRANT CREATE SESSION, CREATE TABLE, CREATE ANY DIRECTORY, UNLIMITED TABLESPACE TO ssb; CREATE OR REPLACE DIRECTORY ssb_dir AS '/home/oracle/ssb'; GRANT READ, WRITE ON DIRECTORY ssb_dir TO ssb; CREATE TABLE ssb.ext_lineorder ( lo_orderkey INTEGER, lo_linenumber NUMBER(1, 0), lo_custkey INTEGER, lo_partkey INTEGER, lo_suppkey INTEGER, lo_orderdate INTEGER, lo_orderpriority CHAR(15), lo_shippriority CHAR(1), lo_quantity NUMBER(2, 0), lo_extendedprice NUMBER, lo_ordtotalprice NUMBER, lo_discount NUMBER(2, 0), lo_revenue NUMBER, lo_supplycost NUMBER, --lo_ordsupplycost NUMBER, -- this is mentioned in 2.2 Notes(c) but isn't in the layout or sample queries, so not needed? lo_tax NUMBER(1, 0), lo_commitdate INTEGER, lo_shipmode CHAR(10) ) ORGANIZATION EXTERNAL (TYPE oracle_loader DEFAULT DIRECTORY ssb_dir ACCESS PARAMETERS ( FIELDS TERMINATED BY '|' MISSING FIELD VALUES ARE NULL ) LOCATION('lineorder.tbl*')) PARALLEL 4; CREATE TABLE ssb.lineorder ( lo_orderkey INTEGER NOT NULL, lo_linenumber NUMBER(1, 0) NOT NULL, lo_custkey INTEGER NOT NULL, lo_partkey INTEGER NOT NULL, lo_suppkey INTEGER NOT NULL, lo_orderdate NUMBER(8,0) NOT NULL, lo_orderpriority CHAR(15) NOT NULL, lo_shippriority CHAR(1) NOT NULL, lo_quantity NUMBER(2, 0) NOT NULL, lo_extendedprice NUMBER NOT NULL, lo_ordtotalprice NUMBER NOT NULL, lo_discount NUMBER(2, 0) NOT NULL, lo_revenue NUMBER NOT NULL, lo_supplycost NUMBER NOT NULL, --lo_ordsupplycost NUMBER not null, -- this is mentioned in 2.2 Notes(c) but isn't in the layout or sample queries, so not needed? lo_tax NUMBER(1, 0) NOT NULL, lo_commitdate NUMBER(8,0) NOT NULL, lo_shipmode CHAR(10) NOT NULL ); CREATE TABLE ssb.ext_part ( p_partkey INTEGER, p_name VARCHAR2(22), p_mfgr CHAR(6), p_category CHAR(7), p_brand1 CHAR(9), p_color VARCHAR2(11), p_type VARCHAR2(25), p_size NUMBER(2, 0), p_container CHAR(10) ) ORGANIZATION EXTERNAL (TYPE oracle_loader DEFAULT DIRECTORY ssb_dir ACCESS PARAMETERS ( FIELDS TERMINATED BY '|' MISSING FIELD VALUES ARE NULL ) LOCATION('part.tbl')); CREATE TABLE ssb.part ( p_partkey INTEGER NOT NULL, p_name VARCHAR2(22) NOT NULL, p_mfgr CHAR(6) NOT NULL, p_category CHAR(7) NOT NULL, p_brand1 CHAR(9) NOT NULL, p_color VARCHAR2(11) NOT NULL, p_type VARCHAR2(25) NOT NULL, p_size NUMBER(2, 0) NOT NULL, p_container CHAR(10) NOT NULL ); CREATE TABLE ssb.ext_supplier ( s_suppkey INTEGER, s_name CHAR(25), s_address VARCHAR2(25), s_city CHAR(10), s_nation CHAR(15), s_region CHAR(12), s_phone CHAR(15) ) ORGANIZATION EXTERNAL (TYPE oracle_loader DEFAULT DIRECTORY ssb_dir ACCESS PARAMETERS ( FIELDS TERMINATED BY '|' MISSING FIELD VALUES ARE NULL ) LOCATION('supplier.tbl')); CREATE TABLE ssb.supplier ( s_suppkey INTEGER NOT NULL, s_name CHAR(25) NOT NULL, s_address VARCHAR2(25) NOT NULL, s_city CHAR(10) NOT NULL, s_nation CHAR(15) NOT NULL, s_region CHAR(12) NOT NULL, s_phone CHAR(15) NOT NULL ); CREATE TABLE ssb.ext_customer ( c_custkey INTEGER, c_name VARCHAR2(25), c_address VARCHAR2(25), c_city CHAR(10), c_nation CHAR(15), c_region CHAR(12), c_phone CHAR(15), c_mktsegment CHAR(10) ) ORGANIZATION EXTERNAL (TYPE oracle_loader DEFAULT DIRECTORY ssb_dir ACCESS PARAMETERS ( FIELDS TERMINATED BY '|' MISSING FIELD VALUES ARE NULL ) LOCATION('customer.tbl')); CREATE TABLE ssb.customer ( c_custkey INTEGER NOT NULL, c_name VARCHAR2(25) NOT NULL, c_address VARCHAR2(25) NOT NULL, c_city CHAR(10) NOT NULL, c_nation CHAR(15) NOT NULL, c_region CHAR(12) NOT NULL, c_phone CHAR(15) NOT NULL, c_mktsegment CHAR(10) NOT NULL ); CREATE TABLE ssb.ext_date_dim ( d_datekey NUMBER(8,0), d_date CHAR(18), d_dayofweek CHAR(9), -- defined in Section 2.6 as Size 8, but Wednesday is 9 letters d_month CHAR(9), d_year NUMBER(4, 0), d_yearmonthnum NUMBER(6, 0), d_yearmonth CHAR(7), d_daynuminweek NUMBER(1, 0), d_daynuminmonth NUMBER(2, 0), d_daynuminyear NUMBER(3, 0), d_monthnuminyear NUMBER(2, 0), d_weeknuminyear NUMBER(2, 0), d_sellingseason CHAR(12), d_lastdayinweekfl NUMBER(1, 0), d_lastdayinmonthfl NUMBER(1, 0), d_holidayfl NUMBER(1, 0), d_weekdayfl NUMBER(1, 0) ) ORGANIZATION EXTERNAL (TYPE oracle_loader DEFAULT DIRECTORY ssb_dir ACCESS PARAMETERS ( FIELDS TERMINATED BY '|' MISSING FIELD VALUES ARE NULL ) LOCATION('date.tbl')); CREATE TABLE ssb.date_dim ( d_datekey NUMBER(8,0) NOT NULL, d_date CHAR(18) NOT NULL, d_dayofweek CHAR(9) NOT NULL, -- defined in Section 2.6 as Size 8, but Wednesday is 9 letters d_month CHAR(9) NOT NULL, d_year NUMBER(4, 0) NOT NULL, d_yearmonthnum NUMBER(6, 0) NOT NULL, d_yearmonth CHAR(7) NOT NULL, d_daynuminweek NUMBER(1, 0) NOT NULL, d_daynuminmonth NUMBER(2, 0) NOT NULL, d_daynuminyear NUMBER(3, 0) NOT NULL, d_monthnuminyear NUMBER(2, 0) NOT NULL, d_weeknuminyear NUMBER(2, 0) NOT NULL, d_sellingseason CHAR(12) NOT NULL, d_lastdayinweekfl NUMBER(1, 0) NOT NULL, d_lastdayinmonthfl NUMBER(1, 0) NOT NULL, d_holidayfl NUMBER(1, 0) NOT NULL, d_weekdayfl NUMBER(1, 0) NOT NULL );
将数据导入oracle数据表
TRUNCATE TABLE ssb.lineorder; TRUNCATE TABLE ssb.part; TRUNCATE TABLE ssb.supplier; TRUNCATE TABLE ssb.customer; TRUNCATE TABLE ssb.date_dim; ALTER TABLE ssb.lineorder PARALLEL 4; ALTER SESSION ENABLE PARALLEL DML; INSERT /*+ APPEND */ INTO ssb.part SELECT * FROM ssb.ext_part; commit; INSERT /*+ APPEND */ INTO ssb.supplier SELECT * FROM ssb.ext_supplier; commit; INSERT /*+ APPEND */ INTO ssb.customer SELECT * FROM ssb.ext_customer; commit; INSERT /*+ APPEND */ INTO ssb.date_dim SELECT * FROM ssb.ext_date_dim; commit; INSERT /*+ APPEND */ INTO ssb.lineorder SELECT * FROM ssb.ext_lineorder; commit;
最后,添加索引和约束 --这里需要修改,应为将*.tbl文件导入oracle
ALTER TABLE ssb.lineorder ADD CONSTRAINT pk_lineorder PRIMARY KEY(lo_orderkey, lo_linenumber); ALTER TABLE ssb.part ADD CONSTRAINT pk_part PRIMARY KEY(p_partkey); ALTER TABLE ssb.supplier ADD CONSTRAINT pk_supplier PRIMARY KEY(s_suppkey); ALTER TABLE ssb.customer ADD CONSTRAINT pk_customer PRIMARY KEY(c_custkey); ALTER TABLE ssb.date_dim ADD CONSTRAINT pk_date_dim PRIMARY KEY(d_datekey); --- ALTER TABLE ssb.lineorder ADD CONSTRAINT fk_lineitem_customer FOREIGN KEY(lo_custkey) REFERENCES ssb.customer(c_custkey); ALTER TABLE ssb.lineorder ADD CONSTRAINT fk_lineitem_part FOREIGN KEY(lo_partkey) REFERENCES ssb.part(p_partkey); ALTER TABLE ssb.lineorder ADD CONSTRAINT fk_lineitem_supplier FOREIGN KEY(lo_suppkey) REFERENCES ssb.supplier(s_suppkey); ALTER TABLE ssb.lineorder ADD CONSTRAINT fk_lineitem_orderdate FOREIGN KEY(lo_orderdate) REFERENCES ssb.date_dim(d_datekey); ALTER TABLE ssb.lineorder ADD CONSTRAINT fk_lineitem_commitdate FOREIGN KEY(lo_commitdate) REFERENCES ssb.date_dim(d_datekey);
ssb的视图查询语句
SELECT dwdate_hier.member_name as year, part_hier.member_name as part, customer_hier.c_region, customer_hier.member_name as customer, lo_quantity, lo_revenue FROM ssb.ssb_av HIERARCHIES ( dwdate_hier, part_hier, customer_hier) WHERE dwdate_hier.d_year = '1998' AND dwdate_hier.level_name = 'MONTH' AND part_hier.level_name = 'MANUFACTURER' AND customer_hier.c_region = 'AMERICA' AND customer_hier.level_name = 'NATION' ORDER BY dwdate_hier.hier_order, part_hier.hier_order, customer_hier.hier_order; SELECT dwdate_hier.member_name as time, part_hier.member_name as part, customer_hier.member_name as customer, supplier_hier.member_name as supplier, lo_quantity, lo_supplycost FROM ssb.ssb_av HIERARCHIES ( dwdate_hier, part_hier, customer_hier, supplier_hier) WHERE dwdate_hier.d_year = '1998' AND dwdate_hier.level_name = 'MONTH' AND part_hier.level_name = 'MANUFACTURER' AND customer_hier.c_region = 'AMERICA' AND customer_hier.c_nation = 'CANADA' AND customer_hier.level_name = 'CITY' AND supplier_hier.s_region = 'ASIA' AND supplier_hier.level_name = 'REGION' ORDER BY dwdate_hier.hier_order, part_hier.hier_order, customer_hier.hier_order, supplier_hier.hier_order; SELECT dwdate_hier.member_name as year, part_hier.member_name as part, customer_hier.member_name as customer, supplier_hier.member_name as supplier, lo_quantity, lo_revenue, lo_supplycost FROM ssb.ssb_av HIERARCHIES ( dwdate_hier, part_hier, customer_hier, supplier_hier) WHERE dwdate_hier.d_yearmonth = 'Apr1998' AND dwdate_hier.level_name = 'DAY' AND part_hier.level_name = 'MANUFACTURER' AND customer_hier.c_region = 'AMERICA' AND customer_hier.c_nation = 'CANADA' AND customer_hier.level_name = 'CITY' AND supplier_hier.level_name = 'REGION' ORDER BY dwdate_hier.hier_order, part_hier.hier_order, customer_hier.hier_order, supplier_hier.hier_order; SELECT dwdate_hier.member_name as year, part_hier.member_name as part, supplier_hier.member_name as supplier, lo_quantity, lo_extendedprice, lo_ordtotalprice, lo_revenue, lo_supplycost FROM ssb.ssb_av HIERARCHIES ( dwdate_hier, part_hier, supplier_hier) WHERE dwdate_hier.level_name = 'YEAR' AND part_hier.level_name = 'MANUFACTURER' AND supplier_hier.level_name = 'SUPPLIER' AND supplier_hier.s_suppkey = '23997'; SELECT dwdate_hier.member_name as time, part_hier.p_container, part_hier.member_name as part, lo_quantity, lo_extendedprice, lo_ordtotalprice, lo_revenue, lo_supplycost FROM ssb.ssb_av HIERARCHIES ( dwdate_hier, part_hier) WHERE dwdate_hier.member_name = 'June 10, 1998 ' AND dwdate_hier.level_name = 'DAY' AND part_hier.level_name = 'PART' AND part_hier.p_size = 32; SELECT dwdate_hier.member_name as time, part_hier.member_name as part, part_hier.p_name, part_hier.p_color, lo_quantity, lo_revenue, lo_supplycost, lo_revenue - lo_supplycost as profit FROM ssb.ssb_av HIERARCHIES ( dwdate_hier, part_hier) WHERE dwdate_hier.d_yearmonth = 'Aug1996' AND dwdate_hier.d_dayofweek = 'Friday ' AND dwdate_hier.level_name = 'DAY' AND part_hier.level_name = 'PART' AND part_hier.p_color in ('ivory','coral') ORDER BY dwdate_hier.hier_order, part_hier.hier_order;