【TPC-H】ClickHouse多集群部署测试

一、测试流程

1. 搭建和配置clickhouse环境:参见文章 https://www.cnblogs.com/syw20170419/p/16250500.html
2. 安装TPC-H环境,参见文章 https://www.cnblogs.com/syw20170419/p/16262806.html
3. 生成测试数据
  a. cd /root/tpcH/tools/tpch-kit/dbgen
  b. ./dbgen -vf -s 1
  c. ls | grep '.*.tbl'
  d. mv *.tbl /root/tpcH/data/1G    将数据移入到data文件下
4. 进入clickhouse:clickhouse-client -h 192.168.30.117 --port 9000 -u test_clickhouse --password Aa123456
5. 创建数据库:create database tpch;
6. 手动常见8张表,创建表的sql见create_table.sql
7. 导入数据到tpch库。数据量小,比如1G,则可用“数据导入-小数据”中的脚本,如果数据量大,如1T,则用“数据导入-大数据”的脚本
8. 执行SQL查询,并记录查询时间
  a. 创建目录:/root/tpcH/script/query/ck
  b. ck文件夹中,存放每个SQL,如query1.sql、query2.sql……
  c. 创建目录文件/root/tpcH/script/query/query_AutoTest.sh
  d. query_AutoTest.sh文件中添加脚本,修改数据库的配置参数
  e. Chmod u+x query_AutoTest.sh
  f. query目录下,执行 ./query_AutoTest.sh
  g. 即可看到执行的结果
 

二、创建表(本地表) 

#本地表,每台节点(118、117、116、115)的数据库中都要去执行
create table tpch.lineitem_local ( 
l_orderkey    bigint,
l_partkey     bigint,
l_suppkey     bigint,
l_linenumber  bigint,
l_quantity    decimal(15,2) ,
l_extendedprice  decimal(15,2) ,
l_discount    decimal(15,2) ,
l_tax         decimal(15,2) ,
l_returnflag  char(1) ,
l_linestatus  char(1) ,
l_shipdate    date ,
l_commitdate  date ,
l_receiptdate date ,
l_shipinstruct char(25) ,
l_shipmode     char(10) ,
l_comment      varchar(44) )engine=MergeTree
order by (l_shipdate,l_returnflag,l_linestatus);
 
 
create table tpch.nation_local  ( 
n_nationkey  bigint,
n_name       char(25) ,
n_regionkey  bigint,
n_comment    varchar(152)
)engine=MergeTree order by (n_name,n_regionkey);
 
 
create table tpch.region_local  ( 
r_regionkey  bigint,
r_name       char(25) ,
r_comment    varchar(152)
)engine=MergeTree order by (r_name);
 
 
create table tpch.part_local  ( 
p_partkey     bigint,
p_name        varchar(55) ,
p_mfgr        char(25) ,
p_brand       char(10) ,
p_type        varchar(25) ,
p_size        bigint,
p_container   char(10) ,
p_retailprice decimal(15,2) ,
p_comment     varchar(23)  
)engine=MergeTree order by (p_name,p_mfgr);
  
create table tpch.supplier_local ( 
s_suppkey     bigint,
s_name        char(25) ,
s_address     varchar(40) ,
s_nationkey   bigint,
s_phone       char(15) ,
s_acctbal     decimal(15,2) ,
s_comment     varchar(101) 
)engine=MergeTree order by (s_suppkey,s_name);
 
 
create table tpch.partsupp_local ( 
ps_partkey     bigint,
ps_suppkey     bigint,
ps_availqty    bigint,
ps_supplycost  decimal(15,2)  ,
ps_comment     varchar(199)  
)engine=MergeTree order by (ps_partkey,ps_suppkey);
 
create table tpch.customer_local ( 
c_custkey     bigint,
c_name        varchar(25) ,
c_address     varchar(40) ,
c_nationkey   bigint,
c_phone       char(15) ,
c_acctbal     decimal(15,2)   ,
c_mktsegment  char(10) ,
c_comment     varchar(117) 
)engine=MergeTree order by (c_custkey,c_name);
 
create table tpch.orders_local  ( 
o_orderkey       bigint,
o_custkey        bigint,
o_orderstatus    char(1) ,
o_totalprice     decimal(15,2) ,
o_orderdate      date ,
o_orderpriority  char(15) ,  
o_clerk          char(15) , 
o_shippriority   bigint,
o_comment        varchar(79) 
)engine=MergeTree order by (o_orderkey,o_custkey);
 
#用于测试query 15 (如果是分布式表,在分布式表创建后,再次执行此条SQL)
create view revenue as
   select
        l_suppkey as supplier_no,
        sum(l_extendedprice * (1 - l_discount)) as total_revenue
    from
        lineitem
   where
       l_shipdate >= toDate('1994-08-01')
       and l_shipdate < date_sub(month,-3,toDate('1994-08-01')) 
    group by
       l_suppkey;

三、创建表(分布式表)

#分布式表,只需要在118上执行即可
CREATE TABLE tpch.lineitem AS lineitem_local ENGINE = Distributed(ck_cluster, tpch, lineitem_local, rand());
 
CREATE TABLE tpch.nation AS nation_local ENGINE = Distributed(ck_cluster, tpch, nation_local, rand());
 
CREATE TABLE tpch.region AS region_local ENGINE = Distributed(ck_cluster, tpch, region_local, rand());
 
CREATE TABLE tpch.part AS part_local ENGINE = Distributed(ck_cluster, tpch, part_local, rand());
 
CREATE TABLE tpch.supplier AS supplier_local ENGINE = Distributed(ck_cluster, tpch, supplier_local, rand());
 
CREATE TABLE tpch.partsupp AS partsupp_local ENGINE = Distributed(ck_cluster, tpch, partsupp_local, rand());
 
CREATE TABLE tpch.customer AS customer_local ENGINE = Distributed(ck_cluster, tpch, customer_local, rand());
 
CREATE TABLE tpch.orders AS orders_local ENGINE = Distributed(ck_cluster, tpch, orders_local, rand());

四、导入数据

#!/bin/bash
 
echo "region---start test run at" `date "+%Y-%m-%d %H:%M:%S"`
clickhouse-client -h 192.168.30.118 --port 9000 -u default --password "" -d tpch_single --format_csv_delimiter="|" --query="insert into tpch_single.region format CSV" < /data8/tpcH/data/10g/region.tbl;
echo "region---end test run at" `date "+%Y-%m-%d %H:%M:%S"`
 
echo "nation---start test run at" `date "+%Y-%m-%d %H:%M:%S"`
clickhouse-client -h 192.168.30.118 --port 9000 -u default --password "" -d tpch_single --format_csv_delimiter="|" --query="insert into tpch_single.nation format CSV" < /data8/tpcH/data/10g/nation.tbl;
echo "nation---end test run at" `date "+%Y-%m-%d %H:%M:%S"`
 
echo "supplier---start test run at" `date "+%Y-%m-%d %H:%M:%S"`
clickhouse-client -h 192.168.30.118 --port 9000 -u default --password "" -d tpch_single --format_csv_delimiter="|" --query="insert into tpch_single.supplier format CSV" < /data8/tpcH/data/10g/supplier.tbl;
echo "supplier---end test run at" `date "+%Y-%m-%d %H:%M:%S"`
 
echo "part---start test run at" `date "+%Y-%m-%d %H:%M:%S"`
clickhouse-client -h 192.168.30.118 --port 9000 -u default --password "" -d tpch_single --format_csv_delimiter="|" --query="insert into tpch_single.part format CSV" < /data8/tpcH/data/10g/part.tbl;
echo "part---end test run at" `date "+%Y-%m-%d %H:%M:%S"`
 
echo "customer---start test run at" `date "+%Y-%m-%d %H:%M:%S"`
clickhouse-client -h 192.168.30.118 --port 9000 -u default --password "" -d tpch_single --format_csv_delimiter="|" --query="insert into tpch_single.customer format CSV" < /data8/tpcH/data/10g/customer.tbl;
echo "customer---end test run at" `date "+%Y-%m-%d %H:%M:%S"`
 
echo "lineitem---start test run at" `date "+%Y-%m-%d %H:%M:%S"`
clickhouse-client -h 192.168.30.118 --port 9000 -u default --password "" -d tpch_single --format_csv_delimiter="|" --query="insert into tpch_single.lineitem format CSV" < /data8/tpcH/data/10g/lineitem.tbl;
echo "lineitem---end test run at" `date "+%Y-%m-%d %H:%M:%S"`
 
echo "orders---start test run at" `date "+%Y-%m-%d %H:%M:%S"`
clickhouse-client -h 192.168.30.118 --port 9000 -u default --password "" -d tpch_single --format_csv_delimiter="|" --query="insert into tpch_single.orders format CSV" < /data8/tpcH/data/10g/orders.tbl;
echo "orders---end test run at" `date "+%Y-%m-%d %H:%M:%S"`
 
echo "partsupp---start test run at" `date "+%Y-%m-%d %H:%M:%S"`
clickhouse-client -h 192.168.30.118 --port 9000 -u default --password "" -d tpch_single --format_csv_delimiter="|" --query="insert into tpch_single.partsupp format CSV" < /data8/tpcH/data/10g/partsupp.tbl;
echo "partsupp---end test run at" `date "+%Y-%m-%d %H:%M:%S"`

五、22条查询

-- TPC-H/TPC-R Pricing Summary Report Query (Q1)
select l_returnflag,
       l_linestatus,
       sum(l_quantity)                                       as sum_qty,
       sum(l_extendedprice)                                  as sum_base_price,
       sum(l_extendedprice * (1 - l_discount))               as sum_disc_price,
       sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
       avg(l_quantity)                                       as avg_qty,
       avg(l_extendedprice)                                  as avg_price,
       avg(l_discount)                                       as avg_disc,
       count(*)                                              as count_order
from lineitem
where l_shipdate between  date_add(cast('1998-12-01' as date), interval -120 day) and cast('1998-12-01' as date)
group by l_returnflag,
         l_linestatus
order by l_returnflag,
         l_linestatus;
 
-- TPC-H/TPC-R Minimum Cost Supplier Query (Q2)
select
    s.s_acctbal,
    s.s_name,
    n.n_name,
    p.p_partkey,
    p.p_mfgr,
    s.s_address,
    s.s_phone,
    s.s_comment
from
    part as p
    global join partsupp as ps on p.p_partkey = ps.ps_partkey
    global join supplier as s on ps.ps_suppkey = s.s_suppkey 
    global join nation as n on s.s_nationkey = n.n_nationkey
    global join region as r on n.n_regionkey = r.r_regionkey
where
    p.p_size = 15
    and p.p_type like '%BRASS'
    and r.r_name = 'EUROPE'
    and ps.ps_supplycost =(
  select
      min(ps.ps_supplycost)
    from
      partsupp as ps
      global join supplier as s on s.s_suppkey = ps.ps_suppkey
      global join nation as n on s.s_nationkey = n.n_nationkey
      global join region as r on n.n_regionkey = r.r_regionkey
      global join  part as p on p.p_partkey = ps.ps_partkey
      where r.r_name = 'EUROPE'
    )
order by
    s.s_acctbal desc,
    n.n_name,
    s.s_name,
    p.p_partkey
limit 100;       
 
-- TPC-H/TPC-R Shipping Priority Query (Q3)
select l.l_orderkey,
       sum(l.l_extendedprice * (1 - l.l_discount)) as revenue,
       o.o_orderdate,
       o.o_shippriority
from customer as c
     global join orders as o on c.c_custkey = o.o_custkey
     global join lineitem as l on l.l_orderkey = o.o_orderkey
where c.c_mktsegment = 'BUILDING'
  and o.o_orderdate < date '1998-03-15'
  and l.l_shipdate > date '1998-03-15'
group by l.l_orderkey,
         o.o_orderdate,
         o.o_shippriority
order by revenue desc,
         o.o_orderdate
limit 10;
 
-- TPC-H/TPC-R Order Priority Checking Query (Q4) 
select o.o_orderpriority,
       count(*) as order_count
from orders as o
         global JOIN lineitem as l ON l.l_orderkey = o.o_orderkey
where o.o_orderdate between cast('1996-07-01' as date)  and (date_add(cast('1996-07-01' as date), interval 90 day))
group by o.o_orderpriority
order by o.o_orderpriority;
 
-- TPC-H/TPC-R Local Supplier Volume Query (Q5)
select supp_nation,
       cust_nation,
       l_year,
       sum(volume) as revenue
from (
         select n1.n_name as supp_nation,
                n2.n_name as cust_nation,
                extract(year from l.l_shipdate) as l_year,
                l.l_extendedprice * (1 - l.l_discount) as volume
         from supplier as s
              global join lineitem as l on s.s_suppkey = l.l_suppkey
              global join orders as o on o.o_orderkey = l.l_orderkey
              global join customer as c on c.c_custkey = o.o_custkey
              global join nation as n1 on s.s_nationkey = n1.n_nationkey
              global join nation as n2 on c.c_nationkey = n2.n_nationkey
         where 
            (
                 (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
                 or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
             )
           and l.l_shipdate between date '1997-01-01' and date '1998-12-31'
     ) as shipping
group by supp_nation,
         cust_nation,
         l_year
order by supp_nation,
         cust_nation,
         l_year;
 
-- TPC-H/TPC-R Forecasting Revenue Change Query (Q6)
select sum(l_extendedprice * l_discount) as revenue
from lineitem
where l_shipdate between cast('1994-01-01' as date)  and (date_add(cast('1994-01-01' as date), interval 360 day))
  and l_discount between 0.06 - 0.01 and 0.06 + 0.01
  and l_quantity < 24;
  
-- TPC-H/TPC-R Volume Shipping Query (Q7)
select supp_nation,
       cust_nation,
       l_year,
       sum(volume) as revenue
from (
         select n1.n_name                          as supp_nation,
                n2.n_name                          as cust_nation,
                extract(year from l.l_shipdate)      as l_year,
                l.l_extendedprice * (1 - l.l_discount) as volume
         from supplier as s
              global join lineitem as l on s.s_suppkey = l.l_suppkey
              global join orders as o on o.o_orderkey = l.l_orderkey
              global join customer as c on c.c_custkey = o.o_custkey
              global join nation as n1 on s.s_nationkey = n1.n_nationkey
              global join nation as n2 on c.c_nationkey = n2.n_nationkey
         where (
                 (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
                 or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
             )
           and l.l_shipdate between date '1997-01-01' and date '1998-12-31'
     ) as shipping
group by supp_nation,
         cust_nation,
         l_year
order by supp_nation,
         cust_nation,
         l_year;
 
-- TPC-H/TPC-R National Market Share Query (Q8)
select o_year,
       sum(case
               when nation = 'BRAZIL' then volume
               else 0
           end) / sum(volume) as mkt_share
from (
         select extract(year from o.o_orderdate)     as o_year,
                l.l_extendedprice * (1 - l.l_discount) as volume,
                n2.n_name                          as nation
         from part as p
              global join lineitem as l on p.p_partkey = l.l_partkey
              global join supplier as s on s.s_suppkey = l.l_suppkey
              global join orders as o on l.l_orderkey = o.o_orderkey
              global join customer as c on o.o_custkey = c.c_custkey
              global join nation as n1 on c.c_nationkey = n1.n_nationkey
              global join nation as n2 on s.s_nationkey = n2.n_nationkey
              global join region as r on n1.n_regionkey = r.r_regionkey
         where 
           r.r_name = 'AMERICA'
           and o.o_orderdate between date '1995-01-01' and date '1996-12-31'
           and p.p_type = 'ECONOMY ANODIZED STEEL'
     ) as all_nations
group by o_year
order by o_year;
 
-- TPC-H/TPC-R Product Type Profit Measure Query (Q9)
select nation,
       o_year,
       sum(amount) as sum_profit
from (
         select n.n_name                                                          as nation,
                extract(year from o.o_orderdate)                                  as o_year,
                l.l_extendedprice * (1 - l.l_discount) - ps.ps_supplycost * l.l_quantity as amount
         from part as p
           GLOBAL INNER JOIN lineitem AS l on p.p_partkey = l.l_partkey
              GLOBAL INNER JOIN supplier AS s ON s.s_suppkey = l.l_suppkey
              GLOBAL INNER JOIN partsupp as ps on ps.ps_partkey = l.l_partkey
              GLOBAL INNER JOIN orders AS o ON o.o_orderkey = l.l_orderkey
              GLOBAL INNER JOIN nation AS n on s.s_nationkey = n.n_nationkey
         where 
         p.p_name like '%green%'
     ) as profit
group by nation,
         o_year
order by nation,
         o_year desc;
--备注:ps.ps_suppkey = l.l_suppkey 未使用      
   
-- TPC-H/TPC-R Returned Item Reporting Query (Q10)
select c.c_custkey,
       c.c_name,
       sum(l.l_extendedprice * (1 - l.l_discount)) as revenue,
       c.c_acctbal,
       n.n_name,
       c.c_address,
       c.c_phone,
       c.c_comment
from customer as c
     global join orders as o on c.c_custkey = o.o_custkey
     global join lineitem as l on l.l_orderkey = o.o_orderkey
     global join nation as n on c.c_nationkey = n.n_nationkey
where 
  o.o_orderdate between cast('1993-10-01' as date)  and (date_add(cast('1993-10-01' as date), interval 90 day))
  and l.l_returnflag = 'R'
group by c.c_custkey,
         c.c_name,
         c.c_acctbal,
         c.c_phone,
         n.n_name,
         c.c_address,
         c.c_comment
order by revenue desc
limit 20;
 
-- TPC-H/TPC-R Important Stock Identification Query (Q11)
 
select ps.ps_partkey,
       sum(ps.ps_supplycost * ps.ps_availqty) as value
from
    partsupp as ps
    global join supplier as s on ps.ps_suppkey = s.s_suppkey
    global join nation as n on s.s_nationkey = n.n_nationkey
where
  n.n_name = 'GERMANY'
group by
    ps.ps_partkey
having
    sum (ps.ps_supplycost * ps.ps_availqty)
     > (
    select
    sum (ps.ps_supplycost * ps.ps_availqty) * 0.0000010000
    from
     partsupp as ps
     global join supplier as s on ps.ps_suppkey = s.s_suppkey
     global join nation as n on s.s_nationkey = n.n_nationkey
    where
     n.n_name = 'GERMANY'
    )
order by
    value desc; 
   
-- TPC-H/TPC-R Shipping Modes and Order Priority Query (Q12)
select l.l_shipmode,
       sum(case
               when o.o_orderpriority = '1-URGENT'
                   or o.o_orderpriority = '2-HIGH'
                   then 1
               else 0
           end) as high_line_count,
       sum(case
               when o.o_orderpriority <> '1-URGENT'
                   and o.o_orderpriority <> '2-HIGH'
                   then 1
               else 0
           end) as low_line_count
from orders as o
     global join lineitem as l on o.o_orderkey = l.l_orderkey
where 
   l.l_shipmode in ('MAIL', 'SHIP')
  and l.l_commitdate < l.l_receiptdate
  and l.l_shipdate < l.l_commitdate
  and l.l_receiptdate between cast('1994-01-01' as date)  and (date_add(cast('1994-01-01' as date), interval 360 day))
group by l.l_shipmode
order by l.l_shipmode;
 
-- TPC-H/TPC-R Customer Distribution Query (Q13)  
select c_count,count(*) as custdist
from(
        select
            c.c_custkey,count(o.o_orderkey) as c_count
        from
            customer as c
            global join orders as o on c.c_custkey = o.o_custkey
            where not like(o.o_comment,'%unusual%requests%')
            group by
                c.c_custkey
    ) as c_orders
group by c_count
order by
    custdist desc,
    c_count desc;
         
-- TPC-H/TPC-R Promotion Effect Query (Q14)
select 100.00 * sum(case
when p.p_type like 'PROMO%'
then l.l_extendedprice * (1 - l.l_discount)
else 0
    end) / sum(l.l_extendedprice * (1 - l.l_discount)) as promo_revenue
from lineitem as l
     global join part as p on l.l_partkey = p.p_partkey
where 
  l.l_shipdate between cast('1995-09-01' as date)  and (date_add(cast('1995-09-01' as date), interval 30 day))
; 
 
-- TPC-H/TPC-R Top Supplier Query (Q15)  
 
--create view revenue as
--   select
--        l_suppkey as supplier_no,
--        sum(l_extendedprice * (1 - l_discount)) as total_revenue
--    from
--        lineitem
--    where
--        l_shipdate >= toDate('1994-08-01')
--        and l_shipdate < date_sub(month,-3,toDate('1994-08-01')) 
--    group by
 --       l_suppkey;
 
-- where l_shipdate between cast('1996-01-01' as date)  and (date_add(cast('1996-01-01' as date), interval 90 day))
-- group by l_suppkey;
select
    s.s_suppkey,
    s.s_name,
    s.s_address,
    s.s_phone,
    r.total_revenue
from
    supplier as s
    global join revenue as r on s.s_suppkey = r.supplier_no
where
 r.total_revenue = (
        select
            max(r.total_revenue)
        from
            revenue as r
    )
order by
    s.s_suppkey;  
-- drop view revenue;
 
-- TPC-H/TPC-R Parts/Supplier Relationship Query (Q16)
select p.p_brand,
       p.p_type,
       p.p_size,
       count(distinct ps.ps_suppkey) as supplier_cnt
from partsupp as ps
     global join part as p on p.p_partkey = ps.ps_partkey
where 
  p.p_brand <> 'Brand#45'
  and p.p_type not like 'MEDIUM POLISHED%'
  and p.p_size global in (49, 14, 23, 45, 19, 3, 36, 9)
  and ps.ps_suppkey global not in (
    select s.s_suppkey
    from supplier as s
    where s.s_comment like '%Customer%Complaints%'
)
group by p.p_brand,
         p.p_type,
         p.p_size
order by supplier_cnt desc,
         p.p_brand,
         p.p_type,
         p.p_size;
         
-- TPC-H/TPC-R Small-Quantity-Order Revenue Query (Q17) 
select sum(l.l_extendedprice) / toDecimal64(7.0,2) as avg_yearly 
from
    lineitem as l
    global join part as p on p.p_partkey = l.l_partkey
where
p.p_brand = 'Brand#23'
    and p.p_container = 'MED BOX'
    and l.l_quantity < (
        select
            toDecimal64(0.2 * avg(l.l_quantity),2)
        from
            lineitem as l 
            global join part as p on l.l_partkey = p.p_partkey
            
    );
 
-- TPC-H/TPC-R Large Volume Customer Query (Q18)
select c.c_name,
       c.c_custkey,
       o.o_orderkey,
       o.o_orderdate,
       o.o_totalprice,
       sum(l.l_quantity)
from customer as c
     global join orders as o on c.c_custkey = o.o_custkey
     global join lineitem as l on o.o_orderkey = l.l_orderkey
where o.o_orderkey global in (
    select l.l_orderkey
    from lineitem as l
    group by l.l_orderkey
    having sum(l.l_quantity) > 300
)
group by c.c_name,
         c.c_custkey,
         o.o_orderkey,
         o.o_orderdate,
         o.o_totalprice
order by o.o_totalprice desc,
         o.o_orderdate
limit 100;
 
-- TPC-H/TPC-R Discounted Revenue Query (Q19)
select sum(l.l_extendedprice * (1 - l.l_discount)) as revenue
from lineitem as l
     global join part as p on p.p_partkey = l.l_partkey
where (
p.p_brand = 'Brand#12'
        and p.p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
        and l.l_quantity >= 1 and l.l_quantity <= 1 + 10
        and p.p_size between 1 and 5
        and l.l_shipmode in ('AIR', 'AIR REG')
        and l.l_shipinstruct = 'DELIVER IN PERSON'
    )
   or (
        p.p_brand = 'Brand#23'
        and p.p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
        and l.l_quantity >= 10 and l.l_quantity <= 10 + 10
        and p.p_size between 1 and 10
        and l.l_shipmode in ('AIR', 'AIR REG')
        and l.l_shipinstruct = 'DELIVER IN PERSON'
    )
   or (
        p.p_brand = 'Brand#34'
        and p.p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
        and l.l_quantity >= 20 and l.l_quantity <= 20 + 10
        and p.p_size between 1 and 15
        and l.l_shipmode in ('AIR', 'AIR REG')
        and l.l_shipinstruct = 'DELIVER IN PERSON'
    );
    
-- TPC-H/TPC-R Potential Part Promotion Query (Q20)
select s.s_name,
       s.s_address
from supplier as s
     global join nation as n on s.s_nationkey = n.n_nationkey
where s.s_suppkey global in (
    select ps.ps_suppkey
    from partsupp as ps
    where ps.ps_partkey global in (
        select p_partkey
        from part
        where p_name like 'forest%'
    )
      and ps.ps_availqty > (
        select 0.5 * sum(l_quantity)
        from lineitem as l
        global join partsupp as ps on l.l_partkey = ps.ps_partkey and l.l_suppkey = ps.ps_suppkey
        where 
      l.l_shipdate between cast('1994-01-01' as date)  and (date_add(cast('1994-01-01' as date), interval 360 day)
          )
    )
)
and n_name = 'CANADA'
order by s.s_name;
 
-- TPC-H/TPC-R Suppliers Who Kept Orders Waiting Query (Q21) 
select s.s_name,
    count(1) as numwait
from
    supplier as s
    global join lineitem as l1 on s.s_suppkey = l1.l_suppkey
    global join orders as o on o.o_orderkey = l1.l_orderkey
    global join nation as n on s_nationkey = n_nationkey
where
o.o_orderstatus = 'F'
    and l1.l_receiptdate > l1.l_commitdate
    and l1.l_orderkey global in (
        select
            l_orderkey
        from
            lineitem as l2
            global join lineitem as l1 on l2.l_orderkey = l1.l_orderkey
        where
            l2.l_suppkey <> l1.l_suppkey
    )
    and l1.l_orderkey global not in (
        select
            l_orderkey
        from
            lineitem as l3
            global join lineitem as l1 on l3.l_orderkey = l1.l_orderkey
        where
  l3.l_suppkey <> l1.l_suppkey
            and l3.l_receiptdate > l3.l_commitdate
    )
    and n.n_name = 'SAUDI ARABIA'
group by
 s.s_name
order by
    numwait desc,
    s.s_name;
 
-- TPC-H/TPC-R Global Sales Opportunity Query (Q22) 
select cntrycode,
       count(*)       as numcust,
       sum(c_acctbal) as totacctbal
from (
         select substring(c_phone from 1 for 2) as cntrycode,
                c_acctbal
         from customer
         where substring(c_phone from 1 for 2) global in
               ('13', '31', '23', '29', '30', '18', '17')
           and c_acctbal > (
             select avg(c_acctbal)
             from customer
             where c_acctbal > 0.00
               and substring(c_phone from 1 for 2) global in
                   ('13', '31', '23', '29', '30', '18', '17')
         )
           and c_custkey global not in(
                 select o_custkey
                 from orders
             )
     ) as custsale
group by cntrycode
order by cntrycode;

 

posted @ 2022-06-28 21:15  Syw_文  阅读(771)  评论(0编辑  收藏  举报