006.PGSQL-数据倾斜、数据各节点分布情况;创建序列、创建分布式表distribute by hash(列名)

数据倾斜、数据各节点分布情况、创建分布式表

查看表的分布节点

 

-- 查看表是否数据倾斜
select table_skewness('b_st_yz_aj_cgajfj');

-- 在哪个节点分布

-- 查看表是否数据倾斜 
select table_skewness('b_st_yz_aj_cgajfj');
 
 -- 在哪个节点分布
select 
xc_node_id, count(1) 
from tablename 
group by xc_node_id 
order by xc_node_id desc;

 

创建序列、创建分布式表

创建序列

 

CREATE SEQUENCE "ioc_theme"."m_ss_qyqy_move_warn_success_rid_seq" 
INCREMENT 1
MINVALUE  1
MAXVALUE 9223372036854775807
START 1
CACHE 1
CYCLE ;

SELECT setval('"ioc_theme"."m_ss_qyqy_move_warn_success_rid_seq"', -1, false);

ALTER SEQUENCE "ioc_theme"."m_ss_qyqy_move_warn_success_rid_seq" OWNER TO "zsj_qh";

 

删除序列  修改序列

----删除前先解除 id 对该序列的依赖
ALTER TABLE tablename ALTER COLUMN id SET DEFAULT null;
DROP SEQUENCE IF EXISTS sequence_name;

修改自增序列开始值
---- id_max 即 id 目前的最大值,可写为1,可通过 “SELECT MAX(id) FROM tablename” 得到

CREATE SEQUENCE sequence_name START WITH id_max;
修改自增序列所属的表和字段
ALTER TABLE ioc_theme.m_ss_yqjc_sbryxxb_51_af ALTER COLUMN rid SET DEFAULT nextval('ioc_theme.m_ss_yqjc_sbryxxb_51_af_rid_seq'::regclass);

 

 

查询表的序列

select table_schema, 
       table_name,
       column_name,
       data_type,
       column_default,
       is_nullable
  from information_schema.columns
 where table_name = 'm_gd_yqjc_sbryxxb_0501_0520';

 

创建分布式表

CREATE TABLE "ioc_theme"."m_hz_cyrk_rk_education" (
  "rid" int4 NOT NULL DEFAULT nextval('"ioc_theme".m_hz_cyrk_rk_education_rid_seq'::regclass),
  "occur_period" int4,
  "occur_period_year" int4,
  "occur_period_month" int4,
  "area_code_abode" varchar(20) COLLATE "pg_catalog"."default",
  "area_code_work" varchar(20) COLLATE "pg_catalog"."default",
  "degree" varchar(40) COLLATE "pg_catalog"."default",
  "employ_num" int4,
  "employ_ratio" numeric(20,2),
  "create_time" timestamp(6) DEFAULT pg_systimestamp(),
  "update_time" timestamp(6) DEFAULT pg_systimestamp(),
  CONSTRAINT "m_hz_cyrk_rk_education_pkey" PRIMARY KEY ("rid")
)distribute by hash(rid)
;

ALTER TABLE "ioc_theme"."m_hz_cyrk_rk_education" 
  OWNER TO "zsj_qh";

COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."rid" IS '行ID';

COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."occur_period" IS '报告期';

COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."occur_period_year" IS '报告期(年)';

COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."occur_period_month" IS '报告期(月)';

COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."area_code_abode" IS '居住地行政区划代码(社区)';

COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."area_code_work" IS '工作地行政区划代码';

COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."degree" IS '学历';

COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."employ_num" IS '人数';

COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."employ_ratio" IS '占比';

COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."create_time" IS '数据创建时间';

COMMENT ON COLUMN "ioc_theme"."m_hz_cyrk_rk_education"."update_time" IS '更新时间';

COMMENT ON TABLE "ioc_theme"."m_hz_cyrk_rk_education" IS '人才专题接口';

 创建分布列表   

6.尝试选择staff_ID、FIRST_NAME和LAST_NAME的组合作为分布列,创建目标表staffs。
CREATE TABLE staffs
(  
  staff_ID       NUMBER(6) not null,
  FIRST_NAME     VARCHAR2(20),
  LAST_NAME      VARCHAR2(25),
  EMAIL          VARCHAR2(25),
  PHONE_NUMBER   VARCHAR2(20),
  HIRE_DATE      DATE,
  employment_ID  VARCHAR2(10),
  SALARY         NUMBER(8,2),
  COMMISSION_PCT NUMBER(2,2),
  MANAGER_ID     NUMBER(6),
  section_ID     NUMBER(4)
) 
DISTRIBUTE BY hash(staff_ID,FIRST_NAME,LAST_NAME);
分布式建表的两种选择
DISTRIBUTE BY hash(column_name) 分布在各个节点,注意column_name作为主键列
DISTRIBUTE BY replication 每个节点一份完整数据

 
posted @ 2020-08-03 19:06  star521  阅读(6812)  评论(0编辑  收藏  举报