AJK_HIVE_SQL
-- 功能描述: TEST_SPIDER建表语句集合
-- 涉及集市表: ajk_hse_detail_crawl,ajk_hse_detail_crawl_fq,ajk_hse_list_pr_crawl,ajk_hse_list_pr_crawl_fq,ajk_cmnt_detail_crawl,ajk_cmnt_detail_crawl_fq,ajk_cmnt_list_pr_crawl,ajk_cmnt_list_pr_crawl_fq
-- 颗粒度: 月
-- 创建人: 周钊 20210609
-- 最后修改人: 周钊 20210609
-- 审批人:
-- 备注:工作交接
-- 一些设置的语句
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set spark.executor.instances=6;
set spark.serializer=org.apache.spark.serializer.KryoSerializer;
set hive.exec.max.dynamic.partitions.pernode=99999;
set hive.exec.max.dynamic.partitions=999999;
set hive.exec.max.created.files=999999;
set hive.map.aggr=true;
set hive.groupby.skewindata=true;
set hive.log.explain.output=false;
-- test_spider 把csv文件的数据导入到数据表中
load data inpath '/user/XXX/test.csv' into test_spider.XXX;
-- test_spider 把导入的二手挂牌详情数据去重并插入分区表中
INSERT INTO test_spider.ajk_hse_detail_crawl_fq SELECT
uuid(),
T1.ajk_area_orig,
T1.ajk_cmnt_id_orig,
T1.ajk_cmnt_name_orig,
T1.ajk_alias_name_orig,
T1.ajk_cmnt_addr_orig,
T1.get_method,
T1.ajk_srh_vle,
T1.ajk_hse_title,
T1.ajk_hse_title_url,
T1.ajk_hse_list_pr,
T1.ajk_hse_cmnt_nm,
T1.ajk_hse_dtl_url,
T1.ajk_hse_loc_dist,
T1.ajk_hse_biz_dist,
T1.ajk_hse_loc_add,
T1.ajk_hse_bld_year,
T1.ajk_hse_tpcd,
T1.ajk_hse_prop_year,
T1.ajk_hse_prop_type,
T1.ajk_hse_is_first,
T1.ajk_hse_type,
T1.ajk_hse_blg_area,
T1.ajk_hse_twd,
T1.ajk_hse_ttl_flr,
T1.ajk_hse_flr,
T1.ajk_hse_is_elvt,
T1.ajk_hse_is_heat,
T1.ajk_hse_per_pr,
T1.ajk_ref_hse_dwn_pmt,
T1.ajk_ref_hse_mthly_pmt,
T1.ajk_hse_deg_dect,
T1.ajk_hse_year_per,
T1.ajk_hse_is_unq,
T1.ajk_hse_put_tm,
T1.ajk_hse_shk,
T1.ajk_hse_bad,
T1.ajk_hse_crawl_tm,
T1.batch_id,
T1.is_avl,
T1.crt_tm,
T1.crt_psn,
T1.upt_tm,
T1.upt_psn,
T1.ajk_prov,
T1.ajk_city_orig
FROM
(
SELECT
T.*,
row_number() over (
PARTITION BY T.ajk_prov,
T.ajk_city_orig,
T.ajk_city_orig,
T.ajk_area_orig,
T.ajk_hse_cmnt_nm,
T.ajk_hse_title,
T.ajk_hse_title_url,
T.ajk_hse_per_pr
) RN
FROM
test_spider.ajk_hse_detail_crawl T
) T1
WHERE
T1.RN = 1
AND T1.BATCH_ID = "202104011000";
-- test_spider 把导入的二手挂牌均价数据去重并插入分区表中
INSERT INTO test_spider.ajk_hse_list_pr_crawl_fq SELECT
uuid(),
AJK_AREA_ORIG,
AJK_CMNT_ID_ORIG,
AJK_CMNT_NM_ORIG,
AJK_ALIAS_NM_ORIG,
AJK_CMNT_ADDR_ORIG,
GET_METHOD,
SRH_VAL,
AJK_TITLE,
AJK_HSE_URL,
AJK_HSE_LIST_PR,
AJK_HSE_SQ_LIST_PR,
AJK_HSE_TYPE,
AJK_HSE_AREA,
AJK_HSE_TWD,
AJK_HSE_DEG_DECT,
AJK_HSE_FLR_TYPE,
AJK_HSE_TTL_FLR,
AJK_HSE_BLD_YEAR,
AJK_HSE_BLD_STRU,
AJK_HSE_POS,
AJK_HSE_CRAWL_TM,
BATCH_ID,
IS_AVL,
CRT_TM,
CRT_PSN,
UPT_TM,
UPT_PSN,
AJK_PROV,
AJK_CITY_ORIG
FROM
(
SELECT
T.*,
row_number() over (
PARTITION BY T.AJK_PROV,
T.AJK_CITY_ORIG,
T.AJK_AREA_ORIG,
T.AJK_CMNT_ID_ORIG,
T.AJK_CMNT_NM_ORIG,
T.AJK_ALIAS_NM_ORIG,
T.AJK_CMNT_ADDR_ORIG,
T.GET_METHOD,
T.SRH_VAL,
T.AJK_TITLE,
T.AJK_HSE_URL,
T.AJK_HSE_LIST_PR,
T.AJK_HSE_SQ_LIST_PR,
T.AJK_HSE_TYPE,
T.AJK_HSE_AREA,
T.AJK_HSE_TWD,
T.AJK_HSE_DEG_DECT,
T.AJK_HSE_FLR_TYPE,
T.AJK_HSE_TTL_FLR,
T.AJK_HSE_BLD_YEAR,
T.AJK_HSE_BLD_STRU,
T.AJK_HSE_POS,
T.BATCH_ID
) RN
FROM
test_spider.ajk_hse_list_pr_crawl T
) T1
WHERE
T1.RN = 1
AND T1.BATCH_ID = "202104011000";
-- test_spider 把导入的二手小区详情数据去重并插入分区表中
INSERT INTO test_spider.ajk_cmnt_detail_crawl_fq SELECT
uuid(),
T1.AJK_AREA_ORIG,
T1.AJK_CMNT_ID_ORIG,
T1.AJK_CMNT_NAME_ORIG,
T1.AJK_ALIAS_NAME_ORIG,
T1.AJK_CMNT_ADDR_ORIG,
T1.GET_METHOD,
T1.SRH_VAL,
T1.AJK_CMNT,
T1.AJK_CMNT_URL,
T1.AJK_CMNT_LOC_DIST,
T1.AJK_CMNT_LOC_ADD,
T1.AJK_CMNT_PER_PR,
T1.AJK_CMNT_HOA_TYPE,
T1.AJK_CMNT_TOT_AREA,
T1.AJK_CMNT_BLD_YEAR,
T1.AJK_CMNT_VOL_RTO,
T1.AJK_CMNT_DEV_NM,
T1.AJK_CMNT_HOA_NM,
T1.AJK_CMNT_RLT_SHL,
T1.AJK_CMNT_BIZ_DIST,
T1.AJK_CMNT_HOA_FEE,
T1.AJK_CMNT_HSE_TTL,
T1.AJK_CMNT_TTL_PARK,
T1.AJK_CMNT_GRN_RTO,
T1.AJK_CMNT_SEC_TTL,
T1.AJK_CMNT_RENT_TTL,
T1.AJK_CMNT_3YEAR_PR,
T1.AJK_CMNT_3YEAR_BIZ_PR,
T1.AJK_CMNT_LOT,
T1.AJK_CMNT_LAT,
T1.AJK_CMNT_GP_NUM,
T1.AJK_CMNT_TAG,
T1.AJK_CMNT_CRAWL_TM,
T1.BATCH_ID,
T1.IS_AVL,
T1.CRT_TM,
T1.CRT_PSN,
T1.UPT_TM,
T1.UPT_PSN,
T1.ajk_prov,
T1.ajk_city_orig
FROM
(
SELECT
T.*,
row_number() over (
PARTITION BY T.ajk_prov,
T.ajk_city_orig,
T.AJK_AREA_ORIG,
T.AJK_CMNT_ID_ORIG,
T.AJK_CMNT_NAME_ORIG,
T.AJK_ALIAS_NAME_ORIG,
T.AJK_CMNT_ADDR_ORIG,
T.GET_METHOD,
T.SRH_VAL,
T.AJK_CMNT,
T.AJK_CMNT_URL,
T.AJK_CMNT_LOC_DIST,
T.AJK_CMNT_LOC_ADD,
T.AJK_CMNT_PER_PR,
T.AJK_CMNT_HOA_TYPE,
T.AJK_CMNT_TOT_AREA,
T.AJK_CMNT_BLD_YEAR,
T.AJK_CMNT_VOL_RTO,
T.AJK_CMNT_DEV_NM,
T.AJK_CMNT_HOA_NM,
T.AJK_CMNT_RLT_SHL,
T.AJK_CMNT_BIZ_DIST,
T.AJK_CMNT_HOA_FEE,
T.AJK_CMNT_HSE_TTL,
T.AJK_CMNT_TTL_PARK,
T.AJK_CMNT_GRN_RTO,
T.AJK_CMNT_SEC_TTL,
T.AJK_CMNT_RENT_TTL,
T.AJK_CMNT_3YEAR_PR,
T.AJK_CMNT_3YEAR_BIZ_PR,
T.AJK_CMNT_LOT,
T.AJK_CMNT_LAT,
T.AJK_CMNT_GP_NUM,
T.BATCH_ID
) RN
FROM
test_spider.ajk_cmnt_detail_crawl T
) T1
WHERE
T1.RN = 1
AND T1.BATCH_ID = "202104011000";
-- test_spider 把导入的二手小区均价数据去重并插入分区表中
INSERT INTO test_spider.ajk_cmnt_list_pr_crawl_fq SELECT
uuid(),
T1.AJK_AREA_ORIG,
T1.AJK_CMNT_ID_ORIG,
T1.AJK_CMNT_NM_ORIG,
T1.AJK_ALIAS_NM_ORIG,
T1.AJK_COMM_ADDR_ORIG,
T1.GET_METHOD,
T1.SRH_VAL,
T1.AJK_CMNT_NM,
T1.AJK_CMNT_URL,
T1.AJK_CMNT_PER_PR,
T1.AJK_CMNT_AREA,
T1.AJK_CMNT_LOC,
T1.AJK_CMNT_CRAWL_TM,
T1.BATCH_ID,
T1.IS_AVL,
T1.CRT_TM,
T1.CRT_PSN,
T1.UPT_TM,
T1.UPT_PSN,
T1.AJK_PROV,
T1.AJK_CITY_ORIG
FROM
(
SELECT
T.*,
row_number() over (
PARTITION BY T.AJK_PROV,
T.AJK_CITY_ORIG,
T.AJK_AREA_ORIG,
T.AJK_CMNT_NM,
T.AJK_CMNT_URL,
T.AJK_CMNT_PER_PR,
T.AJK_CMNT_AREA,
T.AJK_CMNT_LOC
) RN
FROM
test_spider.ajk_cmnt_list_pr_crawl T
) T1
WHERE
T1.RN = 1
AND T1.BATCH_ID = "202104011000";
-- ODS_DATA_COLLECTION - 插入ODS数据表
insert into table ods_data_collection.XXX select * from test_spider.XXX_fq;