join多个,随机森林,正则like
uid,mid拼起来
关键词作为特征
SELECT * FROM test1
left outer join test2 on test1.id = test2.id
left outer join test3 on test1.id = test3.id
label 0-
-- ------------------------
-- ---like+repo+comment = interact
-- ------------------------
-- drop table if exists interact_sum;
-- create table interact_sum as
-- select
-- uid,
-- count(action_type) as interact
-- from tianchi_weibo.weibo_action_data_train
-- group by uid;
-----------------
---level point
-----------------
-- drop table if exists interact_levelpoint;
-- create table interact_levelpoint as
-- select
-- uid,
-- case
-- when interact_sum.interact>100 then interact_sum.interact*200
-- when interact_sum.interact<=100 and interact_sum.interact>=51 then interact_sum.interact*100
-- when interact_sum.interact<=50 and interact_sum.interact>=11 then interact_sum.interact*50
-- when interact_sum.interact<=10 and interact_sum.interact>=6 then interact_sum.interact*10
-- when interact_sum.interact<=5 and interact_sum.interact>=0 then interact_sum.interact
-- end as level
-- from interact_sum;
-- --------------------
-- ----max level-------------
-- ---------------------
-- drop table if exists interact_levelpointmax;
-- create table interact_levelpointmax as
-- select
-- uid,
-- max(level) as max_level
-- from interact_levelpoint
-- group by uid;
-- drop table if exists interact_levellabel;
-- create table interact_levellabel as
-- select
-- uid,
-- case
-- when interact_sum.interact>100 then 4
-- when interact_sum.interact<=100 and interact_sum.interact>=51 then 3
-- when interact_sum.interact<=50 and interact_sum.interact>=11 then 2
-- when interact_sum.interact<=10 and interact_sum.interact>=6 then 1
-- when interact_sum.interact<=5 and interact_sum.interact>=0 then 0
-- end as levellabel
-- from interact_sum;
-- drop table if exists interact_levelmax;
-- create table interact_levelmax as
-- select
-- uid,
-- case
-- when max_level>=20000 then max_level/200
-- when max_level>=5000 and max_level<20000 then max_level/100
-- when max_level>=550 and max_level<5000 then max_level/50
-- when max_level>=60 and max_level<550 then max_level/10
-- when max_level>=0 and max_level<60 then max_level
-- end as pointmax
-- from interact_levelpointmax;
drop table if exists weibo_data;
create table weibo_data as
select
uid,
interact,
-- case
-- when havenull.action_sum IS NULL then 0
-- when havenull.action_sum IS NOT NULL then cast(havenull.action_sum as bigint)
-- end as action_sum,
action_sum,
levellabel
from
(
select
x.uid,
x.interact,
y.action_sum,
z.levellabel
from
(
select
uid,
interact
from interact_sum
)x
left outer join
(
select
interact_levelmax.uid,
interact_levelmax.pointmax as action_sum
from interact_levelmax
)y
on x.uid=y.uid
left outer join
(
select
interact_levellabel.uid,
interact_levellabel.levellabel
from interact_levellabel
)z
on x.uid=z.uid
)havenull ;