join多个,随机森林,正则like

uid,mid拼起来
关键词作为特征

SELECT * FROM test1
left outer join test2 on test1.id = test2.id
left outer join test3 on test1.id = test3.id

label 0-

    -- ------------------------ 
    --  ---like+repo+comment = interact
    --  ------------------------
    -- drop table if exists interact_sum;
    -- create table interact_sum as
    -- select
    --     uid,
    --     count(action_type) as interact
    -- from tianchi_weibo.weibo_action_data_train
    -- group by uid;

    -----------------
    ---level point
    -----------------
    -- drop table if exists interact_levelpoint;
    -- create table interact_levelpoint as 
    --  select 
    --     uid,
    --     case 
    --         when interact_sum.interact>100 then interact_sum.interact*200
    --         when interact_sum.interact<=100 and interact_sum.interact>=51 then interact_sum.interact*100
    --         when interact_sum.interact<=50 and interact_sum.interact>=11 then interact_sum.interact*50
    --         when interact_sum.interact<=10 and interact_sum.interact>=6 then interact_sum.interact*10
    --         when interact_sum.interact<=5 and interact_sum.interact>=0 then interact_sum.interact
    --         end as level
    --     from interact_sum;
    

    -- --------------------
    -- ----max level-------------
    -- ---------------------
    -- drop table if exists interact_levelpointmax;
    -- create table interact_levelpointmax as 
    -- select 
    --         uid,
    --         max(level) as max_level
    --     from interact_levelpoint
    -- group by uid;
    
    -- drop table if exists interact_levellabel;
    -- create table interact_levellabel as 
    -- select
    --     uid,
    --     case
    --             when interact_sum.interact>100 then 4
    --             when interact_sum.interact<=100 and interact_sum.interact>=51 then 3
    --             when interact_sum.interact<=50 and interact_sum.interact>=11 then 2
    --             when interact_sum.interact<=10 and interact_sum.interact>=6 then 1
    --             when interact_sum.interact<=5 and interact_sum.interact>=0 then 0
    --             end as levellabel
    --         from interact_sum;    
        
    -- drop table if exists interact_levelmax;
    -- create table interact_levelmax as 
    -- select 
    --         uid,
    --         case 
    --             when max_level>=20000 then max_level/200
    --             when max_level>=5000 and max_level<20000 then max_level/100
    --             when max_level>=550 and max_level<5000 then max_level/50
    --             when max_level>=60 and max_level<550 then max_level/10
    --             when max_level>=0 and max_level<60 then max_level
    --             end as pointmax
    --         from interact_levelpointmax;
            
            
       

    drop table if exists weibo_data;
    create table weibo_data as 
    select
        uid,
        interact,
        -- case 
        --     when havenull.action_sum IS NULL then 0 
        --     when havenull.action_sum IS NOT NULL then cast(havenull.action_sum as bigint)
        -- end as action_sum,
        action_sum,
        levellabel
        
    from
        (
        select
            x.uid,
            x.interact,
            y.action_sum,
            z.levellabel
        from
            (
                select
                    uid,
                    interact
                from interact_sum
                )x
                
            left outer join
                (
                select
                    interact_levelmax.uid,
                    interact_levelmax.pointmax as action_sum
                from interact_levelmax
                )y
                on x.uid=y.uid 
            left outer join
                (
                select
                    interact_levellabel.uid,
                    interact_levellabel.levellabel
                    from interact_levellabel
                )z
                on x.uid=z.uid
           )havenull ;
           
posted @ 2015-10-13 09:57  dunfentiao  阅读(193)  评论(0编辑  收藏  举报