mysql etl过程 p_social_scan

2016-05-16

最近公司有个项目,要爬取7个视频网站、3个图片网站,关于5个关键词的视频、图片数据,并将爬取到的视频、图片做成展示墙,放到企业客户的iPad上展示用。以下是相关的etl过程,写了一整天,纪念一下。哈哈~

1、SQL语句:ddl_social_scan  

-- CREATE TABLE social_scan AS
-- 1_in_Maybelline
SELECT
    *
FROM
    (
        SELECT
            source AS website,
            (
                CASE brand
                WHEN 'Maybelline' THEN
                    '美宝莲'
                WHEN 'Loreal' THEN
                    '欧莱雅'
                WHEN 'Meiji' THEN
                    '美即'
                WHEN 'NYX' THEN
                    'NYX'
                ELSE
                    'none'
                END
            ) AS keyword,
            tag_name AS title,
            ROUND(
                IFNULL(
                    (
                        POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                            IFNULL(pic_comments_num, 0),
                            2
                        ) + POWER(IFNULL(browse_num, 0), 2)
                    ) / (
                        IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                    ),
                    0
                ),
                0
            ) AS volume,
            IFNULL(pic_like_num, 0) AS dianzan_num,
            IFNULL(pic_comments_num, 0) AS comment_num,
            IFNULL(browse_num, 0) AS browse_num,
            1 / 0 AS stay_time,
            REPLACE (
                LEFT (
                    SUBSTRING_INDEX(
                        picture.nice.pic_url,
                        '.info/in/',
                        - 1
                    ),
                    10
                ),
                '/',
                ''
            ) AS upload_date,
            pic_url AS download_url,
            REPLACE (date_time, '-', '') AS sys_date,
            'picture' AS file_type,
            1 / 0 AS media_file_name,
            1 / 0 AS save_dir,
            0 AS video_not_found
        FROM
            picture.nice
        WHERE
            source = 'in'
        AND brand = 'Maybelline'
        ORDER BY
            volume DESC
        LIMIT 20
    ) 1_in_Maybelline
UNION ALL
    -- 2_in_Loreal
    SELECT
        *
    FROM
        (
            SELECT
                source AS website,
                (
                    CASE brand
                    WHEN 'Maybelline' THEN
                        '美宝莲'
                    WHEN 'Loreal' THEN
                        '欧莱雅'
                    WHEN 'Meiji' THEN
                        '美即'
                    WHEN 'NYX' THEN
                        'NYX'
                    ELSE
                        'none'
                    END
                ) AS keyword,
                tag_name AS title,
                ROUND(
                    IFNULL(
                        (
                            POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                IFNULL(pic_comments_num, 0),
                                2
                            ) + POWER(IFNULL(browse_num, 0), 2)
                        ) / (
                            IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                        ),
                        0
                    ),
                    0
                ) AS volume,
                IFNULL(pic_like_num, 0) AS dianzan_num,
                IFNULL(pic_comments_num, 0) AS comment_num,
                IFNULL(browse_num, 0) AS browse_num,
                1 / 0 AS stay_time,
                REPLACE (
                    LEFT (
                        SUBSTRING_INDEX(
                            picture.nice.pic_url,
                            '.info/in/',
                            - 1
                        ),
                        10
                    ),
                    '/',
                    ''
                ) AS upload_date,
                pic_url AS download_url,
                REPLACE (date_time, '-', '') AS sys_date,
                'picture' AS file_type,
                1 / 0 AS media_file_name,
                1 / 0 AS save_dir,
                0 AS video_not_found
            FROM
                picture.nice
            WHERE
                source = 'in'
            AND brand = 'Loreal'
            ORDER BY
                volume DESC
            LIMIT 20
        ) 2_in_Loreal
    UNION ALL
        -- 3_in_Meiji
        SELECT
            *
        FROM
            (
                SELECT
                    source AS website,
                    (
                        CASE brand
                        WHEN 'Maybelline' THEN
                            '美宝莲'
                        WHEN 'Loreal' THEN
                            '欧莱雅'
                        WHEN 'Meiji' THEN
                            '美即'
                        WHEN 'NYX' THEN
                            'NYX'
                        ELSE
                            'none'
                        END
                    ) AS keyword,
                    tag_name AS title,
                    ROUND(
                        IFNULL(
                            (
                                POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                    IFNULL(pic_comments_num, 0),
                                    2
                                ) + POWER(IFNULL(browse_num, 0), 2)
                            ) / (
                                IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                            ),
                            0
                        ),
                        0
                    ) AS volume,
                    IFNULL(pic_like_num, 0) AS dianzan_num,
                    IFNULL(pic_comments_num, 0) AS comment_num,
                    IFNULL(browse_num, 0) AS browse_num,
                    1 / 0 AS stay_time,
                    REPLACE (
                        LEFT (
                            SUBSTRING_INDEX(
                                picture.nice.pic_url,
                                '.info/in/',
                                - 1
                            ),
                            10
                        ),
                        '/',
                        ''
                    ) AS upload_date,
                    pic_url AS download_url,
                    REPLACE (date_time, '-', '') AS sys_date,
                    'picture' AS file_type,
                    1 / 0 AS media_file_name,
                    1 / 0 AS save_dir,
                    0 AS video_not_found
                FROM
                    picture.nice
                WHERE
                    source = 'in'
                AND brand = 'Meiji'
                ORDER BY
                    volume DESC
                LIMIT 20
            ) 3_in_Meiji
        UNION ALL
            -- 4_in_NYX
            SELECT
                *
            FROM
                (
                    SELECT
                        source AS website,
                        (
                            CASE brand
                            WHEN 'Maybelline' THEN
                                '美宝莲'
                            WHEN 'Loreal' THEN
                                '欧莱雅'
                            WHEN 'Meiji' THEN
                                '美即'
                            WHEN 'NYX' THEN
                                'NYX'
                            ELSE
                                'none'
                            END
                        ) AS keyword,
                        tag_name AS title,
                        ROUND(
                            IFNULL(
                                (
                                    POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                        IFNULL(pic_comments_num, 0),
                                        2
                                    ) + POWER(IFNULL(browse_num, 0), 2)
                                ) / (
                                    IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                ),
                                0
                            ),
                            0
                        ) AS volume,
                        IFNULL(pic_like_num, 0) AS dianzan_num,
                        IFNULL(pic_comments_num, 0) AS comment_num,
                        IFNULL(browse_num, 0) AS browse_num,
                        1 / 0 AS stay_time,
                        REPLACE (
                            LEFT (
                                SUBSTRING_INDEX(
                                    picture.nice.pic_url,
                                    '.info/in/',
                                    - 1
                                ),
                                10
                            ),
                            '/',
                            ''
                        ) AS upload_date,
                        pic_url AS download_url,
                        REPLACE (date_time, '-', '') AS sys_date,
                        'picture' AS file_type,
                        1 / 0 AS media_file_name,
                        1 / 0 AS save_dir,
                        0 AS video_not_found
                    FROM
                        picture.nice
                    WHERE
                        source = 'in'
                    AND brand = 'NYX'
                    ORDER BY
                        volume DESC
                    LIMIT 20
                ) 4_in_NYX
            UNION ALL
                -- 5_nice_Maybelline
                SELECT
                    *
                FROM
                    (
                        SELECT
                            source AS website,
                            (
                                CASE brand
                                WHEN 'Maybelline' THEN
                                    '美宝莲'
                                WHEN 'Loreal' THEN
                                    '欧莱雅'
                                WHEN 'Meiji' THEN
                                    '美即'
                                WHEN 'NYX' THEN
                                    'NYX'
                                ELSE
                                    'none'
                                END
                            ) AS keyword,
                            tag_name AS title,
                            ROUND(
                                IFNULL(
                                    (
                                        POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                            IFNULL(pic_comments_num, 0),
                                            2
                                        ) + POWER(IFNULL(browse_num, 0), 2)
                                    ) / (
                                        IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                    ),
                                    0
                                ),
                                0
                            ) AS volume,
                            IFNULL(pic_like_num, 0) AS dianzan_num,
                            IFNULL(pic_comments_num, 0) AS comment_num,
                            IFNULL(browse_num, 0) AS browse_num,
                            1 / 0 AS stay_time,
                            REPLACE (
                                LEFT (
                                    SUBSTRING_INDEX(
                                        picture.nice.pic_url,
                                        'upload/show/',
                                        - 1
                                    ),
                                    10
                                ),
                                '/',
                                ''
                            ) AS upload_date,
                            pic_url AS download_url,
                            REPLACE (date_time, '-', '') AS sys_date,
                            'picture' AS file_type,
                            1 / 0 AS media_file_name,
                            1 / 0 AS save_dir,
                            0 AS video_not_found
                        FROM
                            picture.nice
                        WHERE
                            source = 'nice'
                        AND brand = 'Maybelline'
                        ORDER BY
                            volume DESC
                        LIMIT 20
                    ) 5_nice_Maybelline
                UNION ALL
                    -- 6_nice_Loreal
                    SELECT
                        *
                    FROM
                        (
                            SELECT
                                source AS website,
                                (
                                    CASE brand
                                    WHEN 'Maybelline' THEN
                                        '美宝莲'
                                    WHEN 'Loreal' THEN
                                        '欧莱雅'
                                    WHEN 'Meiji' THEN
                                        '美即'
                                    WHEN 'NYX' THEN
                                        'NYX'
                                    ELSE
                                        'none'
                                    END
                                ) AS keyword,
                                tag_name AS title,
                                ROUND(
                                    IFNULL(
                                        (
                                            POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                                IFNULL(pic_comments_num, 0),
                                                2
                                            ) + POWER(IFNULL(browse_num, 0), 2)
                                        ) / (
                                            IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                        ),
                                        0
                                    ),
                                    0
                                ) AS volume,
                                IFNULL(pic_like_num, 0) AS dianzan_num,
                                IFNULL(pic_comments_num, 0) AS comment_num,
                                IFNULL(browse_num, 0) AS browse_num,
                                1 / 0 AS stay_time,
                                REPLACE (
                                    LEFT (
                                        SUBSTRING_INDEX(
                                            picture.nice.pic_url,
                                            'upload/show/',
                                            - 1
                                        ),
                                        10
                                    ),
                                    '/',
                                    ''
                                ) AS upload_date,
                                pic_url AS download_url,
                                REPLACE (date_time, '-', '') AS sys_date,
                                'picture' AS file_type,
                                1 / 0 AS media_file_name,
                                1 / 0 AS save_dir,
                                0 AS video_not_found
                            FROM
                                picture.nice
                            WHERE
                                source = 'nice'
                            AND brand = 'Loreal'
                            ORDER BY
                                volume DESC
                            LIMIT 20
                        ) 6_nice_Loreal
                    UNION ALL
                        -- 7_nice_Meiji
                        SELECT
                            *
                        FROM
                            (
                                SELECT
                                    source AS website,
                                    (
                                        CASE brand
                                        WHEN 'Maybelline' THEN
                                            '美宝莲'
                                        WHEN 'Loreal' THEN
                                            '欧莱雅'
                                        WHEN 'Meiji' THEN
                                            '美即'
                                        WHEN 'NYX' THEN
                                            'NYX'
                                        ELSE
                                            'none'
                                        END
                                    ) AS keyword,
                                    tag_name AS title,
                                    ROUND(
                                        IFNULL(
                                            (
                                                POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                                    IFNULL(pic_comments_num, 0),
                                                    2
                                                ) + POWER(IFNULL(browse_num, 0), 2)
                                            ) / (
                                                IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                            ),
                                            0
                                        ),
                                        0
                                    ) AS volume,
                                    IFNULL(pic_like_num, 0) AS dianzan_num,
                                    IFNULL(pic_comments_num, 0) AS comment_num,
                                    IFNULL(browse_num, 0) AS browse_num,
                                    1 / 0 AS stay_time,
                                    REPLACE (
                                        LEFT (
                                            SUBSTRING_INDEX(
                                                picture.nice.pic_url,
                                                'upload/show/',
                                                - 1
                                            ),
                                            10
                                        ),
                                        '/',
                                        ''
                                    ) AS upload_date,
                                    pic_url AS download_url,
                                    REPLACE (date_time, '-', '') AS sys_date,
                                    'picture' AS file_type,
                                    1 / 0 AS media_file_name,
                                    1 / 0 AS save_dir,
                                    0 AS video_not_found
                                FROM
                                    picture.nice
                                WHERE
                                    source = 'nice'
                                AND brand = 'Meiji'
                                ORDER BY
                                    volume DESC
                                LIMIT 20
                            ) 7_nice_Meiji
                        UNION ALL
                            -- 8_nice_NYX
                            SELECT
                                *
                            FROM
                                (
                                    SELECT
                                        source AS website,
                                        (
                                            CASE brand
                                            WHEN 'Maybelline' THEN
                                                '美宝莲'
                                            WHEN 'Loreal' THEN
                                                '欧莱雅'
                                            WHEN 'Meiji' THEN
                                                '美即'
                                            WHEN 'NYX' THEN
                                                'NYX'
                                            ELSE
                                                'none'
                                            END
                                        ) AS keyword,
                                        tag_name AS title,
                                        ROUND(
                                            IFNULL(
                                                (
                                                    POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                                        IFNULL(pic_comments_num, 0),
                                                        2
                                                    ) + POWER(IFNULL(browse_num, 0), 2)
                                                ) / (
                                                    IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                                ),
                                                0
                                            ),
                                            0
                                        ) AS volume,
                                        IFNULL(pic_like_num, 0) AS dianzan_num,
                                        IFNULL(pic_comments_num, 0) AS comment_num,
                                        IFNULL(browse_num, 0) AS browse_num,
                                        1 / 0 AS stay_time,
                                        REPLACE (
                                            LEFT (
                                                SUBSTRING_INDEX(
                                                    picture.nice.pic_url,
                                                    'upload/show/',
                                                    - 1
                                                ),
                                                10
                                            ),
                                            '/',
                                            ''
                                        ) AS upload_date,
                                        pic_url AS download_url,
                                        REPLACE (date_time, '-', '') AS sys_date,
                                        'picture' AS file_type,
                                        1 / 0 AS media_file_name,
                                        1 / 0 AS save_dir,
                                        0 AS video_not_found
                                    FROM
                                        picture.nice
                                    WHERE
                                        source = 'nice'
                                    AND brand = 'Meiji'
                                    ORDER BY
                                        volume DESC
                                    LIMIT 20
                                ) 8_nice_NYX
                            UNION ALL
                                -- 9_美拍_美宝莲 
                                SELECT
                                    *
                                FROM
                                    (
                                        SELECT DISTINCT
                                            source AS website,
                                            (
                                                CASE keyword
                                                WHEN '美宝莲' THEN
                                                    '美宝莲'
                                                WHEN 'Loreal' THEN
                                                    '欧莱雅'
                                                WHEN 'Meiji' THEN
                                                    '美即'
                                                WHEN 'NYX' THEN
                                                    'NYX'
                                                ELSE
                                                    'none'
                                                END
                                            ) AS keyword,
                                            title AS title,
                                            ROUND(
                                                IFNULL(
                                                    (
                                                        POWER(IFNULL(zanCount, 0), 2) + POWER(IFNULL(commentCount, 0), 2) + POWER(IFNULL(watchCount, 0), 2) + POWER(IFNULL(netCount, 0), 2) + POWER(IFNULL(favCount, 0), 2) + POWER(IFNULL(tanmuCount, 0), 2) + POWER(IFNULL(shareCount, 0), 2)
                                                    ) / (
                                                        IFNULL(zanCount, 0) + IFNULL(commentCount, 0) + IFNULL(watchCount, 0) + IFNULL(netCount, 0) + IFNULL(favCount, 0) + IFNULL(tanmuCount, 0) + IFNULL(shareCount, 0)
                                                    ),
                                                    0
                                                ),
                                                0
                                            ) AS volume,
                                            IFNULL(zanCount, 0) AS dianzan_num,
                                            IFNULL(commentCount, 0) AS comment_num,
                                            IFNULL(watchCount, 0) AS browse_num,
                                            length AS stay_time,
                                            REPLACE (uploadTime, '-', '') AS upload_date,
                                            downUrl AS download_url,
                                            REPLACE (LEFT(cralwTime, 10), '-', '') AS sys_date,
                                            'media' AS file_type,
                                            1 / 0 AS media_file_name,
                                            1 / 0 AS save_dir,
                                            0 AS video_not_found
                                        FROM
                                            media.7videos
                                        WHERE
                                            source = '美拍'
                                        AND keyword = '美宝莲'
                                        AND downUrl <> ''
                                        ORDER BY
                                            volume DESC
                                        LIMIT 20
                                    ) 9_美拍_美宝莲
View Code

2、SP过程:ddl_p_social_scan

drop procedure IF EXISTS p_social_scan;
delimiter //

CREATE DEFINER = `root`@`%` PROCEDURE `p_social_scan` (
    IN v_dayid_first INT,
    IN v_dayid_end INT
)
BEGIN
    DECLARE
        v_website VARCHAR (50) DEFAULT NULL;

DECLARE
    v_keyword VARCHAR (4) DEFAULT NULL;

DECLARE
    v_title VARCHAR (250) DEFAULT NULL;

DECLARE
    v_volume DOUBLE (17, 0) DEFAULT 0;

DECLARE
    v_dianzan_num BIGINT (20) DEFAULT 0;

DECLARE
    v_comment_num BIGINT (20) DEFAULT 0;

DECLARE
    v_browse_num BIGINT (20) DEFAULT 0;

DECLARE
    v_stay_time VARCHAR (50) DEFAULT NULL;

DECLARE
    v_upload_date INT (11) DEFAULT NULL;

DECLARE
    v_download_url VARCHAR (250) DEFAULT NULL;

DECLARE
    v_sys_date INT (10) DEFAULT NULL;

DECLARE
    v_file_type VARCHAR (7) DEFAULT NULL;

DECLARE
    v_media_file_name VARCHAR (250) DEFAULT NULL;

DECLARE
    v_save_dir VARCHAR (250) DEFAULT NULL;

DECLARE
    v_video_not_found INT (11) DEFAULT 0;

DECLARE
    var INT DEFAULT 0;

DECLARE
    cur CURSOR FOR -- 1_in_Maybelline
    SELECT
        *
    FROM
        (
            SELECT
                source AS website,
                (
                    CASE brand
                    WHEN 'Maybelline' THEN
                        '美宝莲'
                    WHEN 'Loreal' THEN
                        '欧莱雅'
                    WHEN 'Meiji' THEN
                        '美即'
                    WHEN 'NYX' THEN
                        'NYX'
                    ELSE
                        'none'
                    END
                ) AS keyword,
                tag_name AS title,
                ROUND(
                    IFNULL(
                        (
                            POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                IFNULL(pic_comments_num, 0),
                                2
                            ) + POWER(IFNULL(browse_num, 0), 2)
                        ) / (
                            IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                        ),
                        0
                    ),
                    0
                ) AS volume,
                IFNULL(pic_like_num, 0) AS dianzan_num,
                IFNULL(pic_comments_num, 0) AS comment_num,
                IFNULL(browse_num, 0) AS browse_num,
                1 / 0 AS stay_time,
                REPLACE (
                    LEFT (
                        SUBSTRING_INDEX(
                            picture.nice.pic_url,
                            '.info/in/',
                            - 1
                        ),
                        10
                    ),
                    '/',
                    ''
                ) AS upload_date,
                pic_url AS download_url,
                REPLACE (date_time, '-', '') AS sys_date,
                'picture' AS file_type,
                1 / 0 AS media_file_name,
                1 / 0 AS save_dir,
                0 AS video_not_found
            FROM
                picture.nice
            WHERE
                source = 'in'
            AND brand = 'Maybelline'
            AND REPLACE (date_time, '-', '') >= v_dayid_first
            AND REPLACE (date_time, '-', '') < v_dayid_end
            ORDER BY
                volume DESC
            LIMIT 20
        ) 1_in_Maybelline
    UNION ALL
        -- 2_in_Loreal
        SELECT
            *
        FROM
            (
                SELECT
                    source AS website,
                    (
                        CASE brand
                        WHEN 'Maybelline' THEN
                            '美宝莲'
                        WHEN 'Loreal' THEN
                            '欧莱雅'
                        WHEN 'Meiji' THEN
                            '美即'
                        WHEN 'NYX' THEN
                            'NYX'
                        ELSE
                            'none'
                        END
                    ) AS keyword,
                    tag_name AS title,
                    ROUND(
                        IFNULL(
                            (
                                POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                    IFNULL(pic_comments_num, 0),
                                    2
                                ) + POWER(IFNULL(browse_num, 0), 2)
                            ) / (
                                IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                            ),
                            0
                        ),
                        0
                    ) AS volume,
                    IFNULL(pic_like_num, 0) AS dianzan_num,
                    IFNULL(pic_comments_num, 0) AS comment_num,
                    IFNULL(browse_num, 0) AS browse_num,
                    1 / 0 AS stay_time,
                    REPLACE (
                        LEFT (
                            SUBSTRING_INDEX(
                                picture.nice.pic_url,
                                '.info/in/',
                                - 1
                            ),
                            10
                        ),
                        '/',
                        ''
                    ) AS upload_date,
                    pic_url AS download_url,
                    REPLACE (date_time, '-', '') AS sys_date,
                    'picture' AS file_type,
                    1 / 0 AS media_file_name,
                    1 / 0 AS save_dir,
                    0 AS video_not_found
                FROM
                    picture.nice
                WHERE
                    source = 'in'
                AND brand = 'Loreal'
                AND REPLACE (date_time, '-', '') >= v_dayid_first
                AND REPLACE (date_time, '-', '') < v_dayid_end
                ORDER BY
                    volume DESC
                LIMIT 20
            ) 2_in_Loreal
        UNION ALL
            -- 3_in_Meiji
            SELECT
                *
            FROM
                (
                    SELECT
                        source AS website,
                        (
                            CASE brand
                            WHEN 'Maybelline' THEN
                                '美宝莲'
                            WHEN 'Loreal' THEN
                                '欧莱雅'
                            WHEN 'Meiji' THEN
                                '美即'
                            WHEN 'NYX' THEN
                                'NYX'
                            ELSE
                                'none'
                            END
                        ) AS keyword,
                        tag_name AS title,
                        ROUND(
                            IFNULL(
                                (
                                    POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                        IFNULL(pic_comments_num, 0),
                                        2
                                    ) + POWER(IFNULL(browse_num, 0), 2)
                                ) / (
                                    IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                ),
                                0
                            ),
                            0
                        ) AS volume,
                        IFNULL(pic_like_num, 0) AS dianzan_num,
                        IFNULL(pic_comments_num, 0) AS comment_num,
                        IFNULL(browse_num, 0) AS browse_num,
                        1 / 0 AS stay_time,
                        REPLACE (
                            LEFT (
                                SUBSTRING_INDEX(
                                    picture.nice.pic_url,
                                    '.info/in/',
                                    - 1
                                ),
                                10
                            ),
                            '/',
                            ''
                        ) AS upload_date,
                        pic_url AS download_url,
                        REPLACE (date_time, '-', '') AS sys_date,
                        'picture' AS file_type,
                        1 / 0 AS media_file_name,
                        1 / 0 AS save_dir,
                        0 AS video_not_found
                    FROM
                        picture.nice
                    WHERE
                        source = 'in'
                    AND brand = 'Meiji'
                    AND REPLACE (date_time, '-', '') >= v_dayid_first
                    AND REPLACE (date_time, '-', '') < v_dayid_end
                    ORDER BY
                        volume DESC
                    LIMIT 20
                ) 3_in_Meiji
            UNION ALL
                -- 4_in_NYX
                SELECT
                    *
                FROM
                    (
                        SELECT
                            source AS website,
                            (
                                CASE brand
                                WHEN 'Maybelline' THEN
                                    '美宝莲'
                                WHEN 'Loreal' THEN
                                    '欧莱雅'
                                WHEN 'Meiji' THEN
                                    '美即'
                                WHEN 'NYX' THEN
                                    'NYX'
                                ELSE
                                    'none'
                                END
                            ) AS keyword,
                            tag_name AS title,
                            ROUND(
                                IFNULL(
                                    (
                                        POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                            IFNULL(pic_comments_num, 0),
                                            2
                                        ) + POWER(IFNULL(browse_num, 0), 2)
                                    ) / (
                                        IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                    ),
                                    0
                                ),
                                0
                            ) AS volume,
                            IFNULL(pic_like_num, 0) AS dianzan_num,
                            IFNULL(pic_comments_num, 0) AS comment_num,
                            IFNULL(browse_num, 0) AS browse_num,
                            1 / 0 AS stay_time,
                            REPLACE (
                                LEFT (
                                    SUBSTRING_INDEX(
                                        picture.nice.pic_url,
                                        '.info/in/',
                                        - 1
                                    ),
                                    10
                                ),
                                '/',
                                ''
                            ) AS upload_date,
                            pic_url AS download_url,
                            REPLACE (date_time, '-', '') AS sys_date,
                            'picture' AS file_type,
                            1 / 0 AS media_file_name,
                            1 / 0 AS save_dir,
                            0 AS video_not_found
                        FROM
                            picture.nice
                        WHERE
                            source = 'in'
                        AND brand = 'NYX'
                        AND REPLACE (date_time, '-', '') >= v_dayid_first
                        AND REPLACE (date_time, '-', '') < v_dayid_end
                        ORDER BY
                            volume DESC
                        LIMIT 20
                    ) 4_in_NYX
                UNION ALL
                    -- 5_nice_Maybelline
                    SELECT
                        *
                    FROM
                        (
                            SELECT
                                source AS website,
                                (
                                    CASE brand
                                    WHEN 'Maybelline' THEN
                                        '美宝莲'
                                    WHEN 'Loreal' THEN
                                        '欧莱雅'
                                    WHEN 'Meiji' THEN
                                        '美即'
                                    WHEN 'NYX' THEN
                                        'NYX'
                                    ELSE
                                        'none'
                                    END
                                ) AS keyword,
                                tag_name AS title,
                                ROUND(
                                    IFNULL(
                                        (
                                            POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                                IFNULL(pic_comments_num, 0),
                                                2
                                            ) + POWER(IFNULL(browse_num, 0), 2)
                                        ) / (
                                            IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                        ),
                                        0
                                    ),
                                    0
                                ) AS volume,
                                IFNULL(pic_like_num, 0) AS dianzan_num,
                                IFNULL(pic_comments_num, 0) AS comment_num,
                                IFNULL(browse_num, 0) AS browse_num,
                                1 / 0 AS stay_time,
                                REPLACE (
                                    LEFT (
                                        SUBSTRING_INDEX(
                                            picture.nice.pic_url,
                                            'upload/show/',
                                            - 1
                                        ),
                                        10
                                    ),
                                    '/',
                                    ''
                                ) AS upload_date,
                                pic_url AS download_url,
                                REPLACE (date_time, '-', '') AS sys_date,
                                'picture' AS file_type,
                                1 / 0 AS media_file_name,
                                1 / 0 AS save_dir,
                                0 AS video_not_found
                            FROM
                                picture.nice
                            WHERE
                                source = 'nice'
                            AND brand = 'Maybelline'
                            AND REPLACE (date_time, '-', '') >= v_dayid_first
                            AND REPLACE (date_time, '-', '') < v_dayid_end
                            ORDER BY
                                volume DESC
                            LIMIT 20
                        ) 5_nice_Maybelline
                    UNION ALL
                        -- 6_nice_Loreal
                        SELECT
                            *
                        FROM
                            (
                                SELECT
                                    source AS website,
                                    (
                                        CASE brand
                                        WHEN 'Maybelline' THEN
                                            '美宝莲'
                                        WHEN 'Loreal' THEN
                                            '欧莱雅'
                                        WHEN 'Meiji' THEN
                                            '美即'
                                        WHEN 'NYX' THEN
                                            'NYX'
                                        ELSE
                                            'none'
                                        END
                                    ) AS keyword,
                                    tag_name AS title,
                                    ROUND(
                                        IFNULL(
                                            (
                                                POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                                    IFNULL(pic_comments_num, 0),
                                                    2
                                                ) + POWER(IFNULL(browse_num, 0), 2)
                                            ) / (
                                                IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                            ),
                                            0
                                        ),
                                        0
                                    ) AS volume,
                                    IFNULL(pic_like_num, 0) AS dianzan_num,
                                    IFNULL(pic_comments_num, 0) AS comment_num,
                                    IFNULL(browse_num, 0) AS browse_num,
                                    1 / 0 AS stay_time,
                                    REPLACE (
                                        LEFT (
                                            SUBSTRING_INDEX(
                                                picture.nice.pic_url,
                                                'upload/show/',
                                                - 1
                                            ),
                                            10
                                        ),
                                        '/',
                                        ''
                                    ) AS upload_date,
                                    pic_url AS download_url,
                                    REPLACE (date_time, '-', '') AS sys_date,
                                    'picture' AS file_type,
                                    1 / 0 AS media_file_name,
                                    1 / 0 AS save_dir,
                                    0 AS video_not_found
                                FROM
                                    picture.nice
                                WHERE
                                    source = 'nice'
                                AND brand = 'Loreal'
                                AND REPLACE (date_time, '-', '') >= v_dayid_first
                                AND REPLACE (date_time, '-', '') < v_dayid_end
                                ORDER BY
                                    volume DESC
                                LIMIT 20
                            ) 6_nice_Loreal
                        UNION ALL
                            -- 7_nice_Meiji
                            SELECT
                                *
                            FROM
                                (
                                    SELECT
                                        source AS website,
                                        (
                                            CASE brand
                                            WHEN 'Maybelline' THEN
                                                '美宝莲'
                                            WHEN 'Loreal' THEN
                                                '欧莱雅'
                                            WHEN 'Meiji' THEN
                                                '美即'
                                            WHEN 'NYX' THEN
                                                'NYX'
                                            ELSE
                                                'none'
                                            END
                                        ) AS keyword,
                                        tag_name AS title,
                                        ROUND(
                                            IFNULL(
                                                (
                                                    POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                                        IFNULL(pic_comments_num, 0),
                                                        2
                                                    ) + POWER(IFNULL(browse_num, 0), 2)
                                                ) / (
                                                    IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                                ),
                                                0
                                            ),
                                            0
                                        ) AS volume,
                                        IFNULL(pic_like_num, 0) AS dianzan_num,
                                        IFNULL(pic_comments_num, 0) AS comment_num,
                                        IFNULL(browse_num, 0) AS browse_num,
                                        1 / 0 AS stay_time,
                                        REPLACE (
                                            LEFT (
                                                SUBSTRING_INDEX(
                                                    picture.nice.pic_url,
                                                    'upload/show/',
                                                    - 1
                                                ),
                                                10
                                            ),
                                            '/',
                                            ''
                                        ) AS upload_date,
                                        pic_url AS download_url,
                                        REPLACE (date_time, '-', '') AS sys_date,
                                        'picture' AS file_type,
                                        1 / 0 AS media_file_name,
                                        1 / 0 AS save_dir,
                                        0 AS video_not_found
                                    FROM
                                        picture.nice
                                    WHERE
                                        source = 'nice'
                                    AND brand = 'Meiji'
                                    AND REPLACE (date_time, '-', '') >= v_dayid_first
                                    AND REPLACE (date_time, '-', '') < v_dayid_end
                                    ORDER BY
                                        volume DESC
                                    LIMIT 20
                                ) 7_nice_Meiji
                            UNION ALL
                                -- 8_nice_NYX
                                SELECT
                                    *
                                FROM
                                    (
                                        SELECT
                                            source AS website,
                                            (
                                                CASE brand
                                                WHEN 'Maybelline' THEN
                                                    '美宝莲'
                                                WHEN 'Loreal' THEN
                                                    '欧莱雅'
                                                WHEN 'Meiji' THEN
                                                    '美即'
                                                WHEN 'NYX' THEN
                                                    'NYX'
                                                ELSE
                                                    'none'
                                                END
                                            ) AS keyword,
                                            tag_name AS title,
                                            ROUND(
                                                IFNULL(
                                                    (
                                                        POWER(IFNULL(pic_like_num, 0), 2) + POWER(
                                                            IFNULL(pic_comments_num, 0),
                                                            2
                                                        ) + POWER(IFNULL(browse_num, 0), 2)
                                                    ) / (
                                                        IFNULL(pic_like_num, 0) + IFNULL(pic_comments_num, 0) + IFNULL(browse_num, 0)
                                                    ),
                                                    0
                                                ),
                                                0
                                            ) AS volume,
                                            IFNULL(pic_like_num, 0) AS dianzan_num,
                                            IFNULL(pic_comments_num, 0) AS comment_num,
                                            IFNULL(browse_num, 0) AS browse_num,
                                            1 / 0 AS stay_time,
                                            REPLACE (
                                                LEFT (
                                                    SUBSTRING_INDEX(
                                                        picture.nice.pic_url,
                                                        'upload/show/',
                                                        - 1
                                                    ),
                                                    10
                                                ),
                                                '/',
                                                ''
                                            ) AS upload_date,
                                            pic_url AS download_url,
                                            REPLACE (date_time, '-', '') AS sys_date,
                                            'picture' AS file_type,
                                            1 / 0 AS media_file_name,
                                            1 / 0 AS save_dir,
                                            0 AS video_not_found
                                        FROM
                                            picture.nice
                                        WHERE
                                            source = 'nice'
                                        AND brand = 'Meiji'
                                        AND REPLACE (date_time, '-', '') >= v_dayid_first
                                        AND REPLACE (date_time, '-', '') < v_dayid_end
                                        ORDER BY
                                            volume DESC
                                        LIMIT 20
                                    ) 8_nice_NYX
                                UNION ALL
                                    -- 9_美拍_美宝莲 
                                    SELECT
                                        *
                                    FROM
                                        (
                                            SELECT DISTINCT
                                                source AS website,
                                                (
                                                    CASE keyword
                                                    WHEN '美宝莲' THEN
                                                        '美宝莲'
                                                    WHEN 'Loreal' THEN
                                                        '欧莱雅'
                                                    WHEN 'Meiji' THEN
                                                        '美即'
                                                    WHEN 'NYX' THEN
                                                        'NYX'
                                                    ELSE
                                                        'none'
                                                    END
                                                ) AS keyword,
                                                title AS title,
                                                ROUND(
                                                    IFNULL(
                                                        (
                                                            POWER(IFNULL(zanCount, 0), 2) + POWER(IFNULL(commentCount, 0), 2) + POWER(IFNULL(watchCount, 0), 2) + POWER(IFNULL(netCount, 0), 2) + POWER(IFNULL(favCount, 0), 2) + POWER(IFNULL(tanmuCount, 0), 2) + POWER(IFNULL(shareCount, 0), 2)
                                                        ) / (
                                                            IFNULL(zanCount, 0) + IFNULL(commentCount, 0) + IFNULL(watchCount, 0) + IFNULL(netCount, 0) + IFNULL(favCount, 0) + IFNULL(tanmuCount, 0) + IFNULL(shareCount, 0)
                                                        ),
                                                        0
                                                    ),
                                                    0
                                                ) AS volume,
                                                IFNULL(zanCount, 0) AS dianzan_num,
                                                IFNULL(commentCount, 0) AS comment_num,
                                                IFNULL(watchCount, 0) AS browse_num,
                                                length AS stay_time,
                                                REPLACE (uploadTime, '-', '') AS upload_date,
                                                downUrl AS download_url,
                                                REPLACE (LEFT(cralwTime, 10), '-', '') AS sys_date,
                                                'media' AS file_type,
                                                1 / 0 AS media_file_name,
                                                1 / 0 AS save_dir,
                                                0 AS video_not_found
                                            FROM
                                                media.7videos
                                            WHERE
                                                source = '美拍'
                                            AND keyword = '美宝莲'
                                            AND downUrl <> ''
                                            AND REPLACE (LEFT(cralwTime, 10), '-', '') >= v_dayid_first
                                            AND REPLACE (LEFT(cralwTime, 10), '-', '') < v_dayid_end
                                            ORDER BY
                                                volume DESC
                                            LIMIT 20
                                        ) 9_美拍_美宝莲;

DECLARE
    CONTINUE HANDLER FOR NOT FOUND
SET var = 1;

/*是否达到记录的末尾控制变量*/
OPEN cur;

FETCH cur INTO v_website,
 v_keyword,
 v_title,
 v_volume,
 v_dianzan_num,
 v_comment_num,
 v_browse_num,
 v_stay_time,
 v_upload_date,
 v_download_url,
 v_sys_date,
 v_file_type,
 v_media_file_name,
 v_save_dir,
 v_video_not_found;

/*获取第一条记录*/
WHILE var <> 1 DO
    INSERT INTO loreal.`social_scan` (
        website,
        keyword,
        title,
        volume,
        dianzan_num,
        comment_num,
        browse_num,
        stay_time,
        upload_date,
        download_url,
        sys_date,
        file_type,
        media_file_name,
        save_dir,
        video_not_found
    )
VALUES
    (
        v_website,
        v_keyword,
        v_title,
        v_volume,
        v_dianzan_num,
        v_comment_num,
        v_browse_num,
        v_stay_time,
        v_upload_date,
        v_download_url,
        v_sys_date,
        v_file_type,
        v_media_file_name,
        v_save_dir,
        v_video_not_found
    );

FETCH cur INTO v_website,
 v_keyword,
 v_title,
 v_volume,
 v_dianzan_num,
 v_comment_num,
 v_browse_num,
 v_stay_time,
 v_upload_date,
 v_download_url,
 v_sys_date,
 v_file_type,
 v_media_file_name,
 v_save_dir,
 v_video_not_found;

/*取下一条记录*/
END
WHILE;

CLOSE cur;

END

//
delimiter ;
View Code

3、JOB事件:ddl_p_social_scan_job

DROP EVENT
IF EXISTS p_social_scan_job;
delimiter //


CREATE DEFINER = `root`@`%` EVENT `p_social_scan_job` ON SCHEDULE EVERY 1 DAY STARTS '2016-05-16 02:00:00' ON COMPLETION PRESERVE ENABLE DO

BEGIN
    CALL loreal.p_social_scan (
        cast(
            (
                date_sub(curdate(), INTERVAL 1 DAY)
            ) AS SIGNED INTEGER
        ),
        cast(CURDATE() AS SIGNED INTEGER)
    ) ;
END//
delimiter ;
View Code

 

posted @ 2016-05-16 20:32  岑亮  阅读(664)  评论(0编辑  收藏  举报