【Hadoop离线基础总结】关键路径转化率分析(漏斗模型)
关键路径转化
- 需求
在一条指定的业务流程中,各个步骤的完成人数及相对上一个步骤的百分比
- 模型设计
定义好业务流程中的页面标识Step1、 /item Step2、 /category Step3、 /index Step4、 /order
CREATE TABLE dw_oute_numbs AS SELECT 'step1' AS step,COUNT(DISTINCT remote_addr) AS numbs FROM ods_click_pageviews WHERE datestr = '20130920' AND request LIKE '/item%' UNION ALL SELECT 'step2' AS step,COUNT(DISTINCT remote_addr) AS numbs FROM ods_click_pageviews WHERE datestr = '20130920' AND request LIKE '/category%' UNION ALL SELECT 'step3' AS step,COUNT(DISTINCT remote_addr) AS numbs FROM ods_click_pageviews WHERE datestr = '20130920' AND request LIKE '/order%' UNION ALL SELECT 'step4' AS step,COUNT(DISTINCT remote_addr) AS numbs FROM ods_click_pageviews WHERE datestr = '20130920' AND request LIKE '/index%';
+---------------------+----------------------+--+ | dw_oute_numbs.step | dw_oute_numbs.numbs | +---------------------+----------------------+--+ | step1 | 1029 | | step2 | 1029 | | step3 | 1028 | | step4 | 1018 | +---------------------+----------------------+--+
-
求取每一步相对于上一步的转化率
SELECT t.rnnumbs/t.rrnumbs AS rate FROM ( SELECT rn.step AS rnstep, rn.numbs AS rnnumbs, rr.step AS rrstep, rr.numbs AS rrnumbs FROM dw_oute_numbs rn INNER JOIN dw_oute_numbs rr WHERE rr.step = 'step1' ) t;
-
求取每一步相对于第一步的转化率
SELECT t.rrnumbs/t.rnnumbs AS rate FROM ( SELECT rn.step AS rnstep, rn.numbs AS rnnumbs, rr.step AS rrstep, rr.numbs AS rrnumbs FROM dw_oute_numbs rn INNER JOIN dw_oute_numbs rr WHERE CAST(substr(rn.step,5,1) AS INT) = CAST(SUBSTR(rr.step,5,1) AS INT) - 1 ) t;
-
也可以将两个合并到一起
SELECT abs.step,abs.numbs,abs.rate AS abs_ratio, rel.rate AS leakage_rate FROM ( SELECT tmp.rnstep AS step,tmp.rnnumbs AS numbs, tmp.rnnumbs/tmp.rrnumbs AS rate FROM ( SELECT rn.step AS rnstep, rn.numbs AS rnnumbs, rr.step AS rrstep, rr.numbs AS rrnumbs FROM dw_oute_numbs rn INNER JOIN dw_oute_numbs rr ) tmp WHERE tmp.rrstep = 'step1' ) abs LEFT OUTER JOIN ( SELECT tmp.rrstep AS step,tmp.rrnumbs/tmp.rnnumbs AS rate FROM ( SELECT rn.step AS rnstep, rn.numbs AS rnnumbs, rr.step AS rrstep, rr.numbs AS rrnumbs FROM dw_oute_numbs rn INNER JOIN dw_oute_numbs rr ) tmp WHERE CAST(substr(tmp.rnstep,5,1) AS INT) = CAST(SUBSTR(tmp.rrstep,5,1) AS INT) - 1 ) rel ON abs.step=rel.step;