【SQL】窗口函数:求数据组内累计值和累计百分比

〇、概述

1、所需资料

窗口函数实现组内百分比、累计值、累计百分比:https://blog.csdn.net/weixin_39751959/article/details/88828922

2、背景

需求:不同场景不同规则下各区间内基线值的计算和MQ发送

计算位于场景列表内的各场景组合(scene),满足不同规则(rule)某区间dataRange(如20%-80%)的基线平均值

其他场景,计算平均数作为基线值

一、概述

1、输入信息

传入参数:

{"rules":
     [{"dataRange":[20,80],"ruleTypeName":"标准基线","duration":30,"ruleType":"1","ruleId":"123"},
      {"dataRange":[0,20],"ruleTypeName":"管理基线","duration":60,"ruleType":"2","ruleId":"234"},
      {"dataRange":[80,100],"ruleTypeName":"异常基线","duration":90,"ruleType":"3","ruleId":"123"}],
 "modules":
     [{"moduleNumber":"ltc_contract_basic_info","moduleName":"合同基本信息","calField":"create_time","nextFields":["ltc_contract_basic_info_id"]},
      {"moduleNumber":"ltc_contract_assess_basic_info","moduleName":"合同评审信息","preFields":["ltc_contract_basic_info_id"],"nextFields":["ltc_contract_assess_basic_info_id"]},
      {"moduleNumber":"ltc_contract_assess_record","moduleName":"合同评审记录","preFields":["ltc_contract_assess_basic_info_id"],"calField":"contract_assess_end_date"}],"
 scenes":
     {"sceneKeys":
         [{"values":["010101","010102","0102","0103","0103"],"key":"sales_scenario"},
        {"values":["01","02","03","04","05"],"key":"contract_register_type_code"}],
"sceneGroups":
     [{"contract_register_type_code":"03","sales_scenario":"0101"},
     {"contract_register_type_code":"03","sales_scenario":"020102"}]},
"definition":
    {"definitionName":"从注册到评审的时间基线","version":1,"definitionId":"123"}}

2、SQL查询结果

二、实现过程

1、初始信息

根据json可以按照模板生成下列SQL

SELECT
    sales_scenario,
	contract_register_type_code,
	-- ltc_contract_basic_info.create_time,
	-- ltc_contract_assess_record.contract_assess_end_date,
	DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time) subtime,
	1 num_every
FROM
	ltc_contract_basic_info
LEFT JOIN ltc_contract_assess_basic_info 
ON ltc_contract_basic_info.ltc_contract_basic_info_id = ltc_contract_assess_basic_info.ltc_contract_basic_info_id
LEFT JOIN ltc_contract_assess_record 
ON ltc_contract_assess_basic_info.ltc_contract_assess_basic_info_id = ltc_contract_assess_record.ltc_contract_assess_basic_info_id 
WHERE
	ltc_contract_assess_record.contract_assess_end_date > now( ) - INTERVAL '90 days'
	and
	(sales_scenario,contract_register_type_code) in (('0101','03'),('0102','01'),('020102','03'))

设置根据时间差排序,得到如下结果

下一步思路:计算subtime值的百分比

2、计算组内累计值

sum(num_every) over(partition by sales_scenario,contract_register_type_code order by subtime) rk_combine

 整体:

select 
	sales_scenario,
	contract_register_type_code,
	subtime,
	sum(num_every) over(partition by sales_scenario,contract_register_type_code order by subtime) rk_combine
from (
	SELECT
		sales_scenario,
		contract_register_type_code,
		-- ltc_contract_basic_info.create_time,
		-- ltc_contract_assess_record.contract_assess_end_date,
		DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time) subtime,
		1 num_every
    FROM
		ltc_contract_basic_info
	LEFT JOIN ltc_contract_assess_basic_info 
	ON ltc_contract_basic_info.ltc_contract_basic_info_id = ltc_contract_assess_basic_info.ltc_contract_basic_info_id
	LEFT JOIN ltc_contract_assess_record 
	ON ltc_contract_assess_basic_info.ltc_contract_assess_basic_info_id = ltc_contract_assess_record.ltc_contract_assess_basic_info_id 
	WHERE
		ltc_contract_assess_record.contract_assess_end_date > now( ) - INTERVAL '90 days'
		and
		(sales_scenario,contract_register_type_code) in (('0101','03'),('0102','01'),('020102','03'))
) res_start

3、获得组内最大值

max(rk_combine) over(partition by sales_scenario,contract_register_type_code) max_rk_combine

 整体:

select 
	sales_scenario,
	contract_register_type_code,
	subtime,
	rk_combine,
	max(rk_combine) over(partition by sales_scenario,contract_register_type_code) max_rk_combine
from
(

	select 
			sales_scenario,
			contract_register_type_code,
			subtime,
			sum(num_every) over(partition by sales_scenario,contract_register_type_code order by subtime) rk_combine
	from 
		(
		SELECT
			sales_scenario,
			contract_register_type_code,
			-- ltc_contract_basic_info.create_time,
			-- ltc_contract_assess_record.contract_assess_end_date,
			DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time) subtime,
			1 num_every
		FROM
			ltc_contract_basic_info
		LEFT JOIN ltc_contract_assess_basic_info 
		ON ltc_contract_basic_info.ltc_contract_basic_info_id = ltc_contract_assess_basic_info.ltc_contract_basic_info_id
		LEFT JOIN ltc_contract_assess_record 
		ON ltc_contract_assess_basic_info.ltc_contract_assess_basic_info_id = ltc_contract_assess_record.ltc_contract_assess_basic_info_id 
		WHERE
			ltc_contract_assess_record.contract_assess_end_date > now( ) - INTERVAL '90 days'
			and
			(sales_scenario,contract_register_type_code) in (('0101','03'),('0102','01'),('020102','03'))
		) res_start
) res_middle

4、获得百分比

round(rk_combine/max_rk_combine,2)*100 percent

整体:

(select
	sales_scenario,
	contract_register_type_code,
	subtime,
	round(rk_combine/max_rk_combine,2)*100 percent,
	null default_value
from 
(
	select 
		sales_scenario,
		contract_register_type_code,
		subtime,
		rk_combine,
		max(rk_combine) over(partition by sales_scenario,contract_register_type_code) max_rk_combine
	from
	(
		select 
			sales_scenario,
			contract_register_type_code,
			subtime,
			sum(num_every) over(partition by sales_scenario,contract_register_type_code order by subtime) rk_combine
		from 
		(
			SELECT
				sales_scenario,
				contract_register_type_code,
				-- ltc_contract_basic_info.create_time,
				-- ltc_contract_assess_record.contract_assess_end_date,
				DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time) subtime,
				1 num_every
			FROM
				ltc_contract_basic_info
			LEFT JOIN ltc_contract_assess_basic_info 
			ON ltc_contract_basic_info.ltc_contract_basic_info_id = ltc_contract_assess_basic_info.ltc_contract_basic_info_id
			LEFT JOIN ltc_contract_assess_record 
			ON ltc_contract_assess_basic_info.ltc_contract_assess_basic_info_id = ltc_contract_assess_record.ltc_contract_assess_basic_info_id 
			WHERE
				ltc_contract_assess_record.contract_assess_end_date > now( ) - INTERVAL '90 days'
				and
				(sales_scenario,contract_register_type_code) in (('0101','03'),('0102','01'),('020102','03'))
		) res_start
	) res_middle
) res_end)

 获得位于场景组合的基线均值结果

 5、获得默认基线值

(
SELECT
	sales_scenario,
	contract_register_type_code,
	DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time) subtime,
	0 percent,
	avg(DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time)) over() default_value
FROM
	ltc_contract_basic_info
LEFT JOIN ltc_contract_assess_basic_info 
ON ltc_contract_basic_info.ltc_contract_basic_info_id = ltc_contract_assess_basic_info.ltc_contract_basic_info_id
LEFT JOIN ltc_contract_assess_record 
ON ltc_contract_assess_basic_info.ltc_contract_assess_basic_info_id = ltc_contract_assess_record.ltc_contract_assess_basic_info_id 
WHERE
	ltc_contract_assess_record.contract_assess_end_date > now( ) - INTERVAL '90 days'
	and
	(
		sales_scenario is null
		or
		contract_register_type_code is null
		or
		(sales_scenario,contract_register_type_code) not in (('0101','03'),('0102','01'),('020102','03')
	)
)		
)

6、结果组合

(select
	sales_scenario,
	contract_register_type_code,
	subtime,
	round(rk_combine/max_rk_combine,2)*100 percent,
	null default_value
from 
(
	select 
		sales_scenario,
		contract_register_type_code,
		subtime,
		rk_combine,
		max(rk_combine) over(partition by sales_scenario,contract_register_type_code) max_rk_combine
	from
	(
		select 
			sales_scenario,
			contract_register_type_code,
			subtime,
			sum(num_every) over(partition by sales_scenario,contract_register_type_code order by subtime) rk_combine
		from 
		(
			SELECT
				sales_scenario,
				contract_register_type_code,
				-- ltc_contract_basic_info.create_time,
				-- ltc_contract_assess_record.contract_assess_end_date,
				DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time) subtime,
				1 num_every
			FROM
				ltc_contract_basic_info
			LEFT JOIN ltc_contract_assess_basic_info 
			ON ltc_contract_basic_info.ltc_contract_basic_info_id = ltc_contract_assess_basic_info.ltc_contract_basic_info_id
			LEFT JOIN ltc_contract_assess_record 
			ON ltc_contract_assess_basic_info.ltc_contract_assess_basic_info_id = ltc_contract_assess_record.ltc_contract_assess_basic_info_id 
			WHERE
				ltc_contract_assess_record.contract_assess_end_date > now( ) - INTERVAL '90 days'
				and
				(sales_scenario,contract_register_type_code) in (('0101','03'),('0102','01'),('020102','03'))
		) res_start
	) res_middle
) res_end)
union all
(
SELECT
	sales_scenario,
	contract_register_type_code,
	DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time) subtime,
	0 percent,
	avg(DATE_PART('MINUTE', ltc_contract_assess_record.contract_assess_end_date - ltc_contract_basic_info.create_time)) over() default_value
FROM
	ltc_contract_basic_info
LEFT JOIN ltc_contract_assess_basic_info 
ON ltc_contract_basic_info.ltc_contract_basic_info_id = ltc_contract_assess_basic_info.ltc_contract_basic_info_id
LEFT JOIN ltc_contract_assess_record 
ON ltc_contract_assess_basic_info.ltc_contract_assess_basic_info_id = ltc_contract_assess_record.ltc_contract_assess_basic_info_id 
WHERE
	ltc_contract_assess_record.contract_assess_end_date > now( ) - INTERVAL '90 days'
	and
	(
		sales_scenario is null
		or
		contract_register_type_code is null
		or
		(sales_scenario,contract_register_type_code) not in (('0101','03'),('0102','01'),('020102','03')
	)
)		
)

 三、步骤总结

1、计算组内累计值

sum(saleroom) over(partition by area order by date) ---求组内累计值

2、计算组内总计值/最大值

sum(saleroom) over(partition by area order by area)    ---求组内总计值

3、累计值/总计值

组内百分比= saleroom / total_value

累计百分比 = aggregate_value/total_value

四、总结

1、过程

用1表示每项的值

分组计算,按照subtime排序,得到累计值

求出最大的累计值,作为和

用各项累计值除以每一项的和,得到百分比

2、结果计算与返回

package com.boulderai.baseline.cal.service.impl;

import com.boulderai.baseline.cal.mq.MessageProducer;
import com.boulderai.baseline.cal.service.BaseLineCalService;
import cn.hutool.json.JSONUtil;
import com.boulderai.timeline.api.bigdata.BaseLineCalRequest;
import com.boulderai.timeline.api.bigdata.BaseLineMessage;
import org.apache.commons.lang3.StringUtils;
import org.springframework.jdbc.core.JdbcTemplate;
import org.springframework.stereotype.Service;
import org.springframework.util.CollectionUtils;
import lombok.extern.slf4j.Slf4j;
import javax.annotation.Resource;
import java.math.BigDecimal;
import java.util.*;
import java.util.concurrent.atomic.AtomicReference;
import java.util.stream.Collectors;

/**
 * @Title:BaseLineCalServiceImpl
 * @Descript:
 * @author: yanwei (yanwei@yanxxcloud.cn)
 * @date:2022/8/3
 **/
@Service
@Slf4j
public class BaseLineCalServiceImpl implements BaseLineCalService {

    @Resource
    private MessageProducer messageProducer;

    @Resource
    private JdbcTemplate jdbcTemplate;

    /**
     * @author 刘金辉
     * @param request
     * @return 发送MQ是否成功
     */
    @Override
    public Boolean calculate(BaseLineCalRequest request) {
        List<BaseLineMessage> messageList = new ArrayList<>();
        request.getRules().forEach(rule -> {
            String sql = request.computeSql(rule.getRuleType());
            String[] union_sql_list = sql.split("union all");
            String sql_in_scene_list = union_sql_list[0];
            Integer[] dataRange = rule.getDataRange();
            List<Map<String, Object>> resListMiddle = jdbcTemplate.queryForList(sql_in_scene_list);
            //sceneKeysList为场景的集合,如[delivery_way_code,contract_register_type_code]
            List<String> sceneKeysList = request.getScenes()
                    .getSceneKeys().stream()
                    .map(x -> x.getKey()).collect(Collectors.toList());
            List<Map<String, Object>> resListUltimate = new ArrayList<>();
            //inSceneListRes为查询到的拆分场景的结果集
            List<Map<String, Object>> inSceneListRes = jdbcTemplate.queryForList(sql_in_scene_list).stream()
                    .filter(x -> Math.round(Double.valueOf(x.get("percent").toString())) >= dataRange[0]
                            && Math.round(Double.valueOf(x.get("percent").toString())) <= dataRange[1])
                    .collect(Collectors.toList());
            request.getScenes().getSceneGroups().forEach(sceneValueMap -> {
                //sceneValueMap为每一个场景组合,如[delivery_way_code -> 01,contract_register_type_code -> 01,02]
                Map<String, Object> resInMap = new HashMap<String, Object>(); //构建每个要插入的map
                sceneValueMap.entrySet().forEach(everySceneCombine->{
                    resInMap.put(everySceneCombine.getKey(),everySceneCombine.getValue());
                });
                Double baselineValue = computeResult2New(sceneValueMap, sceneKeysList, inSceneListRes);
                resInMap.put("value", baselineValue);
                resListUltimate.add(resInMap);
            });
            int default_value = 0;
            String sql_not_in_scene_list = union_sql_list[1];
            List<Map<String, Object>> result = jdbcTemplate.queryForList(sql_not_in_scene_list);
            if (!CollectionUtils.isEmpty(result)) {
                default_value = Math.round(Float.parseFloat(result.get(0).get("default_value").toString()));
            }
            BaseLineMessage ansMsg = new BaseLineMessage();
            ansMsg.setDefinition(request.getDefinition());
            ansMsg.setRule(rule);
            ansMsg.setValues(resListUltimate);
            ansMsg.setDefaultValue(default_value);// 其他场景的平均值,如何确定
            messageList.add(ansMsg);
        });
        log.info("cal:{}", JSONUtil.toJsonStr(request));
        log.info("cal.return:{}", JSONUtil.toJsonStr(messageList));
        messageProducer.SendCalMessageList(messageList);
        return true;
    }

    /**
     * 场景组合拆分为子场景,去数据库查询
     * sceneValueMap为每一个场景组合,如[delivery_way_code -> 01,contract_register_type_code -> 01,02, value=10]
     * sceneKeysList为场景的集合,如[delivery_way_code,contract_register_type_code]
     * inSceneListRes为查询到的拆分场景的结果集
     * @param group
     * @param sceneKeys
     * @param result
     * @return
     */
    private Double computeResult2New(Map<String, String> sceneValueMap, List<String> sceneKeysList, List<Map<String, Object>> inSceneListResList) {
        Double avgOfCombineSceneVal = 0.0;
        List<String> splitSceneValList = Arrays.asList("");
        String[] sceneKeysArray = String.join(",", sceneKeysList).split(",");
        for (String sceneKey : sceneKeysArray) {
            String assignKeyValue = String.valueOf(sceneValueMap.get(sceneKey));
            String[] splitValueArray = assignKeyValue.split(","); //assignKeyValue=01,02 | splitValueArray=[01][02]
            splitSceneValList = calgroup(splitValueArray, splitSceneValList); //splitSceneValList=[01][02]=>splitSceneValList=[01,01][01,02]
        }
        for (String everySceneCombineStr : splitSceneValList) {  //splitSceneValList=[“01,01”,"01,02"] everySceneCombineStr="01,02"
            List<Map<String, Object>> filterEverySceneCombineResList = inSceneListResList.stream().filter(qryResMap -> {
                boolean fetched = true;
                String[] everySceneStr = everySceneCombineStr.split(",");  //["01", "02"]
                for (int i = 0; i < everySceneStr.length; i++) {
                    fetched = everySceneStr[i].equals(String.valueOf(qryResMap.get(sceneKeysArray[i]))); //找到满足条件的子数据
                    if (!fetched) {
                        return false;
                    }
                }
                return fetched;
            }).collect(Collectors.toList());//过滤得到满足每个场景组合的数据
            Double avgRes = filterEverySceneCombineResList.stream().map(p -> Math.round(Double.valueOf(p.get("subtime").toString())))
                    .collect(Collectors.averagingLong(Long::longValue));
            avgOfCombineSceneVal += avgRes;
        }
        return avgOfCombineSceneVal/splitSceneValList.size();
    }

    /**
     * @param group
     * @param sceneKeys
     * @param result
     * @return
     */
    private BigDecimal calResultBefore(Map<String, String> group, List<String> sceneKeys, List<Map<String, Object>> result) {
        List<String> rarry = Arrays.asList("");
        String[] arrKeys = String.join(",", sceneKeys).split(",");
        for (String key : arrKeys) {
            String v = String.valueOf(group.get(key));
            String[] varr = v.split(",");
            rarry = calgroup(varr, rarry);
        }
        //fetch data
        List<Map<String, Object>> results = new ArrayList<>();
        for (String r : rarry) {  //,020102,03
            List<Map<String, Object>> subList = result.stream().filter(rmap -> {
                boolean fetched = true;
                //String[] var = (String[]) Arrays.stream(r.split(",")).skip(0).collect(Collectors.toList()).toArray();
                String[] var = r.split(",");  //["", "020102", "03"]
                for (int i = 0; i < var.length; i++) {
                    fetched = var[i].equals(String.valueOf(rmap.get(arrKeys[i])));
                    if (!fetched) {
                        return false;
                    }
                }
                return fetched;
            }).collect(Collectors.toList());
            results.addAll(subList);
        }
        //计算结果
        return BigDecimal.ZERO;
    }

    /**
     * @param vs
     * @param sub
     * @return
     */
    private List<String> calgroup(String[] vs, List<String> sub) {
        List<String> ans = new ArrayList<>();

        for (String v : vs) {
            sub.stream().forEach(s -> {
                if (StringUtils.isBlank(s)) {
                    ans.add(v);
                } else {
                    ans.add(s + "," + v);
                }
            });
        }
        return ans;
    }

}

 

posted @ 2022-08-16 10:28  哥们要飞  阅读(1224)  评论(0编辑  收藏  举报