posts - 26,comments - 0,views - 12860
# 格网算法计算数据集区域数据密集度
import time
import random
import numpy as np
import pandas as pd

def create_data(count=300000):
    """Generate a simulated data set of random integer points.

    Each point has ``x`` drawn from ``[0, 300000)`` and ``y`` drawn from
    ``[-1500, 1500)`` using ``random.randrange``.

    Args:
        count: Number of points to generate. Defaults to 300000.

    Returns:
        A tuple ``(data_x, data_y, data)`` where ``data_x`` and ``data_y``
        are the coordinate lists and ``data`` is a list of ``[x, y]`` lists
        in the same order.
    """
    # x is drawn before y for every point, so a seeded generator yields a
    # reproducible sequence of points.
    data = [
        [random.randrange(0, 300000), random.randrange(-1500, 1500)]
        for _ in range(count)
    ]
    data_x = [point[0] for point in data]
    data_y = [point[1] for point in data]
    return data_x, data_y, data

def calculate_density(gridSize, bounds, data=None):
    """Compute the per-cell point density of a data set on a square grid.

    The rectangle described by ``bounds`` is divided into square cells of
    side ``gridSize``. Every point inside the rectangle is counted in its
    cell, and each count is divided by the cell area to obtain a density.
    The density of the containing cell is then appended to every in-bounds
    point (the point lists are mutated in place).

    Args:
        gridSize: Side length of one grid cell.
        bounds: ``[[x_min, y_min], [x_max, y_max]]`` of the region to grid.
        data: Optional list of mutable ``[x, y]`` points. When omitted, a
            simulated data set is generated with ``create_data()``.

    Returns:
        A tuple ``(densities, data)``. ``densities`` is a 2-D NumPy array
        indexed as ``[x_cell, y_cell]``; ``data`` is the point list, where
        each in-bounds point has its cell density appended. Points outside
        ``bounds`` are left unchanged.
    """
    if data is None:
        _, _, data = create_data()

    x_min, y_min = bounds[0][0], bounds[0][1]
    x_max, y_max = bounds[1][0], bounds[1][1]

    # Cell edges along each axis; n edges delimit n - 1 cells.
    x_edges = np.arange(x_min, x_max + gridSize, gridSize)
    y_edges = np.arange(y_min, y_max + gridSize, gridSize)
    n_x = max(len(x_edges) - 1, 0)
    n_y = max(len(y_edges) - 1, 0)

    # A plain integer array is enough for counting and avoids the heavy
    # per-element cost of DataFrame.iloc assignment.
    counts = np.zeros((n_x, n_y), dtype=np.int64)

    # Cell index of every point (None when the point is skipped), kept so the
    # second pass does not have to recompute it.
    cells = []
    for point in data:
        px, py = point[0], point[1]
        in_bounds = x_min <= px <= x_max and y_min <= py <= y_max
        if not in_bounds or n_x == 0 or n_y == 0:
            cells.append(None)
            continue

        # A point lying exactly on the upper bound would map to index n,
        # one past the last cell; clamp it into the last cell instead.
        x_index = min(int((px - x_min) // gridSize), n_x - 1)
        y_index = min(int((py - y_min) // gridSize), n_y - 1)

        counts[x_index, y_index] += 1
        cells.append((x_index, y_index))

    # Density = number of points in the cell / cell area.
    densities = counts / (gridSize * gridSize)

    # Attach the density of its cell to every counted point.
    for point, cell in zip(data, cells):
        if cell is not None:
            point.append(densities[cell])

    return densities, data

if __name__ == "__main__":
    # Time one full density computation over the simulated data set.
    # perf_counter() is monotonic and high-resolution, so the measured
    # interval is not affected by system clock adjustments.
    started = time.perf_counter()
    densities, _ = calculate_density(100, [[0, -1500], [300000, 1500]])
    elapsed = time.perf_counter() - started
    print("消耗的时间:", elapsed)
    print(densities)
/**
 * Generate a simulated data set of [x, y] points.
 *
 * x is the point's sequence number (0, 1, 2, ...); y is the difference of
 * two Math.random() samples, so it lies in the open interval (-1, 1).
 *
 * @param {number} [count=10000] Number of points to generate.
 * @returns {Array<[number, number]>} The generated points.
 */
function createData(count = 10000) {
    const data = []
    // The counter is declared with `let`: assigning to an undeclared `i`
    // leaks a global and throws in strict mode / ES modules.
    for (let i = 0; i < count; i++) {
        const data_y = (Math.random() - Math.random())
        const data_x = i
        data.push([data_x, data_y])
    }
    return data
}

// Return the largest element of `data`, or -Infinity when `data` is empty.
// Elements that do not compare greater than the running maximum (NaN
// included) are ignored.
function getMax(data){
    let best = -Infinity

    // Scan from the last index down to the first.
    for (let idx = data.length - 1; idx >= 0; idx--) {
        const candidate = data[idx]
        if (candidate > best) {
            best = candidate
        }
    }
    return best
}

// Return the smallest element of `data`, or Infinity when `data` is empty.
// Elements that do not compare less than the running minimum (NaN included)
// are ignored.
function getMin(data){
    let best = Infinity

    // Scan from the last index down to the first.
    for (let idx = data.length - 1; idx >= 0; idx--) {
        const candidate = data[idx]
        if (candidate < best) {
            best = candidate
        }
    }
    return best
}

// Compute the regional data density of a simulated data set.
//
// The bounding box of the points from createData() is split into
// (data.length / 100) columns by 100 rows. Each cell is a triple
// [columnIndex, rowIndex, value], where value is the number of points that
// fell into the cell divided by `gridSize`. Both the freshly created grid
// and the final result are logged to the console; the result is returned.
function calculateDensity(gridSize){
    const points = createData()
    const xs = points.map(p => p[0])
    const ys = points.map(p => p[1])

    // Extrema are found with explicit loops rather than
    // Math.min(...xs) / Math.max(...xs): the original author's note reports
    // that the spread form crashes on large inputs (it cites 125519).
    const min_x = getMin(xs)
    const max_x = getMax(xs)
    const min_y = getMin(ys)
    const max_y = getMax(ys)

    // Number of segments along each axis.
    const columns = points.length / 100
    const rows = 100

    // Extent of the data region divided by (segments - 1), so the maximum
    // coordinate maps to the last index.
    const cellWidth = (max_x - min_x) / (columns - 1)
    const cellHeight = (max_y - min_y) / (rows - 1)

    // Nested arrays emulate a 2-D grid; every cell starts as [0, 0, 0].
    const cells = Array.from(
        new Array(columns),
        () => Array.from(new Array(rows), () => [0, 0, 0])
    )

    console.log(cells)

    for (const p of points) {
        if (p[0] < min_x || p[0] > max_x || p[1] < min_y || p[1] > max_y) {
            continue
        }

        // Locate the cell containing this point and count it.
        const col = Math.floor((p[0] - min_x) / cellWidth)
        const row = Math.floor((p[1] - min_y) / cellHeight)
        cells[col][row][2] += 1
    }

    // Finalise each cell in place: record its grid coordinates and turn the
    // raw count into the reported value.
    const densities = cells.map((column, col) => column.map((cell, row) => {
        cell[0] = col
        cell[1] = row
        cell[2] = cell[2] / gridSize
        return cell
    }))

    console.log(densities)
    return densities
}

// Time one density computation and report the elapsed milliseconds.
const startedAt = Date.now()
calculateDensity(10)
const finishedAt = Date.now()
const elapsedMs = finishedAt - startedAt
console.log('消耗的时间:', elapsedMs)
posted on   转眼春夏秋冬如烟  阅读(237)  评论(0)  编辑  收藏  举报
相关博文:
阅读排行:
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· 别再用vector<bool>了!Google高级工程师:这可能是STL最大的设计失误
· 单元测试从入门到精通
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理
· 上周热点回顾(3.3-3.9)
< 2025年3月 >
23 24 25 26 27 28 1
2 3 4 5 6 7 8
9 10 11 12 13 14 15
16 17 18 19 20 21 22
23 24 25 26 27 28 29
30 31 1 2 3 4 5

点击右上角即可分享
微信分享提示