minizip-demo

 

http://www.winimage.com/zLibDll/minizip.html

 

minizip https://github.com/domoticz/minizip

 

zlib  https://github.com/madler/zlib/archive/refs/tags/v1.3.1.zip

 

先编译 zlib 和 minizip,然后在下面的工程中引入 minizip 的头文件(<minizip/zip.h>)并链接 minizip 库。

#include "elapse.h"
#include <iostream>
#include <vector>
#include <fstream>
#include <sstream>
#include <string>
#include <thread>
#include <mutex>
#include <minizip/zip.h>


const int iMaxSize = 10000000;    // total number of records generated by each test
const int numThreads = 5;         // number of writer threads started by test3
const int iMaxChunkSize = 200000; // per-file record count used when the data set is small

// Number of records handled by each thread.
// - Large data set (an even share exceeds iMaxChunkSize): split the data evenly
//   across numThreads files.
// - Small data set: every file simply receives up to iMaxChunkSize records.
// The even share is rounded UP so that numThreads * chunkSize always covers
// iMaxSize, even when iMaxSize is not a multiple of numThreads.
int chunkSize = (((iMaxSize + numThreads - 1) / numThreads) > iMaxChunkSize)
                    ? ((iMaxSize + numThreads - 1) / numThreads)
                    : iMaxChunkSize;

// Lock meant to serialize file writes across threads. Nothing in this file
// acquires it at the moment: each writer thread owns its own output file.
std::mutex fileMutex;

// One temperature record; each record becomes one CSV row (id,name,max,min,avg).
// Declared as a named struct rather than `typedef struct {...} tempDef;`:
// an unnamed class with default member initializers that is named only through
// a typedef is not C-compatible and is diagnosed by current compilers.
// The type stays an aggregate, so `tempDef{id, max, min, avg, name}` still works.
struct tempDef {
    int iID = 0;               // record identifier
    float fMaxTemp = 0.0f;     // maximum temperature
    float fMinTemp = 0.0f;     // minimum temperature
    float fAvgTemp = 0.0f;     // average temperature
    std::string strName = "";  // record name
};

/**
 * Baseline benchmark: builds iMaxSize records in a vector, then writes them to
 * "test1.csv" row by row through a text-mode std::ofstream.
 *
 * Each phase runs inside its own scope together with an ElapseMillsec object
 * (declared in elapse.h; presumably it reports the elapsed time of that scope —
 * confirm against the header).
 */
void test1(){
    std::cout<<std::endl<<"==================== test1 ===================="<<std::endl;
    std::vector<tempDef> vecList;

    // Phase 1: fill the vector.
    {
        ElapseMillsec  elap_vecpush("elap_vecpush");  // original author's note: 211ms
        // Exactly iMaxSize elements are pushed, so reserve exactly that many
        // (the previous 1.2x factor only wasted memory).
        vecList.reserve(iMaxSize);
        for(int i=0;i<iMaxSize;i++){
            tempDef td;
            td.iID = i;
            td.fMaxTemp = i+0.1f;
            td.fMinTemp = i+0.1f;
            td.fAvgTemp = i+0.1f;
            td.strName = std::string("test_") + std::to_string(i);
            vecList.push_back(td);
        }
    }

    // Phase 2: save the data as CSV.
    {
        ElapseMillsec  elap_save("elap_save");

        // Creates the file if it does not exist, overwrites it otherwise.
        // Original author's note: 2546ms for 1,000,000 rows; depends on the machine.
        std::ofstream ofs("test1.csv", std::ios::out);
        if (!ofs) {
            // Same failure handling as test2: report and give up.
            std::cerr << "Error opening file!" << std::endl;
            return;
        }

        ofs << "id,name,max,min,avg\n";
        for(const auto& td : vecList){
            ofs << td.iID << "," << td.strName << "," << td.fMaxTemp << "," << td.fMinTemp << "," << td.fAvgTemp << "\n";
        }
        ofs.close();
        std::cout << "save done" << std::endl;
    }
    std::cout<<std::endl;
}

/**
 * Second benchmark: builds iMaxSize records, formats every CSV row into an
 * in-memory std::ostringstream, then writes the whole payload to "test2.csv"
 * with a single write() on a binary-mode stream.
 *
 * Each phase runs inside its own scope together with an ElapseMillsec object
 * (declared in elapse.h; presumably it reports the elapsed time of that scope —
 * confirm against the header).
 */
void test2(){
    std::cout<<std::endl<<"==================== test2 ===================="<<std::endl;
    std::vector<tempDef> vecList;

    // Phase 1: fill the vector.
    {
        ElapseMillsec elap_vecpush("elap_vecpush2");  // original author's note: 211ms
        // Exactly iMaxSize elements are added, so reserve exactly that many.
        vecList.reserve(iMaxSize);
        for (int i = 0; i < iMaxSize; ++i) {
            vecList.emplace_back(tempDef{i, i + 0.1f, i + 0.1f, i + 0.1f, "test_" + std::to_string(i)});
        }
    }

    // Phase 2: save the data as CSV.
    {
        ElapseMillsec elap_save("elap_save2");

        // Binary mode avoids text-mode newline translation on the way out.
        std::ofstream ofs("test2.csv", std::ios::out | std::ios::binary);
        if (!ofs) {
            std::cerr << "Error opening file!" << std::endl;
            return;
        }

        ofs << "id,name,max,min,avg\n";  // header row

        std::ostringstream oss;
        for (const auto& td : vecList) {
            oss << td.iID << "," << td.strName << "," << td.fMaxTemp << "," << td.fMinTemp << "," << td.fAvgTemp << "\n";
        }

        // str() returns a copy of the entire buffer, so take it exactly once
        // instead of once for the pointer and once more for the size.
        const std::string payload = oss.str();
        ofs.write(payload.data(), static_cast<std::streamsize>(payload.size()));  // one bulk write
        ofs.close();

        std::cout << "save done" << std::endl;
    }
    std::cout<<std::endl;
}




// =========== 使用多线程读写 ============
// 线程处理函数,负责将一部分数据写入文件
void processChunk(int threadID, const std::vector<tempDef>& vecList, int start, int end) {
    if(start >= end || start> (int)vecList.size()){
        return;
    }
    std::ofstream ofs("test3_" + std::to_string(threadID) + ".csv", std::ios::out | std::ios::binary);
    if (!ofs) {
        std::cerr << "Error opening file for thread " << threadID << std::endl;
        return;
    }

    // 设置1MB缓冲区,减少I/O次数
    const size_t buf_size = 1 << 20;
    char* buffer = new char[buf_size];
    ofs.rdbuf()->pubsetbuf(buffer, buf_size);

    std::string row;
    row.reserve(64);

    row = "id,name,max,min,avg\n";
    ofs.write(row.c_str(), row.size());

    for (int i = start; i < end && i<(int)vecList.size(); ++i) {
        const auto& td = vecList[i];
        row = std::to_string(td.iID) + "," + td.strName + "," +
              std::to_string(td.fMaxTemp) + "," +
              std::to_string(td.fMinTemp) + "," +
              std::to_string(td.fAvgTemp) + "\n";

        ofs.write(row.c_str(), row.size());
    }

    ofs.close();
    delete[] buffer;
    std::cout << "Thread " << threadID << " done writing" << std::endl;
}

// 主函数,启动线程
void test3() {
    std::cout<<std::endl<<"==================== test3 ===================="<<std::endl;

    std::vector<tempDef> vecList;
    vecList.reserve(iMaxSize);

    // 数据初始化
    {
        ElapseMillsec elap_vecpush("elap_vecpush3");
        for (int i = 0; i < iMaxSize; ++i) {
            vecList.emplace_back(tempDef{i, i + 0.1f, i + 0.1f, i + 0.1f, "test_" + std::to_string(i)});
        }
    }

    ElapseMillsec elap_save("elap_save3");
    // 启动多个线程,每个线程处理 10 万条数据
    std::vector<std::thread> threads;
    for (int i = 0; i < numThreads; ++i) {
        int start = i * chunkSize;
        int end = (i + 1) * chunkSize;
        threads.emplace_back(processChunk, i, std::ref(vecList), start, end);
    }

    // 等待所有线程完成
    for (auto& t : threads) {
        t.join();
    }

    // // 将各个线程生成的文件合并
    // std::ofstream finalFile("test3.csv", std::ios::out | std::ios::binary);
    // finalFile << "id,name,max,min,avg\n";  // 写入表头

    // for (int i = 0; i < numThreads; ++i) {
    //     std::ifstream partFile("test3_" + std::to_string(i) + ".csv", std::ios::in | std::ios::binary);
    //     finalFile << partFile.rdbuf();  // 将子文件内容合并到最终文件中
    //     partFile.close();
    // }
    //finalFile.close();
    //std::cout << "All threads done and files merged" << std::endl;
    std::cout<<std::endl;
}



/**
 * Packs the five CSV files written by test3 ("test3_0.csv" .. "test3_4.csv")
 * into a new archive "test4.zip" using minizip with deflate compression.
 *
 * Any failure (archive creation, unreadable input, minizip error) is reported
 * on std::cerr, the archive handle is closed, and the function returns early;
 * the success message is printed only when every step succeeded.
 *
 * ElapseMillsec is declared in elapse.h; presumably it reports the elapsed
 * time of this function — confirm against the header.
 */
void compressFiles() {
    ElapseMillsec elap_zip("elap_zip");
    zipFile zf = zipOpen("test4.zip", APPEND_STATUS_CREATE);
    if (zf == nullptr) {
        std::cerr << "Error creating zip file!" << std::endl;
        return;
    }

    // The five CSV files to add to the archive.
    const char* files[] = {"test3_0.csv", "test3_1.csv", "test3_2.csv", "test3_3.csv", "test3_4.csv"};

    // Inputs are streamed through one reusable 1 MiB buffer instead of loading
    // each file completely into memory; this also keeps every length passed to
    // zipWriteInFileInZip well inside the range of its `unsigned` parameter.
    std::vector<char> chunk(static_cast<size_t>(1) << 20);

    for (const char* file : files) {
        // Open the input first so a missing file does not leave an empty entry behind.
        std::ifstream ifs(file, std::ios::binary);
        if (!ifs) {
            std::cerr << "Error opening file " << file << " for reading!" << std::endl;
            zipClose(zf, nullptr);
            return;
        }

        zip_fileinfo zfi = {};
        if (zipOpenNewFileInZip(zf, file, &zfi, nullptr, 0, nullptr, 0, nullptr, Z_DEFLATED, Z_DEFAULT_COMPRESSION) != ZIP_OK) {
            std::cerr << "Error adding file " << file << " to zip!" << std::endl;
            zipClose(zf, nullptr);
            return;
        }

        bool ok = true;
        while (ok && ifs) {
            ifs.read(chunk.data(), static_cast<std::streamsize>(chunk.size()));
            const std::streamsize got = ifs.gcount();  // may be short on the final read
            if (got > 0 &&
                zipWriteInFileInZip(zf, chunk.data(), static_cast<unsigned>(got)) != ZIP_OK) {
                ok = false;
            }
        }
        if (ifs.bad()) {
            ok = false;  // read error, as opposed to a normal end of file
        }
        if (zipCloseFileInZip(zf) != ZIP_OK) {
            ok = false;
        }
        if (!ok) {
            std::cerr << "Error writing file " << file << " to zip!" << std::endl;
            zipClose(zf, nullptr);
            return;
        }
    }

    if (zipClose(zf, nullptr) != ZIP_OK) {
        std::cerr << "Error closing zip file!" << std::endl;
        return;
    }
    std::cout << "Files successfully compressed into test4.zip" << std::endl;
}

/**
 * Entry point: runs the three CSV-writing benchmarks in order, then zips the
 * files produced by the last one.
 */
int main()
{
    // Benchmarks, from the simplest writer to the multi-threaded one.
    test1();
    test2();
    test3();

    // Archive the per-thread CSV files written by test3.
    compressFiles();

    return 0;
}

 

posted @ 2024-10-14 23:08  He_LiangLiang  阅读(10)  评论(0编辑  收藏  举报