minizip-demo
http://www.winimage.com/zLibDll/minizip.html
minizip https://github.com/domoticz/minizip
zlib https://github.com/madler/zlib/archive/refs/tags/v1.3.1.zip
先编译 zlib 和 minizip,然后在下面的示例工程中包含并链接 minizip 库。
#include "elapse.h"

#include <fstream>
#include <functional>
#include <iostream>
#include <mutex>
#include <sstream>
#include <string>
#include <thread>
#include <vector>

#include <minizip/zip.h>

// Demo / micro-benchmark: generate iMaxSize records, write them to CSV in three
// different ways (test1..test3) and finally pack the test3 part files into a
// zip archive with minizip. ElapseMillsec comes from elapse.h; it is used here
// as a scoped timer (presumably it reports the elapsed milliseconds when it
// goes out of scope -- confirm against elapse.h).

const int iMaxSize = 10000000;     // total number of records generated per test
const int numThreads = 5;          // number of writer threads == number of part files
const int iMaxChunkSize = 200000;  // lower bound for the records assigned to one part file

// Even share per thread, rounded up so that a remainder is not silently dropped
// when iMaxSize is not a multiple of numThreads.
const int iEvenShare = (iMaxSize + numThreads - 1) / numThreads;

// Records handled by each thread: large data sets are spread evenly over the
// numThreads part files, small ones get iMaxChunkSize records per file.
int chunkSize = (iEvenShare > iMaxChunkSize) ? iEvenShare : iMaxChunkSize;

// File-write lock. NOTE(review): not referenced anywhere in this file -- every
// thread writes to its own part file, so no locking is needed at the moment.
std::mutex fileMutex;

// Header line shared by every CSV file produced below.
const char* const kCsvHeader = "id,name,max,min,avg\n";

// One temperature record (a single CSV row).
// A named struct is used instead of `typedef struct { ... } tempDef;` because an
// unnamed class that is named through a typedef must not have default member
// initializers.
struct tempDef {
    int iID = 0;
    float fMaxTemp = 0.0f;
    float fMinTemp = 0.0f;
    float fAvgTemp = 0.0f;
    std::string strName;
};

// Baseline: fill the vector with push_back and stream every row directly into
// test1.csv through operator<<.
void test1() {
    std::cout << std::endl << "==================== test1 ====================" << std::endl;
    std::vector<tempDef> vecList;

    // Add the data to the vector.
    {
        ElapseMillsec elap_vecpush("elap_vecpush");  // original author's note: 211ms
        vecList.reserve(iMaxSize);  // exactly iMaxSize elements are added
        for (int i = 0; i < iMaxSize; i++) {
            tempDef td;
            td.iID = i;
            td.fMaxTemp = i + 0.1f;
            td.fMinTemp = i + 0.1f;
            td.fAvgTemp = i + 0.1f;
            td.strName = std::string("test_") + std::to_string(i);
            vecList.push_back(td);
        }
    }

    // Save the data to CSV.
    {
        ElapseMillsec elap_save("elap_save");
        // Creates the file if it does not exist, overwrites it otherwise.
        // Original author's note: about 2546 ms for 1,000,000 records; depends on the machine.
        std::ofstream ofs("test1.csv", std::ios::out);
        if (!ofs) {
            std::cerr << "Error opening file!" << std::endl;
            return;
        }
        ofs << kCsvHeader;
        for (const auto& td : vecList) {
            ofs << td.iID << "," << td.strName << "," << td.fMaxTemp << "," << td.fMinTemp << ","
                << td.fAvgTemp << "\n";
        }
        ofs.close();
        if (!ofs) {
            std::cerr << "Error writing test1.csv!" << std::endl;
            return;
        }
        std::cout << "save done" << std::endl;
    }
    std::cout << std::endl;
}

// Variant 2: fill the vector with emplace_back, format everything into one
// in-memory buffer and hand it to the file with a single write() call.
void test2() {
    std::cout << std::endl << "==================== test2 ====================" << std::endl;
    std::vector<tempDef> vecList;

    // Add the data to the vector.
    {
        ElapseMillsec elap_vecpush("elap_vecpush2");  // original author's note: 211ms
        vecList.reserve(iMaxSize);
        for (int i = 0; i < iMaxSize; ++i) {
            vecList.emplace_back(tempDef{i, i + 0.1f, i + 0.1f, i + 0.1f, "test_" + std::to_string(i)});
        }
    }

    // Save the data to CSV.
    {
        ElapseMillsec elap_save("elap_save2");
        // Binary mode: no newline translation on the way out.
        std::ofstream ofs("test2.csv", std::ios::out | std::ios::binary);
        if (!ofs) {
            std::cerr << "Error opening file!" << std::endl;
            return;
        }
        ofs << kCsvHeader;

        std::ostringstream oss;
        for (const auto& td : vecList) {
            oss << td.iID << "," << td.strName << "," << td.fMaxTemp << "," << td.fMinTemp << ","
                << td.fAvgTemp << "\n";
        }

        // str() returns a copy of the whole buffer, so take it exactly once.
        const std::string payload = oss.str();
        ofs.write(payload.data(), static_cast<std::streamsize>(payload.size()));
        ofs.close();
        if (!ofs) {
            std::cerr << "Error writing test2.csv!" << std::endl;
            return;
        }
        std::cout << "save done" << std::endl;
    }
    std::cout << std::endl;
}

// =========== Multi-threaded writing ============

// Thread body: writes records [start, end) of vecList to "test3_<threadID>.csv".
//
// threadID  index used in the output file name and in log messages
// vecList   full data set; only read, never modified
// start     first record index (inclusive)
// end       one past the last record index; clamped to vecList.size()
//
// An empty or invalid range is a no-op: no file is created.
void processChunk(int threadID, const std::vector<tempDef>& vecList, int start, int end) {
    const int total = static_cast<int>(vecList.size());
    const int last = (end < total) ? end : total;
    if (start < 0 || start >= last) {
        return;
    }

    std::ofstream ofs("test3_" + std::to_string(threadID) + ".csv", std::ios::out | std::ios::binary);
    if (!ofs) {
        std::cerr << "Error opening file for thread " << threadID << std::endl;
        return;
    }

    // Collect rows in memory and hand them to the stream in ~1 MiB pieces to
    // reduce the number of I/O calls. This replaces pubsetbuf(), whose effect
    // on an already opened file is implementation-defined.
    const std::size_t flushThreshold = std::size_t(1) << 20;
    std::string batch;
    batch.reserve(flushThreshold + 128);
    batch += kCsvHeader;

    for (int i = start; i < last; ++i) {
        const tempDef& td = vecList[i];
        batch += std::to_string(td.iID);
        batch += ',';
        batch += td.strName;
        batch += ',';
        batch += std::to_string(td.fMaxTemp);
        batch += ',';
        batch += std::to_string(td.fMinTemp);
        batch += ',';
        batch += std::to_string(td.fAvgTemp);
        batch += '\n';

        if (batch.size() >= flushThreshold) {
            ofs.write(batch.data(), static_cast<std::streamsize>(batch.size()));
            batch.clear();  // keeps the capacity for the next batch
        }
    }
    if (!batch.empty()) {
        ofs.write(batch.data(), static_cast<std::streamsize>(batch.size()));
    }

    ofs.close();
    if (!ofs) {
        std::cerr << "Error writing file for thread " << threadID << std::endl;
        return;
    }

    // Build the message first so that lines from different threads are less
    // likely to interleave on the console.
    const std::string done = "Thread " + std::to_string(threadID) + " done writing\n";
    std::cout << done << std::flush;
}

// Variant 3: split the data set into numThreads consecutive ranges of chunkSize
// records and let one thread per range write its own part file.
void test3() {
    std::cout << std::endl << "==================== test3 ====================" << std::endl;
    std::vector<tempDef> vecList;
    vecList.reserve(iMaxSize);

    // Initialise the data.
    {
        ElapseMillsec elap_vecpush("elap_vecpush3");
        for (int i = 0; i < iMaxSize; ++i) {
            vecList.emplace_back(tempDef{i, i + 0.1f, i + 0.1f, i + 0.1f, "test_" + std::to_string(i)});
        }
    }

    ElapseMillsec elap_save("elap_save3");

    // Start the worker threads; each one handles chunkSize records.
    std::vector<std::thread> threads;
    threads.reserve(numThreads);
    for (int i = 0; i < numThreads; ++i) {
        const int start = i * chunkSize;
        const int end = start + chunkSize;  // processChunk clamps this to the data size
        threads.emplace_back(processChunk, i, std::cref(vecList), start, end);
    }

    // Wait for all threads to finish.
    for (auto& t : threads) {
        t.join();
    }

    // Disabled: merge the part files produced by the threads into one file.
    // std::ofstream finalFile("test3.csv", std::ios::out | std::ios::binary);
    // finalFile << "id,name,max,min,avg\n"; // write the header
    // for (int i = 0; i < numThreads; ++i) {
    //     std::ifstream partFile("test3_" + std::to_string(i) + ".csv", std::ios::in | std::ios::binary);
    //     finalFile << partFile.rdbuf(); // append the part file to the final file
    //     partFile.close();
    // }
    // finalFile.close();
    // std::cout << "All threads done and files merged" << std::endl;

    std::cout << std::endl;
}

// Packs the part files written by test3 ("test3_0.csv" .. "test3_<numThreads-1>.csv")
// into "test4.zip" using deflate at the default compression level.
//
// Every input is streamed through a fixed-size buffer instead of being loaded
// completely into memory. A part file that cannot be opened is reported and
// skipped; any minizip error aborts the archive.
void compressFiles() {
    ElapseMillsec elap_zip("elap_zip");

    zipFile zf = zipOpen("test4.zip", APPEND_STATUS_CREATE);
    if (zf == nullptr) {
        std::cerr << "Error creating zip file!" << std::endl;
        return;
    }

    std::vector<char> buffer(std::size_t(1) << 20);
    bool allAdded = true;

    for (int i = 0; i < numThreads; ++i) {
        const std::string file = "test3_" + std::to_string(i) + ".csv";

        std::ifstream ifs(file, std::ios::binary);
        if (!ifs) {
            std::cerr << "Error opening file " << file << " for reading!" << std::endl;
            allAdded = false;
            continue;
        }

        zip_fileinfo zfi = {};
        if (zipOpenNewFileInZip(zf, file.c_str(), &zfi, nullptr, 0, nullptr, 0, nullptr, Z_DEFLATED,
                                Z_DEFAULT_COMPRESSION) != ZIP_OK) {
            std::cerr << "Error adding file " << file << " to zip!" << std::endl;
            zipClose(zf, nullptr);
            return;
        }

        // read() reports failure on the final, partial block, so gcount() decides
        // whether there is still data to hand over.
        bool entryOk = true;
        while (ifs.read(buffer.data(), static_cast<std::streamsize>(buffer.size())) || ifs.gcount() > 0) {
            const unsigned len = static_cast<unsigned>(ifs.gcount());
            if (zipWriteInFileInZip(zf, buffer.data(), len) != ZIP_OK) {
                entryOk = false;
                break;
            }
        }
        if (zipCloseFileInZip(zf) != ZIP_OK) {
            entryOk = false;
        }
        if (!entryOk) {
            std::cerr << "Error writing file " << file << " to zip!" << std::endl;
            zipClose(zf, nullptr);
            return;
        }
    }

    if (zipClose(zf, nullptr) != ZIP_OK) {
        std::cerr << "Error finalizing zip file!" << std::endl;
        return;
    }
    if (allAdded) {
        std::cout << "Files successfully compressed into test4.zip" << std::endl;
    }
}

int main() {
    test1();
    test2();
    test3();
    compressFiles();
    return 0;
}