从 HTML 页面中抽取 table 表格数据
/**
 * Parse a colspan/rowspan attribute value into a positive integer.
 *
 * @param {String|undefined} value Raw attribute value as returned by .attr().
 * @return {Number} The integer span, or 1 when the value is missing,
 *                  non-numeric, zero or negative.
 */
function parseSpanAttr(value) {
  var span = parseInt(value, 10);
  return span > 0 ? span : 1;
}

/**
 * Collect the cells of a list of table rows.
 *
 * @param {Object} trElems Array-like collection of <tr> DOM elements
 *                         (for example a jQuery set).
 * @param {String} type    'head' or 'body'. Kept for backward compatibility;
 *                         both <th> and <td> cells are now read for either
 *                         value, so header cells inside body rows (and data
 *                         cells inside head rows) are no longer dropped.
 * @return {Array} One array per row; each entry is
 *                 { colspan: Number, rowspan: Number, text: String }.
 */
function getDataFromTrElems(trElems, type) {
  if (!trElems || !trElems.length) {
    return [];
  }
  var data = [];
  for (var i = 0, n = trElems.length; i < n; i++) {
    var cellElems = $(trElems[i]).children('th, td');
    var trData = [];
    for (var j = 0, m = cellElems.length; j < m; j++) {
      // Every per-cell value is a declared local: the previous version
      // assigned undeclared names, which leaked globals and throws in
      // strict mode.
      var cellElem = $(cellElems[j]);
      trData.push({
        colspan: parseSpanAttr(cellElem.attr('colspan')),
        rowspan: parseSpanAttr(cellElem.attr('rowspan')),
        text: cellElem.text().trim()
      });
    }
    data.push(trData);
  }
  return data;
}

/**
 * Collect the row data of a table head.
 *
 * @param {Object} elem jQuery set holding the <thead> element(s).
 * @return {Array} Row data as produced by getDataFromTrElems; [] when the
 *                 set is empty.
 */
function getDataFromHead(elem) {
  if (!elem || !elem.length) {
    return [];
  }
  var trElems = elem.children('tr');
  return getDataFromTrElems(trElems, 'head');
}

/**
 * Collect the row data of a table body.
 *
 * @param {Object} elem jQuery set holding the <tbody> element(s).
 * @return {Array} Row data as produced by getDataFromTrElems; [] when the
 *                 set is empty.
 */
function getDataFromBody(elem) {
  if (!elem || !elem.length) {
    return [];
  }
  var trElems = elem.children('tr');
  return getDataFromTrElems(trElems, 'body');
}

/**
 * Extract a table element into a two-dimensional array of cell texts.
 * Head rows come first, followed by body rows. (The misspelled name is
 * kept so existing callers continue to work.)
 *
 * @param {Object} elem jQuery set holding the <table> element.
 * @return {Array} Grid of strings as produced by formRawData; [] when the
 *                 set is empty.
 */
function getDataFromTbaleElem(elem) {
  if (!elem || !elem.length) {
    return [];
  }
  var headElem = elem.children('thead');
  var bodyElem = elem.children('tbody');
  var headData = getDataFromHead(headElem);
  var bodyData = getDataFromBody(bodyElem);
  var rawData = headData.concat(bodyData);
  return formRawData(rawData);
}

/**
 * Expand row data with colspan/rowspan information into a plain grid in
 * which every spanned position repeats the text of the spanning cell.
 *
 * The input is not modified. A rowspan that reaches past the last row is
 * clipped to the available rows. Grid positions that no cell covers are
 * left unset.
 *
 * @param {Array} rawData Rows of { colspan, rowspan, text } objects.
 * @return {Array} Array of rows, each an array of cell texts.
 */
function formRawData(rawData) {
  if (!rawData || !rawData.length) {
    return [];
  }
  var rowCount = rawData.length;
  var data = [];
  // Create every row up front so that a rowspan can write into rows that
  // have not been visited yet.
  for (var k = 0; k < rowCount; k++) {
    data.push([]);
  }
  rawData.forEach(function(trData, i) {
    var colIndex = 0;
    trData.forEach(function(cellData) {
      // Skip grid columns already claimed by a rowspan from a row above.
      while (colIndex in data[i]) {
        ++colIndex;
      }
      var endRow = Math.min(i + cellData.rowspan, rowCount);
      for (var r = i; r < endRow; r++) {
        for (var c = 0; c < cellData.colspan; c++) {
          data[r][colIndex + c] = cellData.text;
        }
      }
      colIndex += cellData.colspan;
    });
  });
  return data;
}

// Demo bootstrap: extract every table on the page and log the result with
// the elapsed time. Skipped when there is no DOM so the helpers above can be
// loaded in other environments.
if (typeof document !== 'undefined') {
  $(document).ready(function() {
    var start = new Date();
    var tableElems = $('table');
    var results = {};
    for (var index = 0, len = tableElems.length; index < len; index++) {
      var tableElem = $(tableElems[index]);
      results[index] = getDataFromTbaleElem(tableElem);
    }
    var end = new Date();
    console.log(results, end - start);
  });
}
数据是冰冷的,但我们要让数据温暖起来,改变我们的生活!
【推荐】国内首个AI IDE,深度理解中文开发场景,立即下载体验Trae
【推荐】编程新体验,更懂你的AI,立即体验豆包MarsCode编程助手
【推荐】抖音旗下AI助手豆包,你的智能百科全书,全免费不限次数
【推荐】轻量又高性能的 SSH 工具 IShell:AI 加持,快人一步
· AI与.NET技术实操系列:向量存储与相似性搜索在 .NET 中的实现
· 基于Microsoft.Extensions.AI核心库实现RAG应用
· Linux系列:如何用heaptrack跟踪.NET程序的非托管内存泄露
· 开发者必知的日志记录最佳实践
· SQL Server 2025 AI相关能力初探
· winform 绘制太阳,地球,月球 运作规律
· 震惊!C++程序真的从main开始吗?99%的程序员都答错了
· AI与.NET技术实操系列(五):向量存储与相似性搜索在 .NET 中的实现
· 超详细:普通电脑也行Windows部署deepseek R1训练数据并当服务器共享给他人
· 【硬核科普】Trae如何「偷看」你的代码?零基础破解AI编程运行原理