快乐坚果

博客园 首页 新随笔 联系 订阅 管理

利用 goquery 读取 HTML 中指定 ID 的表格(table)数据,并按行、列保存到二维字符串数组中。

代码如下:

 1 package main
 2 
 3 import (
 4     "fmt"
 5     "log"
 6     "strings"
 7     "io"
 8 
 9     "github.com/PuerkitoBio/goquery"
10 )
11 
12 func main() {
13     html := `<html>
14     <body>
15         <table id="mydata">
16             <tr id="one">
17                 <td>one</td>
18                 <td>two</td>
19                 <td>three</td>
20             </tr>
21             <tr id="two">
22                 <td>3</td>
23                 <td>4</td>
24                 <td>5</td>
25             </tr>
26             <tr id="three">
27                 <td>八</td>
28                 <td>九</td>
29                 <td>十</td>
30             </tr>
31             <tr id="four">
32                 <td>百</td>
33                 <td>千</td>
34                 <td>万</td>
35             </tr>
36             <tr id="five">
37                 <td>100</td>
38                 <td>200</td>
39                 <td>300</td>
40             </tr>
41         </table>
42         <table id="ayy">
43             <tr>
44                 <td>ookk</td>
45                 <td>aabb</td>
46             </tr>
47             <tr>
48                 <td>ccdd</td>
49                 <td>eeff</td>
50             </tr>
51             <tr>
52                 <td>1122</td>
53                 <td>3344</td>
54             </tr>
55         </table>
56     </body>
57     </html>
58     `
59 
60     var table [][]string = GetHtmlTableData(strings.NewReader(html), "#mydata")
61 
62     for trkey,tr := range table {
63         for tdkey, _ := range tr{
64             fmt.Printf("%10s\t|", table[trkey][tdkey])
65             if len(tr) == (tdkey+1) {
66                 fmt.Println("\n","------------------------------------------------")
67             }
68         }
69     }
70     table = GetHtmlTableData(strings.NewReader(html), "#ayy")
71     fmt.Println(table)
72 }
73 
74 func GetHtmlTableData(html io.Reader, tableId string) (table [][]string) {
75     var tableCache [][]string
76 
77     dom,err:=goquery.NewDocumentFromReader(html)
78     if err != nil{
79         log.Fatalln(err)
80     }
81 
82     trNum := dom.Find("table"+tableId).Find("tr").Length()
83     tdNum := dom.Find("table"+tableId).Find("tr").Find("td").Length()
84 
85     tableCache = make([][]string, trNum, trNum)
86     for i:=0; i<trNum; i++{
87         tableCache[i] = make([]string, tdNum/trNum, tdNum/trNum)
88         for j:=0; j<(tdNum/trNum); j++ {
89             tableCache[i][j] = dom.Find(tableId).Find("tr").Find("td").Eq(i*(tdNum/trNum)+j).Text()
90         }
91     }
92 
93     return tableCache
94 }

 

输出结果如下:

posted on 2020-06-15 12:56  merrynuts  阅读(1578)  评论(1)  编辑  收藏  举报