Stangray
SAP THQ Apple Nintendo Google Reynolds Amazon Yahoo

        public static DataSet GetPrivateItemList(HtmlDocument htmlDocument)
        {
            DataSet ds = new DataSet();
            HtmlElementCollection TableCollection = htmlDocument.GetElementsByTagName("TABLE");
            int tableIndex = 1;
            foreach (HtmlElement tableElement in TableCollection)
            {              
                HtmlElementCollection TrCollection = tableElement.GetElementsByTagName("TR");              
                int rows = TrCollection.Count;
                int columns = 0;
                foreach (HtmlElement trElement in TrCollection)
                {
                    HtmlElementCollection TdCollection = trElement.GetElementsByTagName("TD");
                    int maxColumns = TdCollection.Count;
                    //最大列数
                    if (maxColumns > columns)
                    {
                        columns = maxColumns;
                    }
                }

                //创建一张空的映射表
                DataTable memDataTable = CreateRelationTable(columns, rows,tableIndex);

                int rowIndex = 0;

                foreach (HtmlElement trElement in TrCollection)
                {
                    HtmlElementCollection TdCollection = trElement.GetElementsByTagName("TD");
                    int tmpColumnIndex = 0;
                    foreach (HtmlElement td in TdCollection)
                    {
                         int tmpRowIndex = rowIndex;
                        int rowspan = int.Parse(td.GetAttribute("ROWSPAN"));
                        int colspan = int.Parse(td.GetAttribute("COLSPAN"));

                        if (memDataTable.Rows[tmpRowIndex][tmpColumnIndex].ToString() == string.Empty)
                        {
                           
                        }
                        else
                        {
                            for (int j = 0; j < memDataTable.Columns.Count; j++)
                            {
                                if (memDataTable.Rows[tmpRowIndex][j].ToString() == string.Empty)
                                {
                                    tmpColumnIndex = j;
                                    break;
                                }
                            }
                        }

                        int doubleRowIndex = tmpRowIndex;
                        for (int i = 1; i <= rowspan; i++)
                        {
                            //处理跨行
                            memDataTable.Rows[tmpRowIndex][tmpColumnIndex] = td.InnerText;
                            tmpRowIndex++;
                        }

                       
                        for (int j = 2; j <= colspan; j++)
                        {
                            tmpColumnIndex++;
                            for (int i = 2; i <= rowspan; i++)
                            {
                                //处理跨行
                                if (doubleRowIndex >= memDataTable.Rows.Count -1)
                                {
                                    break;
                                }
                                memDataTable.Rows[doubleRowIndex + 1][tmpColumnIndex] = " ";
                                doubleRowIndex++;
                            }
                        }

                        tmpColumnIndex++;
                    }
                    rowIndex++;
                }


                //去除空列功能
                for (int i = memDataTable.Columns.Count - 1; i >= 0; i--)
                {
                    bool candelete = true;
                    for (int j = 0; j < memDataTable.Rows.Count; j++)
                    {
                        if (memDataTable.Rows[j][i].ToString().Trim().Length > 0)
                        {
                            candelete = false;
                            break;
                        }
                    }

                    if (candelete)
                    {
                        memDataTable.Columns.RemoveAt(i);
                    }
                }

                //去除空行
                for (int i = memDataTable.Rows.Count - 1; i >= 0; i--)
                {
                    bool candelete = true;
                    for (int j = 0; j < memDataTable.Columns.Count; j++)
                    {
                        if (memDataTable.Rows[i][j].ToString().Trim().Length > 0)
                        {
                            candelete = false;
                            break;
                        }
                    }

                    if (candelete)
                    {
                        memDataTable.Rows.RemoveAt(i);
                    }
                }

                ds.Tables.Add(memDataTable);

                tableIndex++;
            }

            return ds;
        }

posted on 2011-05-24 12:30  Stangray  阅读(1531)  评论(0)  编辑  收藏  举报