代码改变世界

MVC爬取网页指定内容到数据库

2019-03-13 20:28  .net小跟班(杜)  阅读(295)  评论(0)  编辑  收藏  举报

控制器

//获取并插入

//XPath获取
        /// <summary>
        /// Loads the page at <paramref name="url"/>, takes the inner text of one element selected by a
        /// fixed XPath, splits that text on tab characters and inserts every non-blank piece as a row
        /// of the Files table.
        /// </summary>
        /// <param name="url">Address of the page to scrape.</param>
        /// <returns>
        /// The value returned by <c>db.ExeNonQuery</c> wrapped as JSON, or 0 when there was nothing to
        /// insert (blank url, target element missing, or no non-blank pieces). The view treats any
        /// value that is not greater than 0 as "nothing added".
        /// </returns>
        public JsonResult Add(string url)
        {
            if (string.IsNullOrWhiteSpace(url))
            {
                return Json(0);
            }

            HtmlWeb web = new HtmlWeb();
            HtmlDocument document = web.Load(url);

            // XPath of the single element whose text is harvested.
            const string targetXPath = "//*[@id='761dfa3c-837a-6ba5-6b1b-9fa9afad498e']";
            HtmlNode target = document.DocumentNode.SelectSingleNode(targetXPath);
            if (target == null)
            {
                // The page does not contain the expected element; nothing to insert.
                return Json(0);
            }

            // Strip line breaks. The "\r\n\t" pass is intentionally run twice: removing one match can
            // join a preceding "\r\n" with a following "\t" and form a new match.
            string flattened = target.InnerText
                .Replace("\r\n\t", "")
                .Replace("\r\n\t", "")
                .Replace("\r\n", "");

            StringBuilder batch = new StringBuilder();
            foreach (string piece in flattened.Split('\t'))
            {
                if (string.IsNullOrWhiteSpace(piece))
                {
                    continue;
                }

                // The text comes from an arbitrary web page: double embedded single quotes so it
                // cannot terminate the SQL string literal.
                // NOTE(review): prefer a parameterised command if the db helper offers one.
                string escaped = piece.Replace("'", "''");
                batch.Append("insert into Files values('").Append(escaped).Append("');");
            }

            if (batch.Length == 0)
            {
                // Avoid sending an empty command text to the database.
                return Json(0);
            }

            int affected = db.ExeNonQuery(batch.ToString());
            return Json(affected);
        }

//读取

/// <summary>
/// Runs "select * from Files" and returns the Name column of every row as JSON.
/// </summary>
/// <returns>JSON list of <c>FilesViewModel</c> objects, one per row of the result table.</returns>
public JsonResult GetList()
        {
            DataTable table = db.GetTable("select * from Files");
            List<FilesViewModel> result = new List<FilesViewModel>();

            // Walk the rows by index and project each one onto a view model.
            for (int index = 0; index < table.Rows.Count; index++)
            {
                DataRow row = table.Rows[index];
                result.Add(new FilesViewModel { Name = row["Name"].ToString() });
            }

            return Json(result);
        }

//视图采用ajax获取

<!-- Crawl form and result table for the Files page. -->
<div>
    <table>
        <tr>
            <!-- URL input plus the button that starts the crawl by calling paqu(). -->
            <td>Url:<input id="url" type="text" /><input id="Button1" type="button" value="开始爬取" onclick="paqu()" /></td>
        </tr>
        <tr>
            <!-- Header cell for the list of stored entries. -->
            <td>内容</td>
        </tr>  
        <!-- Rows are emptied and re-appended here by load(). -->
        <tbody id="content">

        </tbody>
    </table>
</div>
<script>

    // Reads the address typed into #url, posts it to /Files/Add and, when the server reports
    // a positive result, refreshes the list via load(). Tells the user when nothing was added
    // or the request failed instead of staying silent.
    function paqu() {
        var url = ($("#url").val() || "").trim();
        if (!url) {
            // Nothing to crawl; do not send an empty address to the server.
            alert("请输入Url");
            return;
        }
        $.ajax({
            url: "/Files/Add",
            type: "post",
            data: { url: url },
            success: function (data) {
                if (data > 0) {
                    alert("添加成功");
                    load();
                } else {
                    // The server answered but inserted nothing.
                    alert("添加失败");
                }
            },
            error: function () {
                // Network failure or server-side exception.
                alert("添加失败");
            }
        })
    }

    // Fetches all stored entries from /Files/GetList and re-renders them as rows of #content.
    function load() {
        $.ajax({
            url: "/Files/GetList",
            type: "post",
            success: function (data) {
                var body = $("#content");
                body.empty();
                $.each(data || [], function (index, item) {
                    // Names are text scraped from third-party pages: insert them with .text()
                    // so any markup they contain is shown literally, not interpreted as HTML.
                    var cell = $("<td>").text(item.Name);
                    body.append($("<tr>").append(cell));
                });
            }
        })
    }
</script>