TP5.1 爬虫

插件合集

点击跳转

环境

PHP >= 7.1 !!!
PHP >= 7.1 !!!
PHP >= 7.1 !!!

安装

composer require jaeger/querylist

后端

// Crawler action (the controller must import QueryList: use QL\QueryList;)
    /**
     * Crawl a paginated list and, for every list row, its detail page.
     *
     * On a POST with data:
     *  - builds QueryList rules from the submitted `content` rows
     *    (field => [selector, attribute]); incomplete rows are ignored;
     *  - fetches pages pageS..pageE, replacing `#` in `url` with the page number;
     *  - for each collected row, fetches the URL stored under its `source` key
     *    and extracts the single `info` rule from that page;
     *  - answers with JSON, because the page's AJAX call reads `res.msg`.
     * Otherwise renders the form template.
     *
     * @return mixed JSON response for a POST with data, rendered template otherwise
     */
    public function crawler()
    {
        if (request()->post()) {
            $url   = trim((string) ($this->param['url'] ?? ''));
            $range = (string) ($this->param['range'] ?? '');
            // Page bounds arrive as form strings; cast them so the loop counter is an integer.
            $pageS = (int) ($this->param['pageS'] ?? 0);
            $pageE = (int) ($this->param['pageE'] ?? 0);

            if ($url === '') {
                return json(['code' => 0, 'msg' => '地址不能为空']);
            }

            // Keep only fully specified rule rows.
            $rules = [];
            foreach ((array) ($this->param['content'] ?? []) as $rule) {
                if (!empty($rule['field']) && !empty($rule['tag']) && !empty($rule['val'])) {
                    $rules[$rule['field']] = [$rule['tag'], $rule['val']];
                }
            }

            // Collect the rows of every list page from the start page to the end page.
            $addVals = [];
            for ($page = $pageS; $page <= $pageE; $page++) {
                $pageUrl = str_replace('#', (string) $page, $url);
                $ql = QueryList::rules($rules)->range($range);
                foreach ($ql->get($pageUrl)->queryData() as $row) {
                    $addVals[] = $row;
                }
                $ql->destruct();
            }

            // Crawl the detail page of each row.
            $info = (array) ($this->param['info'] ?? []);
            $hasInfoRule = !empty($info['field']) && !empty($info['tag']) && !empty($info['val']);
            foreach ($addVals as $k => $row) {
                $detailUrl = trim((string) ($row['source'] ?? ''));

                // Rows without a detail URL (or a missing detail rule) are left as collected.
                if ($hasInfoRule && $detailUrl !== '') {
                    $detail = QueryList::get($detailUrl)
                        ->rules([$info['field'] => [$info['tag'], $info['val']]])
                        ->queryData();

                    // Per the QueryList v4 docs, queryData() without range() yields a list
                    // of rows, so the value sits in the first row. The flat lookup is kept
                    // as a fallback in case the installed version returns a flat map.
                    $addVals[$k][$info['field']] = $detail[0][$info['field']]
                        ?? ($detail[$info['field']] ?? null);
                }

                // Persist $addVals[$k] to the database here.

            }
            QueryList::destructDocuments();

            return json(['code' => 1, 'msg' => '爬取完成']);
        }
        return $this->fetch();
    }

前端

<{include file="public/header" /}>
</head>

<body>
<div class="layui-fluid">
    <div class="layui-row">
        <!-- Crawler configuration form; submitted via AJAX by the page script (lay-filter="crawler") -->
        <form class="layui-form" method="post">

            <!-- List page URL; "#" is the page-number placeholder -->
            <div class="layui-form-item ">
                <label class="layui-form-label">地址:</label>
                <div class="layui-input-block">
                    <input type="text" id="url" name="url" autocomplete="off" class="layui-input" placeholder="url链接,分页用#表示,如:www.baidu-page#.com">
                </div>
            </div>

            <!-- First page number -->
            <div class="layui-form-item">
                <label class="layui-form-label">开始页数:</label>
                <div class="layui-input-block">
                    <input type="text" id="pageS" name="pageS" autocomplete="off" class="layui-input" placeholder="开始页数(数字)">
                </div>
            </div>

            <!-- Last page number -->
            <div class="layui-form-item">
                <label class="layui-form-label">结束页数:</label>
                <div class="layui-input-block">
                    <input type="text" id="pageE" name="pageE" autocomplete="off" class="layui-input" placeholder="结束页数(数字)">
                </div>
            </div>

            <!-- Selector that delimits one list item -->
            <div class="layui-form-item">
                <label class="layui-form-label">列表断点:</label>
                <div class="layui-input-block">
                    <input type="text" id="range" name="range" autocomplete="off" class="layui-input" placeholder="支持标签、class,如:span、.class、#id">
                </div>
            </div>

            <!-- Detail-page link rule. The field name is fixed to "source" because the
                 backend reads the detail URL from that key; readonly keeps it submitted
                 but not editable (layui-disabled alone is only a style). -->
            <div class="layui-form-item">
                <label class="layui-form-label">详情地址:</label>
                <div class="layui-input-block">
                    <input type="text" id="content[info][field]" name="content[info][field]" autocomplete="off" class="layui-input layui-disabled" value="source" readonly placeholder="字段名">
                    <input type="text" id="content[info][tag]" name="content[info][tag]" autocomplete="off" class="layui-input" placeholder="支持标签、class,如:span、.class">
                    <input type="text" id="content[info][val]" name="content[info][val]" autocomplete="off" class="layui-input" placeholder="属性,如:href、src、text、html">
                </div>
            </div>

            <!-- First list-field rule; further rows are inserted above the button below -->
            <div class="layui-form-item">
                <label class="layui-form-label">内容:</label>
                <div class="layui-input-block">
                    <input type="text" id="content[0][field]" name="content[0][field]" autocomplete="off" class="layui-input" placeholder="字段名">
                    <input type="text" id="content[0][tag]" name="content[0][tag]" autocomplete="off" class="layui-input" placeholder="支持标签、class,如:span、.class">
                    <input type="text" id="content[0][val]" name="content[0][val]" autocomplete="off" class="layui-input" placeholder="属性,如:href、src、text、html">
                </div>
            </div>

            <!-- Button that adds another list-field rule row -->
            <div class="layui-form-item">
                <div class="layui-input-block">
                    <button class="layui-btn" type="button" id="addC">
                        ===================================================================================================
                        添加内容模块
                        ===================================================================================================
                    </button>
                </div>
            </div>

            <!-- Rule for the single field extracted from each detail page -->
            <div class="layui-form-item">
                <label class="layui-form-label">内容详情:</label>
                <div class="layui-input-block">
                    <input type="text" id="info[field]" name="info[field]" autocomplete="off" class="layui-input" placeholder="字段名">
                    <input type="text" id="info[tag]" name="info[tag]" autocomplete="off" class="layui-input" placeholder="支持标签、class,如:span、.class">
                    <input type="text" id="info[val]" name="info[val]" autocomplete="off" class="layui-input" placeholder="属性,如:href、src、text、html">
                </div>
            </div>

            <!-- Submit; the empty label only keeps the button aligned with the inputs -->
            <div class="layui-form-item">
                <label class="layui-form-label"></label>
                <button class="layui-btn" lay-submit="" lay-filter="crawler">爬取</button>
            </div>

        </form>
    </div>
</div>

<!--包含footer文件-->
<{include file="public/footer" /}>

<!--请在下方写此页面业务相关的脚本-->
<script type="text/javascript">
    // Index used for the next dynamically added "content" row; it only has to be
    // unique among the submitted content[...] keys.
    let index_c = 2;

    layui.use(['form','layer'], function() {
        // Publish layui's bundled jQuery as the page-wide `$` explicitly; addC() relies on it.
        window.$ = layui.jquery;
        var form = layui.form
            , layer = layui.layer;

        // Submit the form via AJAX and show the message returned by the server.
        form.on('submit(crawler)', function (data) {
            $.ajax({
                type: 'POST',
                url: "<{:url('crawler')}>",
                data: data.field,
                dataType: 'json',
                success: function (res) {
                    layer.msg(res['msg'], {
                        icon: 1, time: 1000,
                    });
                }
            });
            // Prevent the native (full-page) form submission.
            return false;
        });

        // Bind the "add content" button here, once `$` is guaranteed to exist.
        // Outside this callback `$` may still be undefined if layui loads its
        // modules asynchronously.
        $(function(){
            addC()
        });
    })

    /**
     * Attach the click handler of the "add content module" button (#addC).
     * Every click inserts a new field/tag/val input row, numbered with the
     * current index_c, directly above the button's form item, then advances
     * index_c.
     */
    function addC(){

        $("#addC").on('click', function(){
            const rowHtml = `
                <div class="layui-form-item">
                    <label class="layui-form-label">内容:</label>
                    <div class="layui-input-block">
                        <input type="text" id="content[${index_c}][field]" name="content[${index_c}][field]" autocomplete="off" class="layui-input" placeholder="字段名">
                        <input type="text" id="content[${index_c}][tag]" name="content[${index_c}][tag]" autocomplete="off" class="layui-input" placeholder="支持标签、class,如:span、.class">
                        <input type="text" id="content[${index_c}][val]" name="content[${index_c}][val]" autocomplete="off" class="layui-input" placeholder="属性,如:href、src、text、html">
                    </div>
                </div>
            `;
            // The button sits two levels below its .layui-form-item wrapper.
            const buttonRow = $(this).parent().parent();
            buttonRow.before(rowHtml);
            index_c += 1;
        });
    }


</script>
<!--/请在上方写此页面业务相关的脚本-->
</body>
</html>

页面展示

 

posted @ 2020-12-21 13:52  张永峰z  阅读(185)  评论(0)  编辑  收藏  举报