通过curl和正则获取页面数据

 1 <?php
 2 /**
 3  * Created by PhpStorm.
 4  * User: 意灵魔法馆
 5  * Date: 2016/5/25 0025
 6  * Time: 上午 1:22
 7  */
 8 
 9 namespace back\controller;
10 
11 //导入类
12 use framewrok\core\Controller;
13 use framewrok\tool\RequestClient;
14 use framewrok\core\Factory;
15 
/**
 * Back-office controller that imports question/answer pairs scraped from a
 * saved Zhihu page into the question and anwser tables.
 */
class QuestionController extends Controller
{
    /**
     * Fetch the Zhihu snapshot page, extract every question title together
     * with the answer block that follows it, and store each pair.
     *
     * For every question that is inserted successfully, one answer row is
     * inserted that references the new question id. When finished, control is
     * handed to jumpWait() with the category list URL and a message reporting
     * how many questions were created.
     *
     * If the page cannot be fetched or the pattern fails to match, nothing is
     * inserted and the reported count is 0.
     */
    public function zhihuAction()
    {
        // Request the page through the project's HTTP client.
        $client = new RequestClient();
        $client->url = 'www.myweb.com/zhihu/zhihu.html';
        // NOTE(review): get() presumably returns the response body as a string
        // and a non-string value on failure - confirm against RequestClient.
        $response = $client->get();

        // Group 1 captures the question title link text, group 2 the raw
        // answer content stored in the following <script class="content"> tag.
        $pattern = '#<a[^>]*?class="js-title-link"[^>]*?>(.*?)</a>.*?<script type="text" class="content">(.*?)</script>#s';

        // Default to empty lists so that a failed request or a failed match
        // simply results in zero inserts instead of undefined-index notices
        // and a foreach over null.
        $question_list = [];
        $anwser_list = [];
        if (is_string($response) && $response !== '') {
            $value_list = [];
            $matched = preg_match_all($pattern, $response, $value_list);
            // preg_match_all() returns false on a PCRE error (e.g. backtrack
            // limit reached on a large page) and 0 when nothing matched.
            if ($matched !== false && $matched > 0) {
                // First sub-pattern: all questions.
                $question_list = $value_list[1];
                // Second sub-pattern: the answer paired with each question.
                $anwser_list = $value_list[2];
            }
        }

        // Persist the extracted content.
        $question_model = Factory::getModel('question');
        $anwser_model = Factory::getModel('anwser');
        // Number of questions that were actually inserted.
        $counter = 0;

        foreach ($question_list as $key => $question) {
            // Build the row from scratch on every iteration; strip the <em>
            // highlight tags from the title.
            $question_data = [
                'question_content' => str_replace(['<em>', '</em>'], '', $question),
                'category_id' => 1,
                'add_time' => time(),
            ];

            $question_id = $question_model->insertValue($question_data);
            if (false === $question_id) {
                // Question insert failed: skip its answer and do not count it.
                continue;
            }

            // The question was stored; attach its answer via the new id.
            $anwser_data = [
                'question_id' => $question_id,
                'anwser_content' => $anwser_list[$key],
                'add_time' => time(),
            ];
            // NOTE(review): the result of the answer insert is not checked, so
            // the counter reflects inserted questions, not complete pairs.
            $anwser_model->insertValue($anwser_data);

            ++$counter;
        }

        $this->jumpWait('index.php?m=back&c=Category&a=list', '创建了' . $counter . '条数据');
    }
}

 

posted @ 2016-05-26 22:54  意灵魔法馆  阅读(841)  评论(0)  编辑  收藏  举报