这两天在学习网页采集,用的是 QueryList,使用起来非常简单。
下面是采集博客园博客的代码:
<?php

namespace app\index\controller;

use QL\QueryList;

/**
 * Demo controller that scrapes the posts of one cnblogs.com blog with QueryList.
 */
class Index
{
    /** Listing-page URL prefix; the 1-based page number is appended to it. */
    const LIST_URL = 'https://www.cnblogs.com/jcydd/default.html?page=';

    /** Number of the last listing page to crawl (pages 1..LAST_PAGE are fetched). */
    const LAST_PAGE = 13;

    /**
     * Crawl every listing page, scrape each linked post and dump the result.
     *
     * Each scraped post is stored under the key "<page>-<index>", where
     * <index> is the position of the link on its listing page. The delimiter
     * keeps keys unambiguous: plain concatenation would map both page 1 /
     * item 11 and page 11 / item 1 to "111".
     *
     * @return void
     */
    public function index()
    {
        $article = [];

        for ($page = 1; $page <= self::LAST_PAGE; $page++) {
            $listing = QueryList::get(self::LIST_URL . $page);

            // Collect the links to the individual posts on this listing page.
            $links = $listing->find('.postTitle a')->attrs('href');

            foreach ($links as $index => $link) {
                // An anchor without a usable href cannot be fetched.
                if ($link === null || $link === '') {
                    continue;
                }

                $article[$page . '-' . $index] = $this->scrapePost($link);
            }
        }

        dump($article);
    }

    /**
     * Fetch a single post page and extract its title, date and body.
     *
     * @param string $url Absolute URL of the post page.
     *
     * @return array Map with the keys 'title', 'date' and 'content'.
     *               'date' is the strtotime() result for the text of the
     *               #post-date element, so it is false when that text
     *               cannot be parsed.
     */
    private function scrapePost($url)
    {
        $post = QueryList::get($url);

        return [
            'title'   => $post->find('#cb_post_title_url')->text(),
            'date'    => strtotime($post->find('#post-date')->text()),
            'content' => $this->localizeImages(
                $post,
                $post->find('#cnblogs_post_body')->html()
            ),
        ];
    }

    /**
     * Replace every image URL found in the post with the value returned by
     * getimg() for that URL.
     *
     * getimg() is defined outside this file; presumably it downloads the
     * image and returns its local address (TODO confirm).
     *
     * @param mixed  $post    QueryList object of the already fetched post page.
     * @param string $content HTML of the post body.
     *
     * @return string The body HTML with the image URLs rewritten; unchanged
     *                when the post contains no images.
     */
    private function localizeImages($post, $content)
    {
        $seen = [];

        foreach ($post->find('#topics img')->attrs('src') as $src) {
            // Skip images without a src and URLs that were already handled:
            // str_replace() rewrites every occurrence at once, so a repeated
            // URL would only cause a second, useless getimg() call.
            if (!is_string($src) || $src === '' || isset($seen[$src])) {
                continue;
            }
            $seen[$src] = true;

            $content = str_replace($src, getimg($src), $content);
        }

        return $content;
    }
}