1. 简单的爬虫(PHP)
1. 在 XAMPP 中开启 PHP 的 cURL 扩展:打开 xampp/php/php.ini,搜索 curl,去掉该行行首的分号即可。
2. 打开 cmd,依次执行:D: -> cd xampp -> cd php
php -f ../htdocs/mirror/curl/curl.php > ../htdocs/mirror/curl/haha.html (将爬下来的数据保存到 haha.html 中)
<?php
// File: ../htdocs/mirror/curl/curl.php
//
// Minimal crawler: downloads the Baidu home page, replaces every occurrence
// of "百度" with "屌丝" and prints the result to standard output, so the
// caller can redirect it into a file.

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://www.baidu.com");
// Return the response body as a string instead of printing it directly,
// so it can be post-processed below.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

$output = curl_exec($ch);

if ($output === false) {
    // curl_exec() returns false on failure. Report through error_log() so
    // the message does not end up inside the redirected HTML output.
    error_log("Curl error " . curl_error($ch));
    curl_close($ch);
    exit(1);
}

curl_close($ch);

// Replace "百度" with "屌丝" in the downloaded page before printing it.
echo str_replace("百度", "屌丝", $output);
?>
2. 访问 WebService
http://www.webxml.com.cn/WebServices/WeatherWebService.asmx?op=getWeatherbyCityName
<?php
// Calls the getWeatherbyCityName operation of the webxml.com.cn weather
// WebService with an HTTP POST and prints the raw response, or the cURL
// error message when the transfer fails.

// Build the form body with http_build_query() so the non-ASCII city name is
// percent-encoded, as application/x-www-form-urlencoded requires.
$data = http_build_query(array('theCityName' => '武汉'));

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://www.webxml.com.cn/WebServices/WeatherWebService.asmx/getWeatherbyCityName");
// Do not include the response headers in the returned string.
curl_setopt($ch, CURLOPT_HEADER, false);
// CURLOPT_USERAGENT takes the User-Agent header text, not a flag.
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; PHP cURL)");
// Return the response as a string instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
// Each entry must be a complete "Name: value" header line. Content-Length is
// left to cURL, which derives it from CURLOPT_POSTFIELDS.
curl_setopt($ch, CURLOPT_HTTPHEADER, array(
    "Content-Type: application/x-www-form-urlencoded; charset=utf-8",
));

$rtn = curl_exec($ch);

if ($rtn === false) {
    echo "Curl error " . curl_error($ch);
} else {
    echo $rtn;
}

curl_close($ch);
?>
3. 模拟登录,并下载登录后才能看到的信息
<?php
// Simulates a login on imooc.com and then, re-using the same cURL handle and
// cookie jar, downloads a page that is requested after logging in.
// Prints that page, or the cURL error message if either transfer fails.

// Build the form body with http_build_query() so "@" and the non-ASCII
// characters are percent-encoded as form encoding requires.
// The password value is a placeholder and has to be replaced with a real one.
$data = http_build_query(array(
    'username' => '252973202@qq.com',
    'password' => '这个密码就不写出来啦',
    'remember' => 1,
));

$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, "http://www.imooc.com/user/login");
// Return the response as a string instead of printing it directly.
curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);

// ---- simulated login: start ----

// Time zone used for date handling (cookies carry expiry times).
date_default_timezone_set('PRC');

// Cookie handling: start a new cookie session, and read cookies from / write
// them to the file "cookiefile" (path is relative to the working directory).
curl_setopt($ch, CURLOPT_COOKIESESSION, true);
curl_setopt($ch, CURLOPT_COOKIEFILE, "cookiefile");
curl_setopt($ch, CURLOPT_COOKIEJAR, "cookiefile");

// Send the local session cookie only when a session actually exists.
// session_name() must be called, not quoted: as a string literal it produced
// a cookie literally named "session_name()".
// NOTE(review): this forwards the local PHP session id to the remote site;
// confirm that this is really wanted.
if (session_id() !== '') {
    curl_setopt($ch, CURLOPT_COOKIE, session_name() . "=" . session_id());
}

// Do not include the response headers in the returned string.
curl_setopt($ch, CURLOPT_HEADER, false);
// CURLOPT_USERAGENT takes the User-Agent header text, not a flag.
curl_setopt($ch, CURLOPT_USERAGENT, "Mozilla/5.0 (compatible; PHP cURL)");
// Follow HTTP redirects issued by the server.
curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
curl_setopt($ch, CURLOPT_POST, true);
curl_setopt($ch, CURLOPT_POSTFIELDS, $data);
// Each entry must be a complete "Name: value" header line. Content-Length is
// left to cURL, which derives it from CURLOPT_POSTFIELDS.
curl_setopt($ch, CURLOPT_HTTPHEADER, array(
    "Content-Type: application/x-www-form-urlencoded; charset=utf-8",
));

$rtn = curl_exec($ch);

if ($rtn === false) {
    // Without a successful login request the follow-up request is pointless.
    echo "Curl error " . curl_error($ch);
    curl_close($ch);
    exit(1);
}

// ---- simulated login: end ----

// Fetch the page that is requested after logging in. Switch the handle back
// to GET explicitly and clear the POST-specific request headers.
curl_setopt($ch, CURLOPT_URL, "http://www.imooc.com/space/index");
curl_setopt($ch, CURLOPT_HTTPGET, true);
curl_setopt($ch, CURLOPT_HTTPHEADER, array());

$output = curl_exec($ch);

if ($output === false) {
    echo "Curl error " . curl_error($ch);
} else {
    echo $output;
}

curl_close($ch);
?>