网站首页> 文章专栏> php如何使用curl模拟百度蜘蛛进行采集?
php如何使用curl模拟百度蜘蛛进行采集?
编辑时间:2018-05-09 09:06:48 作者:皮皮赖 浏览:1431 评论:0
使用 curl 模拟百度蜘蛛进行采集的源码如下：
<?php
// Report every error level except notices and warnings.
error_reporting(E_ALL & ~E_NOTICE & ~E_WARNING);

/**
 * Fetches a URL with cURL while presenting itself as Baiduspider.
 *
 * Each request sends the Baiduspider User-Agent string together with forged
 * X-FORWARDED-FOR / CLIENT-IP headers that carry a randomly generated
 * dotted-quad address.
 */
class Curlcontent{

    /** @var mixed cURL handle of the most recent request (null before the first one). */
    public $ch;

    /** @var string|null Random address placed in the forged headers of the most recent request. */
    public $ip;

    /** @var int|null Timeout in seconds applied to the most recent request. */
    public $timeout;

    /**
     * Performs the GET request and hands back the response body.
     *
     * @param string $url Address to fetch; only HTTP and HTTPS are allowed.
     * @return string|false Response body, or false when cURL reports a failure.
     */
    protected function _GetContent( $url )
    {
        $this->ch = curl_init();
        // Random address, each octet drawn from 0-255; it is only used as a header value.
        $this->ip = rand(0,255).'.'.rand(0,255).'.'.rand(0,255).'.'.rand(0,255);
        $this->timeout = 15;

        curl_setopt($this->ch,CURLOPT_URL,$url);
        // The URL may come from untrusted input, so refuse file://, gopher:// and other non-web schemes.
        curl_setopt($this->ch,CURLOPT_PROTOCOLS,CURLPROTO_HTTP | CURLPROTO_HTTPS);
        // Bound the whole transfer as well as the connect phase; a value of 0 would mean "wait forever".
        curl_setopt($this->ch,CURLOPT_TIMEOUT,$this->timeout);
        curl_setopt($this->ch,CURLOPT_CONNECTTIMEOUT,$this->timeout);
        // Forged client-address headers.
        curl_setopt($this->ch,CURLOPT_HTTPHEADER,array('X-FORWARDED-FOR:'.$this->ip,'CLIENT-IP:'.$this->ip));
        // Forged Baiduspider User-Agent.
        curl_setopt($this->ch,CURLOPT_USERAGENT,"Mozilla/5.0 (compatible; Baiduspider/2.0; +http://www.baidu.com/search/spider.html)");
        // Return the body as a string instead of printing it, and leave response headers out of it.
        curl_setopt($this->ch,CURLOPT_RETURNTRANSFER,1);
        curl_setopt($this->ch,CURLOPT_HEADER,0);
        // NOTE(review): peer certificate verification is disabled, so HTTPS responses are
        // not authenticated. Kept as-is to preserve existing behaviour.
        curl_setopt($this->ch,CURLOPT_SSL_VERIFYPEER,false);

        return curl_exec($this->ch);
    }

    /**
     * Public entry point: fetches $url as Baiduspider.
     *
     * @param string $url Address to fetch.
     * @return string|false Response body, or false when the request failed.
     */
    public  function getcurl($url){
        return $this->_GetContent($url);
    }
}

// Fetch the address given in the "url" query parameter, then answer with a JSON status.
// isset() and is_string() keep a missing parameter or an array value (?url[]=...) away
// from cURL; the trailing truthiness test rejects '' and '0' just as the bare check did.
if (isset($_GET['url']) && is_string($_GET['url']) && $_GET['url']) {
    $api = $_GET['url'];
    $Curlcontent = new Curlcontent();
    // The result is not inspected: the success response below is sent whenever a URL was supplied.
    $Curlcontent->getcurl($api);
    $mydata = ['msg'=>200,'val'=>'提交成功','num'=>1];
    exit(json_encode($mydata, JSON_UNESCAPED_UNICODE));
}else{
    // No usable URL was supplied.
    $mydata = ['msg'=>404,'val'=>'提交失败','cause'=>'URL推广链接不能为空'];
    exit(json_encode($mydata, JSON_UNESCAPED_UNICODE));
}

参考:https://www.52bz.la/4067.html 

来说两句吧
最新评论