PHP 论坛采集程序:模拟登录、抓取页面的实现代码
发布时间:2016-12-29 来源:查字典编辑
摘要:本文给出一个 PHP 论坛采集程序的示例代码,演示如何模拟登录论坛并抓取帖子页面。

代码如下:

<?php

// Author: Wu Yanjun (吴燕军)

// 2009-06-27

// Forum scraper script (PHP)

// Remove the script execution time limit.
set_time_limit(0);

// Path of the file in which cURL stores and reloads cookies
// (a file path, not a directory).

$cookie_jar = '/tmp/cookie.tmp';

/*函数------------------------------------------------------------------------------------------------------------*/

/**
 * Perform an HTTP request with cURL and return the response body.
 *
 * Cookies are read from and written back to $cookie_jar, so a session
 * established by one call is reused by later calls that pass the same file.
 * Redirects are followed automatically.
 *
 * @param string       $url        URL to fetch.
 * @param string|array $postfields POST body. When empty ('' / null / array()),
 *                                 a plain GET is issued instead of an empty POST.
 * @param string       $cookie_jar Path of the cookie file to load and save.
 * @param string       $referer    Value for the Referer header ('' for none).
 *
 * @return string|false Response body, or false when the transfer failed.
 */
function request($url,$postfields,$cookie_jar,$referer){
    $ch = curl_init();
    if ($ch === false) {
        trigger_error('request(): unable to initialise cURL', E_USER_WARNING);
        return false;
    }

    // No CURLOPT_PORT: the port is derived from the URL, so https:// URLs and
    // URLs with an explicit port are not forced onto port 80.
    $options = array(
        CURLOPT_URL            => $url,
        CURLOPT_HEADER         => 0,
        CURLOPT_NOBODY         => 0,
        CURLOPT_RETURNTRANSFER => 1,
        CURLOPT_FOLLOWLOCATION => 1,
        CURLOPT_COOKIEJAR      => $cookie_jar,
        CURLOPT_COOKIEFILE     => $cookie_jar,
        CURLOPT_REFERER        => $referer,
    );

    // Only switch to POST when there is actually a body to send; page fetches
    // that pass '' should go out as ordinary GET requests.
    if ($postfields !== '' && $postfields !== null && $postfields !== array()) {
        $options[CURLOPT_POST]       = 1;
        $options[CURLOPT_POSTFIELDS] = $postfields;
    }

    curl_setopt_array($ch, $options);

    $code = curl_exec($ch);
    if ($code === false) {
        // Surface the transport error instead of silently returning false.
        trigger_error('request(): ' . curl_error($ch), E_USER_WARNING);
    }

    curl_close($ch);

    return $code;
}

//获取帖子列表

function getThreadsList($code){

preg_match_all('/ <-------------------------------------------------------------------------------------------------*/

/* Log in to the forum - begin */

$url = 'http://bbs.war3.cn/logging.php?action=login';

// Build the form body with http_build_query() so that every value is
// URL-encoded (the referer URL and the non-ASCII submit label included) and
// no stray whitespace ends up inside the field names.
$postfields = http_build_query(
    array(
        'loginfield'  => 'username',
        'username'    => '1nject10n',
        'password'    => 'xxxxxx',
        'questionid'  => '0',
        'cookietime'  => '315360000',
        'referer'     => 'http://bbs.war3.cn/',
        'loginsubmit' => '提交',
    ),
    '',
    '&'
);

// The response body is not needed; the call is made for the session cookies
// that cURL saves into $cookie_jar.
request($url,$postfields,$cookie_jar,'');

unset($postfields,$url);

/* Log in to the forum - end */

/* Fetch the thread list (threads on the first page) - begin */

$url = 'http://bbs.war3.cn/forumdisplay.php?fid=57';

$code = request($url,'',$cookie_jar,'');

$threadsList = getThreadsList($code);

// Guard against a failed fetch/parse so the loop below does not iterate a
// non-array value.
if (!is_array($threadsList)) {
    $threadsList = array();
}

/* Fetch the thread list - end */

// Number of threads printed so far (used for the "thread N" heading).
$rows = 0;

/* Fetch and print every thread on the list - begin */

foreach ($threadsList as $list) {
    $url = "http://bbs.war3.cn/viewthread.php?tid=$list";

    // Fetch the thread page first, then validate THAT page. Previously the
    // check ran on the page left over from the prior iteration (the list page
    // on the first pass).
    // NOTE(review): isExits() is defined outside this view; presumably it
    // reports whether the fetched page is an existing thread - confirm.
    $code = request($url,'',$cookie_jar,'');

    if (isExits($code)) {
        echo " <div>";

        echo " <h1>第",($rows+1),"贴: </h1> <br/>";

        $author = getAuthor($code);
        printAuthor($author);

        $title = getTitle($code);
        printTitle($title);

        $contents = getContents($code);
        printContents($contents);

        echo " </div>";

        $rows++;
    } else {
        printError();
    }

    // Separator printed after every thread, whether it succeeded or not.
    echo "----------------------------------------------------------------------------------------- <br/> <br/>";
}

/* Fetch and print every thread on the list - end */

?>

推荐文章
猜你喜欢
附近的人在看
推荐阅读
拓展阅读
相关阅读
网友关注
最新php教程学习
热门php教程学习
编程开发子分类