　　采集已经不是什么新名词了，很多站长为了省事，也局限于人力的缺乏，使用程序来给自己的网站添砖加瓦，比如本人的个人网站也采集了大量的新闻，那么如何实现呢？今天我们运用php来实现这个功能。
谈到采集，我们不得不说两个东西，第一个是如何获取远程网站的源代码，这个可以通过php的一个扩展curl来获取，另一个是如何去匹配你需要的信息，这个的解决办法是正则表达式。
Windows下开启curl的方法如下：
1、拷贝PHP目录中的libeay32.dll，ssleay32.dll，php5ts.dll，php_curl.dll文件到system32目录。
2、修改php.ini：配置好extension_dir，去掉extension=php_curl.dll前面的分号。
3、重启apache。
Linux下开启curl的方法如下：
进入安装原php的源码目录，
cd ext
cd curl
phpize
./configure --with-curl=DIR
make
就会在PHPDIR/ext/curl/modules/下生成curl.so的文件。
复制curl.so文件到extensions的配置目录，修改php.ini就好了。
然后你就可以利用curl来获取到指定url的网页源码了，这里给大家一个封装好的函数：
以下为引用的内容：
/**
 * Download a URL with cURL and return the response body.
 *
 * HTTP redirects are followed. If the cURL handle cannot be created or the
 * transfer fails, an empty string is returned (the same value callers
 * previously received from trim(false)).
 *
 * @param string $url Absolute URL to fetch; surrounding whitespace is ignored.
 * @return string Response body with leading/trailing whitespace trimmed, or '' on failure.
 */
function getwebcontent($url) {
    $timeout = 10; // seconds allowed for establishing the connection

    $ch = curl_init();
    if ($ch === false) {
        return '';
    }

    // A stray newline or space around the URL makes cURL reject it.
    curl_setopt($ch, CURLOPT_URL, trim($url));
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    // CURLOPT_CONNECTTIMEOUT only bounds the connect phase; cap the whole
    // transfer too so a stalled server cannot hang the crawl forever.
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout * 3);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

    $contents = curl_exec($ch);
    curl_close($ch);

    if ($contents === false) {
        return '';
    }

    return trim($contents);
}
接下来就应该说到php中的正则表达式了：
1.中括号
[0-9]匹配0-9
[a-z]匹配a-z小写字母
[A-Z]匹配A-Z大写字母
[a-zA-Z]匹配所有大小写字母
可以使用ascii来指定更多范围
2.量词
以下为引用的内容：
p+匹配至少一个含p的字符串
p*匹配任何包含0个或多个p的字符串
p?匹配任何包含0个或一个p的字符串
p{2}匹配包含2个p的序列的字符串
p{2,3}匹配任何包含2个或3个p的序列的字符串
p$匹配任何以p结尾的字符串
^p匹配任何以p开头的字符串
[^a-zA-Z]匹配任何不包含a-zA-Z的字符串
p.p匹配任何包含p、接下来是任何字符、再接下来又是p的字符串
^.{2}$匹配任何只包含2个字符的字符串
<b>(.*)</b>匹配任何被<b></b>包围的字符串
p(hp)*匹配任何一个包含p,后面是多个或0个hp的字符串
3.预定义字符范围
以下为引用的内容：
[:alpha:]同[a-zA-Z]
[:alnum:]同[a-zA-Z0-9]
[:cntrl:]匹配控制字符，比如制表符，反斜杠，退格符
[:digit:]同[0-9]
[:graph:]所有ASCII33~126范围内可以打印的字符
[:lower:]同[a-z]
[:punct:]标点符号
[:upper:]同[A-Z]
[:space:]空白字符，可以是空格、水平制表符、换行、换页、回车
[:xdigit:]十六进制符同[a-fA-F0-9]
废话不多说，直接上我的源码吧，有什么不懂的可以上百度查查。
以下为引用的内容：
<?php
// The page reports progress as UTF-8 text.
header("Content-type:text/html;charset=utf-8");

// Crawl each rolling-news feed. The second argument is the category id that
// getinfo() hands on to insertdb().
// Each URL is kept on a single line: a line break inside the string literal
// would become part of the URL and make the request fail.
getinfo("http://rss.*******.com.cn/rollnews/news/gn_total.js", 1);
getinfo("http://rss.*******.com.cn/rollnews/news/gj_total.js", 2);
getinfo("http://rss.*******.com.cn/rollnews/news/sh_total.js", 3);
getinfo("http://rss.*******.com.cn/rollnews/sports/sports_total.js", 4);
getinfo("http://rss.*******.com.cn/rollnews/tech/tech1_total.js", 5);
getinfo("http://rss.*******.com.cn/rollnews/finance/finance1_news_total.js", 6);
getinfo("http://rss.*******.com.cn/rollnews/ent/ent_total.js", 7);
getinfo("http://rss.*******.com.cn/rollnews/jczs/jczs_total.js", 8);
/**
 * Crawl one news feed and store every article it lists.
 *
 * The feed is downloaded with getwebcontent(), its `title:"..."` and
 * `link:"..."` entries are extracted, each linked article is fetched with
 * getnewscontent(), converted from GBK to UTF-8 and passed to insertdb().
 * Processing stops as soon as insertdb() reports failure.
 *
 * @param string $infourl URL of the feed to crawl.
 * @param int    $catid   Category id passed through to insertdb().
 * @return void
 */
function getinfo($infourl, $catid)
{
    $pagecontent = getwebcontent($infourl);

    // Single-quoted patterns: the double quotes are part of the regex and
    // must not terminate the PHP string.
    preg_match_all('/title:\s*"(.*?)"/', $pagecontent, $match);
    $titlearr = isset($match[1]) ? $match[1] : array();
    preg_match_all('/link:\s*"(.*?)"/', $pagecontent, $match);
    $urlarr = isset($match[1]) ? $match[1] : array();

    // NOTE(review): link $i is paired with title $i-1 and link 0 is skipped,
    // exactly as in the original. Presumably the first link belongs to the
    // feed header rather than to an article; confirm against a live feed.
    $total = count($urlarr);
    for ($i = 1; $i < $total; $i++) {
        if (!isset($titlearr[$i - 1])) {
            break; // fewer titles than links: nothing sensible left to pair
        }

        // Convert before echoing: the page is declared as UTF-8, the feed is GBK.
        $title = iconv("gbk", "utf-8", $titlearr[$i - 1]);
        if ($title === false) {
            continue; // title is not valid GBK
        }
        echo "go{$title} ";

        $rawcontent = getnewscontent($urlarr[$i]);
        if ($rawcontent === '') {
            continue; // download failed or article body not found
        }
        $content = iconv("gbk", "utf-8", $rawcontent);
        if ($content === false) {
            continue; // body is not valid GBK
        }

        // NOTE(review): mysql_escape_string() is long deprecated and was
        // removed in PHP 7.0, and only $content (not $title) is escaped here.
        // Kept so existing insertdb() implementations receive the same input;
        // prefer parameterized queries inside insertdb() and then drop this.
        $content = mysql_escape_string($content);

        if (!insertdb($title, $content, $catid)) {
            break;
        }
    }
}
/**
 * Persist one scraped article. Placeholder: supply your own storage code.
 *
 * getinfo() stops crawling the current feed as soon as this returns a falsy
 * value, so return true only after the row has actually been written.
 *
 * @param string $title   Article title (UTF-8, not escaped by the caller).
 * @param string $content Article body (UTF-8).
 * @param int    $catid   Category id of the feed the article came from.
 * @return bool True when the article was stored, false otherwise.
 */
function insertdb($title, $content, $catid) {
    // TODO: write the data into your own database here (use a parameterized
    // query) and return true on success.
    // Until that is implemented, report failure instead of pretending the
    // article was stored.
    return false;
}
/**
 * Download an article page and return its cleaned-up body HTML.
 *
 * The body is the markup between the `artibody` container's opening tag and
 * the `publish_helper_end` comment. Links, the `otherContent_01` block, the
 * `blk-video` block and an empty clearing div are removed from it.
 *
 * @param string $newsurl URL of the article page.
 * @return string Cleaned body HTML in the page's original encoding, or '' when
 *                the page could not be fetched or the container was not found.
 */
function getnewscontent($newsurl) {
    $newscontent = getwebcontent($newsurl);

    // NOTE(review): the exact whitespace inside the markup literals was lost
    // in the source, so the patterns below accept optional whitespace between
    // attributes and inside the style values; confirm against a live page.
    $bodypattern = '~<div\s+class="blkContainerSblkCon"\s+id="artibody">([\s\S]*?)<!--\s*publish_helper_end\s*-->~';
    if (!preg_match($bodypattern, $newscontent, $match)) {
        return ''; // container missing (or regex error): nothing to extract
    }
    $content = $match[1];

    $junkpatterns = array(
        // Whole anchors, including their text. \b keeps tags such as <abbr>
        // or <address> from being mistaken for <a>.
        '~<a\b.*?</a>~si',
        '~<div\s+style="overflow:\s*hidden;\s*zoom:\s*1;"\s+class="otherContent_01">.*?</div>~si',
        '~<div\s+class="blk-video">.*?<div\s+class="clear\s*cl">\s*</div>~si',
        // "hiddden" is intentional: it is the spelling this pattern was written to match.
        '~<div\s+style="clear:\s*both;\s*height:\s*0;\s*visibility:\s*hiddden;\s*overflow:\s*hidden;">\s*</div>~',
    );

    $cleaned = preg_replace($junkpatterns, '', $content);

    // preg_replace() returns null on a PCRE error; fall back to the uncleaned body.
    return $cleaned === null ? $content : $cleaned;
}
/**
 * Download a URL with cURL and return the response body.
 *
 * HTTP redirects are followed. If the cURL handle cannot be created or the
 * transfer fails, an empty string is returned (the same value callers
 * previously received from trim(false)).
 *
 * @param string $url Absolute URL to fetch; surrounding whitespace is ignored.
 * @return string Response body with leading/trailing whitespace trimmed, or '' on failure.
 */
function getwebcontent($url) {
    $timeout = 10; // seconds allowed for establishing the connection

    $ch = curl_init();
    if ($ch === false) {
        return '';
    }

    // A stray newline or space around the URL makes cURL reject it.
    curl_setopt($ch, CURLOPT_URL, trim($url));
    // Return the body from curl_exec() instead of printing it.
    curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
    curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, $timeout);
    // CURLOPT_CONNECTTIMEOUT only bounds the connect phase; cap the whole
    // transfer too so a stalled server cannot hang the crawl forever.
    curl_setopt($ch, CURLOPT_TIMEOUT, $timeout * 3);
    curl_setopt($ch, CURLOPT_FOLLOWLOCATION, 1);

    $contents = curl_exec($ch);
    curl_close($ch);

    if ($contents === false) {
        return '';
    }

    return trim($contents);
}
?>