学了一天,成果如下:
用来抓取网页中的图片链接,要学的东西很多啊!
<?php
/**
 * Minimal page scraper: downloads one page and lists the images on it that
 * are wrapped in links, i.e. markup of the form
 * <a ... href="..."><img ... src="..."></a>.
 *
 * Usage: construct with an address, call start() to fetch and parse the
 * page, then filterLinks() to echo an HTML gallery of the linked images.
 *
 * Limitations: only double-quoted href/src attributes are recognised, and
 * relative addresses found in the page are emitted as-is (not resolved
 * against the page address).
 */
class spider {
    /** @var string Address of the page to fetch. */
    private $_url = "";

    /** @var array Parse results; key "links" holds the preg_match_all() groups. */
    private $_sites = array();

    /**
     * Uses __construct rather than a method named after the class: PHP 8 no
     * longer treats the latter as a constructor, which left $_url empty.
     *
     * @param string $url Address of the page to scan.
     */
    function __construct($url) {
        $this->_url = $url;
    }

    /**
     * Fetch the page and store the link/image matches for filterLinks().
     *
     * On a failed or empty download this prints "error" plus a back link
     * and terminates the script.
     *
     * @return void
     */
    function start() {
        // Download once and reuse the result (the page used to be fetched twice).
        $content = false;
        if (is_string($this->_url) && $this->_url !== "") {
            $content = file_get_contents($this->_url);
        }

        if ($content === false || $content === "") {
            echo "error";
            echo '<a href="index.php">back</a>';
            die();
        }

        $this->_sites["links"] = $this->getLinks($content);
    }

    /**
     * Find every image that is the sole content of a link.
     *
     * @param string $content HTML to scan.
     * @return array preg_match_all() result in pattern order: index 2 holds
     *               the href values and index 5 the matching src values.
     */
    function getLinks($content) {
        // Negated character classes keep each group inside a single tag and a
        // single attribute value, so a plain text link can no longer be
        // paired with an image that belongs to a later link.
        $pat = '/<a\b([^>]*?)href="([^"]*)"([^>]*)>\s*<img\b([^>]*?)src="([^"]*)"([^>]*)>\s*<\/a>/i';
        if (preg_match_all($pat, $content, $m) === false) {
            // PCRE failure: return the same shape with no matches.
            return array(array(), array(), array(), array(), array(), array(), array());
        }
        return $m;
    }

    /**
     * Echo the gallery: each image wrapped in its link, a line break before
     * every group of four, followed by a back link.
     *
     * Links whose href is "#" are skipped together with their image, so the
     * remaining links stay paired with the correct pictures.
     *
     * @return void
     */
    function filterLinks() {
        $links = isset($this->_sites["links"]) ? $this->_sites["links"] : array();
        $hrefs = isset($links[2]) ? $links[2] : array(); // link targets
        $srcs = isset($links[5]) ? $links[5] : array();  // image addresses

        // Filter href and src as a pair; filtering only the hrefs shifts the
        // indexes and attaches images to the wrong links.
        $pairs = array();
        foreach ($hrefs as $i => $href) {
            if ($href != "#" && isset($srcs[$i])) {
                $pairs[] = array($href, $srcs[$i]);
            }
        }

        echo "<div><center>";
        foreach ($pairs as $i => $pair) {
            if ($i % 4 == 0) {
                echo "<br>";
            }
            // The values come from a remote page: escape them for the
            // single-quoted attributes below. double_encode is off because
            // scraped markup is normally entity-encoded already.
            $href = htmlspecialchars($pair[0], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
            $src = htmlspecialchars($pair[1], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
            echo "<a href='".$href."' target=_blank><img src='".$src."' border=0></a>";
        }
        echo '<a href="index.php">back</a>';
        echo "</center></div>";
    }
}
/**
 * Scrape one page and print its linked images.
 *
 * Builds a spider for the given address, has it fetch and parse the page,
 * then has it echo the resulting gallery.
 *
 * @param mixed $lin Page address; converted to a string before use.
 * @return void
 */
function ac($lin) {
    $crawler = new spider((string) $lin);
    $crawler->start();
    $crawler->filterLinks();
}
// Entry point: scrape the address submitted by the visitor, or, when none
// was submitted, show the input form followed by a default gallery.
if (isset($_REQUEST['url'])) {
    $link = $_REQUEST['url'];
    // The address ends up in file_get_contents(), which also accepts local
    // paths and other stream wrappers. Accept only string http(s) addresses
    // so a visitor cannot make the server read its own files.
    // NOTE(review): http(s) requests to internal hosts are still possible;
    // add a host allow-list if this is ever exposed publicly.
    if (is_string($link) && preg_match('#^https?://#i', $link)) {
        ac($link);
    } else {
        echo "error";
        echo '<a href="index.php">back</a>';
    }
} else {
    echo "<center><form method='post' action='index.php'>url:<input name='url' type='text' value='http://'><input type='submit' value='Submit'>
</form></center>";
    ac("http://images.google.cn/imgcat?hl=zh-CN&sa=h&catid=566");
}
?>
/**
 * Minimal page scraper: downloads one page and lists the images on it that
 * are wrapped in links, i.e. markup of the form
 * <a ... href="..."><img ... src="..."></a>.
 *
 * Usage: construct with an address, call start() to fetch and parse the
 * page, then filterLinks() to echo an HTML gallery of the linked images.
 *
 * Limitations: only double-quoted href/src attributes are recognised, and
 * relative addresses found in the page are emitted as-is (not resolved
 * against the page address).
 *
 * NOTE(review): this definition repeats an earlier `class spider` in the
 * same file and follows a closing `?>` tag, so PHP would emit it as literal
 * text rather than run it. It looks like a paste artifact; confirm and keep
 * only one copy.
 */
class spider {
    /** @var string Address of the page to fetch. */
    private $_url = "";

    /** @var array Parse results; key "links" holds the preg_match_all() groups. */
    private $_sites = array();

    /**
     * Uses __construct rather than a method named after the class: PHP 8 no
     * longer treats the latter as a constructor, which left $_url empty.
     *
     * @param string $url Address of the page to scan.
     */
    function __construct($url) {
        $this->_url = $url;
    }

    /**
     * Fetch the page and store the link/image matches for filterLinks().
     *
     * On a failed or empty download this prints "error" plus a back link
     * and terminates the script.
     *
     * @return void
     */
    function start() {
        // Download once and reuse the result (the page used to be fetched twice).
        $content = false;
        if (is_string($this->_url) && $this->_url !== "") {
            $content = file_get_contents($this->_url);
        }

        if ($content === false || $content === "") {
            echo "error";
            echo '<a href="index.php">back</a>';
            die();
        }

        $this->_sites["links"] = $this->getLinks($content);
    }

    /**
     * Find every image that is the sole content of a link.
     *
     * @param string $content HTML to scan.
     * @return array preg_match_all() result in pattern order: index 2 holds
     *               the href values and index 5 the matching src values.
     */
    function getLinks($content) {
        // Negated character classes keep each group inside a single tag and a
        // single attribute value, so a plain text link can no longer be
        // paired with an image that belongs to a later link.
        $pat = '/<a\b([^>]*?)href="([^"]*)"([^>]*)>\s*<img\b([^>]*?)src="([^"]*)"([^>]*)>\s*<\/a>/i';
        if (preg_match_all($pat, $content, $m) === false) {
            // PCRE failure: return the same shape with no matches.
            return array(array(), array(), array(), array(), array(), array(), array());
        }
        return $m;
    }

    /**
     * Echo the gallery: each image wrapped in its link, a line break before
     * every group of four, followed by a back link.
     *
     * Links whose href is "#" are skipped together with their image, so the
     * remaining links stay paired with the correct pictures.
     *
     * @return void
     */
    function filterLinks() {
        $links = isset($this->_sites["links"]) ? $this->_sites["links"] : array();
        $hrefs = isset($links[2]) ? $links[2] : array(); // link targets
        $srcs = isset($links[5]) ? $links[5] : array();  // image addresses

        // Filter href and src as a pair; filtering only the hrefs shifts the
        // indexes and attaches images to the wrong links.
        $pairs = array();
        foreach ($hrefs as $i => $href) {
            if ($href != "#" && isset($srcs[$i])) {
                $pairs[] = array($href, $srcs[$i]);
            }
        }

        echo "<div><center>";
        foreach ($pairs as $i => $pair) {
            if ($i % 4 == 0) {
                echo "<br>";
            }
            // The values come from a remote page: escape them for the
            // single-quoted attributes below. double_encode is off because
            // scraped markup is normally entity-encoded already.
            $href = htmlspecialchars($pair[0], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
            $src = htmlspecialchars($pair[1], ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8', false);
            echo "<a href='".$href."' target=_blank><img src='".$src."' border=0></a>";
        }
        echo '<a href="index.php">back</a>';
        echo "</center></div>";
    }
}
/**
 * Scrape one page and print its linked images.
 *
 * Builds a spider for the given address, has it fetch and parse the page,
 * then has it echo the resulting gallery.
 *
 * @param mixed $lin Page address; converted to a string before use.
 * @return void
 */
function ac($lin) {
    $crawler = new spider((string) $lin);
    $crawler->start();
    $crawler->filterLinks();
}
// Entry point: scrape the address submitted by the visitor, or, when none
// was submitted, show the input form followed by a default gallery.
if (isset($_REQUEST['url'])) {
    $link = $_REQUEST['url'];
    // The address ends up in file_get_contents(), which also accepts local
    // paths and other stream wrappers. Accept only string http(s) addresses
    // so a visitor cannot make the server read its own files.
    // NOTE(review): http(s) requests to internal hosts are still possible;
    // add a host allow-list if this is ever exposed publicly.
    if (is_string($link) && preg_match('#^https?://#i', $link)) {
        ac($link);
    } else {
        echo "error";
        echo '<a href="index.php">back</a>';
    }
} else {
    echo "<center><form method='post' action='index.php'>url:<input name='url' type='text' value='http://'><input type='submit' value='Submit'>
</form></center>";
    ac("http://images.google.cn/imgcat?hl=zh-CN&sa=h&catid=566");
}
?>
用来抓取网页中的图片链接,要学的东西很多啊!

