欢迎来到福编程网,本站提供各种互联网专业知识!

发布一个用PHP fsockopen写的HTTP下载的类

发布时间:2007-02-22 作者: 来源:转载
如果支持打开远程内容的选项的话,实际上php用fopen或file_get_contents都能获得一个网页的内容,但是默认的函数有个不足的地方就是无法获取HTTP头,这在一些特殊的应用中很不方便,如,有一个链接:http://www.abc.com/showvd.asp?id=18假如它返回的是一个图
如果支持打开远程内容的选项的话,实际上php用fopen或file_get_contents都能获得一个网页的内容,但是默认的函数有个不足的地方就是无法获取HTTP头,这在一些特殊的应用中很不方便,如,有一个链接:

http://www.abc.com/showvd.asp?id=18

假如它返回的是一个图片,用默认函数就很难识别,但如果通过HTTP应答头来判断就简单多了,此外如果对方通过Refer来防盗链的话,也是无法获取的,用HTTP类就能完美解决这些问题,而且速度也相差无几。


使用方法:

$hd=newDedeHttpDown();
$hd->OpenUrl("http://www.dedecms.com");
echo$hd->GetHtml();
//如果保存为文件则用$hd->SaveBin("dede.html");
$hd->Close();

获得http请求头用
$hd->GetHead("key")
设置请求头
$hd->SetHead(key,value);(必须在调用OpenUrl之前设定)


代码如下:

/*---------------------------------------------------------------------
//织梦Http下载类V1.0
//出自:织梦之旅http://www.dedecms.com
//作者:IT柏拉图
//时间:2005-11-1312:39
//声明:首发在落伍者网站,转载请保留版权信息
---------------------------------------------------------------------*/
classDedeHttpDown
{
var$m_url="";
var$m_urlpath="";
var$m_scheme="http";
var$m_host="";
var$m_port="80";
var$m_user="";
var$m_pass="";
var$m_path="/";
var$m_query="";
var$m_fp="";
var$m_error="";
var$m_httphead="";
var$m_html="";
var$m_puthead="";
var$BaseUrlPath="";
var$HomeUrl="";
var$JumpCount=0;//防止多重重定向陷入死循环
//
//初始化系统
//
functionPrivateInit($url)
{
if($url=="")return;
$urls="";
$urls=@parse_url($url);
$this->m_url=$url;
if(is_array($urls))
{
$this->m_host=$urls["host"];
if(!empty($urls["scheme"]))$this->m_scheme=$urls["scheme"];

if(!empty($urls["user"])){
$this->m_user=$urls["user"];
}

if(!empty($urls["pass"])){
$this->m_pass=$urls["pass"];
}

if(!empty($urls["port"])){
$this->m_port=$urls["port"];
}

if(!empty($urls["path"]))$this->m_path=$urls["path"];
$this->m_urlpath=$this->m_path;

if(!empty($urls["query"])){
$this->m_query=$urls["query"];
$this->m_urlpath.="?".$this->m_query;
}
$this->HomeUrl=$urls["host"];
$this->BaseUrlPath=$this->HomeUrl.$urls["path"];
$this->BaseUrlPath=ereg_replace("/([^/]*).(.*)$","/",$this->BaseUrlPath);
$this->BaseUrlPath=ereg_replace("/$","",$this->BaseUrlPath);
}
}
//
//打开指定网址
//
functionOpenUrl($url)
{
//重设各参数
$this->m_url="";
$this->m_urlpath="";
$this->m_scheme="http";
$this->m_host="";
$this->m_port="80";
$this->m_user="";
$this->m_pass="";
$this->m_path="/";
$this->m_query="";
$this->m_error="";
$this->JumpCount=0;
$this->m_httphead=Array();
//$this->m_puthead="";
$this->m_html="";
$this->Close();
//初始化系统
$this->PrivateInit($url);
$this->PrivateStartSession();
}
//
//打开303重定向网址
//
functionJumpOpenUrl($url)
{
//重设各参数
$this->m_url="";
$this->m_urlpath="";
$this->m_scheme="http";
$this->m_host="";
$this->m_port="80";
$this->m_user="";
$this->m_pass="";
$this->m_path="/";
$this->m_query="";
$this->m_error="";
$this->JumpCount++;
$this->m_httphead=Array();
$this->m_html="";
$this->Close();
//初始化系统
$this->PrivateInit($url);
$this->PrivateStartSession();
}
//
//获得某操作错误的原因
//
functionprintError()
{
echo"错误信息:".$this->m_error;
echo"具体返回头:

";
foreach($this->m_httpheadas$k=>$v)
{echo"$k=>$v

rn";}
}
//
//判别用Get方法发送的头的应答结果是否正确
//
functionIsGetOK()
{
if(ereg("^2",$this->GetHead("http-state")))
{returntrue;}
else
{
$this->m_error.=$this->GetHead("http-state")."-".$this->GetHead("http-describe")."

";
returnfalse;
}
}
//
//看看返回的网页是否是text类型
//
functionIsText()
{
if(ereg("^2",$this->GetHead("http-state"))
&&eregi("^text",$this->GetHead("content-type")))
{returntrue;}
else
{
$this->m_error.="内容为非文本类型或网址重定向

";
returnfalse;
}
}
//
//判断返回的网页是否是特定的类型
//
functionIsContentType($ctype)
{
if(ereg("^2",$this->GetHead("http-state"))
&&$this->GetHead("content-type")==strtolower($ctype))
{returntrue;}
else
{
$this->m_error.="类型不对".$this->GetHead("content-type")."

";
returnfalse;
}
}
//
//用Http协议下载文件
//
functionSaveToBin($savefilename)
{
if(!$this->IsGetOK())returnfalse;
if(@feof($this->m_fp)){$this->m_error="连接已经关闭!";returnfalse;}
$fp=fopen($savefilename,"w");
while(!feof($this->m_fp)){
fwrite($fp,fread($this->m_fp,1024));
}
fclose($this->m_fp);

fclose($fp);
returntrue;
}
//
//保存网页内容为Text文件
//
functionSaveToText($savefilename)
{
if($this->IsText())$this->SaveBinFile($savefilename);
elsereturn"";
}
//
//用Http协议获得一个网页的内容
//
functionGetHtml()
{
if(!$this->IsText())return"";
if($this->m_html!="")return$this->m_html;
if(!$this->m_fp||@feof($this->m_fp))return"";
while(!feof($this->m_fp)){
$this->m_html.=fgets($this->m_fp,256);
}
@fclose($this->m_fp);
return$this->m_html;
}
//
//开始HTTP会话
//
functionPrivateStartSession()
{

if(!$this->PrivateOpenHost()){
$this->m_error.="打开远程主机出错!";
returnfalse;
}

if($this->GetHead("http-edition")=="HTTP/1.1")$httpv="HTTP/1.1";
else$httpv="HTTP/1.0";

//发送固定的起始请求头GET、Host信息
fputs($this->m_fp,"GET".$this->m_urlpath."$httpvrn");
$this->m_puthead["Host"]=$this->m_host;

//发送用户自定义的请求头
if(!isset($this->m_puthead["Accept"])){$this->m_puthead["Accept"]="*/*";}
if(!isset($this->m_puthead["User-Agent"])){$this->m_puthead["User-Agent"]="Mozilla/4.0(compatible;MSIE6.0;WindowsNT5.2)";}
if(!isset($this->m_puthead["Refer"])){$this->m_puthead["Refer"]="http://".$this->m_puthead["Host"];}
foreach($this->m_putheadas$k=>$v){
$k=trim($k);
$v=trim($v);
if($k!=""&&$v!=""){
fputs($this->m_fp,"$k:$vrn");
}
}

//发送固定的结束请求头
//HTTP1.1协议必须指定文档结束后关闭链接,否则读取文档时无法使用feof判断结束
if($httpv=="HTTP/1.1")fputs($this->m_fp,"Connection:Closernrn");
elsefputs($this->m_fp,"rn");

//获取应答头状态信息
$httpstas=explode("",fgets($this->m_fp,256));
$this->m_httphead["http-edition"]=trim($httpstas[0]);
$this->m_httphead["http-state"]=trim($httpstas[1]);
$this->m_httphead["http-describe"]="";
for($i=2;$i<count($httpstas);$i++){
$this->m_httphead["http-describe"].="".trim($httpstas[$i]);
}
//获取详细应答头
while(!feof($this->m_fp)){
$line=trim(fgets($this->m_fp,256));
if($line=="")break;
$hkey="";
$hvalue="";
$v=0;
for($i=0;$iif($v==1)$hvalue.=$line[$i];
if($line[$i]==":")$v=1;
if($v==0)$hkey.=$line[$i];
}
$hkey=trim($hkey);
if($hkey!="")$this->m_httphead[strtolower($hkey)]=trim($hvalue);
}
//判断是否是3xx开头的应答
if(ereg("^3",$this->m_httphead["http-state"]))
{
if($this->JumpCount>3)return;
if(isset($this->m_httphead["location"])){
$newurl=$this->m_httphead["location"];
if(eregi("^http",$newurl)){
$this->JumpOpenUrl($newurl);
}
else{
$newurl=$this->FillUrl($newurl);
$this->JumpOpenUrl($newurl);
}
}
else
{$this->m_error="无法识别的转移应答!";}
}//
}
//
//获得一个Http头的值
//
functionGetHead($headname)
{
$headname=strtolower($headname);
if(isset($this->m_httphead[$headname]))
return$this->m_httphead[$headname];
else
return"";
}
//
//设置Http头的值
//
functionSetHead($skey,$svalue)
{
$this->m_puthead[$skey]=$svalue;
}
//
//打开连接
//
functionPrivateOpenHost()
{
if($this->m_host=="")returnfalse;
$this->m_fp=@fsockopen($this->m_host,$this->m_port,&$errno,&$errstr,10);
if(!$this->m_fp){
$this->m_error=$errstr;
returnfalse;
}
else{
returntrue;
}
}
//
//关闭连接
//
functionClose(){
@fclose($this->m_fp);
}
//
//补全相对网址
//
functionFillUrl($surl)
{
$i=0;
$dstr="";
$pstr="";
$okurl="";
$pathStep=0;
$surl=trim($surl);
if($surl=="")return"";
$pos=strpos($surl,"#");
if($pos>0)$surl=substr($surl,0,$pos);
if($surl[0]=="/"){
$okurl="http://".$this->HomeUrl."/".$surl;
}
elseif($surl[0]==".")
{
if(strlen($surl)<=2)return"";
elseif($surl[0]=="/")
{
$okurl="http://".$this->BaseUrlPath."/".substr($surl,2,strlen($surl)-2);
}
else{
$urls=explode("/",$surl);
foreach($urlsas$u){
if($u=="..")$pathStep++;
elseif($i<count($urls)-1)$dstr.=$urls[$i]."/";
else$dstr.=$urls[$i];
$i++;
}
$urls=explode("/",$this->BaseUrlPath);
if(count($urls)<=$pathStep)
return"";
else{
$pstr="http://";
for($i=0;$i<count($urls)-$pathStep;$i++)
{$pstr.=$urls[$i]."/";}
$okurl=$pstr.$dstr;
}
}
}
else
{
if(strlen($surl)<7)
$okurl="http://".$this->BaseUrlPath."/".$surl;
elseif(strtolower(substr($surl,0,7))=="http://")
$okurl=$surl;
else
$okurl="http://".$this->BaseUrlPath."/".$surl;
}
$okurl=eregi_replace("^(http://)","",$okurl);
$okurl=eregi_replace("/{1,}","/",$okurl);
return"http://".$okurl;
}
}
?>

相关推荐