| 订阅 | 在线投稿
分享
 
 
 

PHP采集程序中常用的函数

2008-12-19 08:07:55 编辑來源:互联网 国际版 评论
 
 
  // Return the URL of the request currently being served.
  // Uses $_SERVER["REQUEST_URI"] when it is non-empty; otherwise falls back to
  // $_SERVER["PHP_SELF"], followed by "?" and $_SERVER["QUERY_STRING"] when the
  // query string is non-empty.
  function get_php_url(){
      // Preferred source: the request URI as provided in $_SERVER.
      if (!empty($_SERVER["REQUEST_URI"])) {
          return $_SERVER["REQUEST_URI"];
      }

      // Fallback: rebuild the address from the script path and the query string.
      $self = $_SERVER["PHP_SELF"];

      return empty($_SERVER["QUERY_STRING"])
          ? $self
          : $self."?".$_SERVER["QUERY_STRING"];
  }

  // Convert full-width digits to ASCII digits and reduce the input to a bare number string.
  //
  // $fnum    text that may contain full-width digits (U+FF10..U+FF19); the literals
  //          below must be stored in the same encoding as the input text
  // returns  the ASCII digits and dots that remain, with leading zeros removed,
  //          or the integer 0 when nothing remains
  function GetAlabNum($fnum){
      // The digit table must hold the FULL-WIDTH forms; with ASCII digits on both
      // sides the replacement is a no-op and full-width input is discarded below.
      $fullWidth = array("０","１","２","３","４","５","６","７","８","９");
      $halfWidth = array("0","1","2","3","4","5","6","7","8","9");
      $fnum = str_replace($fullWidth, $halfWidth, $fnum);

      // ereg_replace() no longer exists in PHP 7+; this is the PCRE equivalent:
      // drop everything that is not a digit or a dot, and strip leading zeros.
      $fnum = preg_replace('/[^0-9.]|^0+/', '', $fnum);

      if ($fnum === '' || $fnum === null) {
          $fnum = 0;
      }

      return $fnum;
  }

  // Convert plain text to an HTML fragment: spaces become &nbsp;, angle brackets
  // are escaped, and every line break becomes "<br/>" followed by CRLF.
  //
  // $txt     plain text
  // returns  the converted string
  //
  // NOTE(review): the replacement string for spaces appears empty in the listing
  // (which deletes every space and glues words together); "&nbsp;" is restored
  // here as the most likely original entity - confirm against the upstream source.
  // NOTE: "&" is not escaped, exactly as in the original.
  function Text2Html($txt){
      $txt = str_replace(" ", "&nbsp;", $txt);
      $txt = str_replace(array("<", ">"), array("&lt;", "&gt;"), $txt);

      // Match CRLF as one unit. The earlier pattern used the U (ungreedy) modifier,
      // which made "{1,}" match a single character at a time, so a Windows line
      // ending produced two <br/> tags. LF-only text converts exactly as before.
      $txt = preg_replace('/\r\n|\r|\n/', "<br/>\r\n", $txt);

      return $txt;
  }

  // Neutralise HTML markup by escaping the angle brackets:
  // "<" becomes "&lt;" and ">" becomes "&gt;". Nothing else is changed.
  function ClearHtml($str){
      $brackets = array('<', '>');
      $entities = array('&lt;', '&gt;');

      return str_replace($brackets, $entities, $str);
  }

  // Turn root-relative href/src attribute values in $content into absolute URLs
  // on the host of $feed_url.
  //
  // $content   HTML text to rewrite
  // $feed_url  URL the content was fetched from; only http, https and ftp URLs
  //            cause a rewrite
  // returns    the rewritten HTML, or $content unchanged when no host or no
  //            supported scheme can be found in $feed_url
  function relative_to_absolute($content, $feed_url) {
      // Host part: drop the scheme, then everything from the first "/" onwards.
      $host = preg_replace('/(http|https|ftp|news):\/\//', '', $feed_url);
      $host = preg_replace('/\/.*/', '', $host);
      if ($host == '') {
          return $content;
      }

      if (!preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol)) {
          return $content;
      }
      $base = $protocol[0].$host;

      // (?!\/) skips protocol-relative URLs such as src="//cdn.example.com/x.js",
      // which are already host-qualified and were previously corrupted into
      // "scheme://host//cdn.example.com/x.js". Single-quoted and upper-case
      // attributes are now handled as well. A callback is used so that "$" or "\"
      // in the host can never be read as a back-reference in the replacement.
      return preg_replace_callback(
          '/(href|src)=(["\'])\/(?!\/)/i',
          function ($m) use ($base) {
              return $m[1].'='.$m[2].$base.'/';
          },
          $content
      );
  }

  // Collect the anchors found in an HTML fragment.
  //
  // $code    HTML text to scan
  // returns  array('name' => list of link texts, 'url' => list of href values);
  //          the two lists are index-aligned
  //
  // Only anchors whose first attribute is href and whose text contains no ">"
  // are matched by the pattern.
  function get_all_url($code){
      $pattern = '/<a\s+href=["|\']?([^>"\' ]+)["|\']?\s*[^>]*>([^>]+)<\/a>/i';

      $found = array();
      preg_match_all($pattern, $code, $found);

      // Group 1 captures the href value, group 2 the link text.
      $links = array();
      $links['name'] = $found[2];
      $links['url']  = $found[1];

      return $links;
  }

  // Return the text found between the first occurrence of $start and the next
  // occurrence of $end.
  //
  // $str     text to search
  // $start   opening delimiter
  // $end     closing delimiter
  // returns  the enclosed text; '' when $start does not occur in $str;
  //          null when either delimiter is the empty string
  function get_tag_data($str, $start, $end){
      if ($start == '' || $end == '') {
          return null;
      }

      $pieces = explode($start, $str);

      // $start is absent: there is no second piece. Return '' explicitly instead of
      // reading an undefined offset (a warning) and passing null on to explode().
      if (!isset($pieces[1])) {
          return '';
      }

      $inner = explode($end, $pieces[1]);

      return $inner[0];
  }

  // Convert each row of an HTML table into one CSV-style string.
  //
  // $table   HTML containing <tr>/<td> markup
  // returns  list of strings, one per row, each of the form "cell1","cell2";
  //          all spaces and &nbsp; entities are removed from the cell text
  function get_tr_array($table) {
      // An opening <td ...> starts a quoted field, </td> closes it and adds the comma.
      $table = preg_replace('/<td[^>]*>/i', '"', $table);
      // Closing tags are matched case-insensitively, like the opening tags, so
      // upper-case markup (</TD>, </TR>) is no longer silently dropped.
      $table = str_ireplace('</td>', '",', $table);
      $table = str_ireplace('</tr>', '{tr}', $table);

      // Strip every remaining HTML tag.
      $table = preg_replace('/<[^<>]*>/', '', $table);

      // Remove line breaks together with any indentation that follows them. The
      // earlier pattern required at least one whitespace character AFTER the line
      // break, so a bare "\n" between </td> and </tr> survived and the ",{tr}"
      // split below found no rows.
      $table = preg_replace('/[\r\n]\s*/', '', $table);

      // Remove spaces and the &nbsp; entity (the entity had degraded into a second,
      // redundant space removal).
      $table = str_replace(array(' ', '&nbsp;'), '', $table);

      $rows = explode(',{tr}', $table);
      array_pop($rows); // drop the remainder after the last row marker

      return $rows;
  }

  // Convert an HTML table into a two-dimensional array of cell texts.
  //
  // $table   HTML containing <table>/<tr>/<td> markup
  // returns  list of rows, each a list of cell strings; an empty array when the
  //          input contains no complete row. All spaces and &nbsp; entities are
  //          removed from the cell text.
  function get_td_array($table) {
      // Opening table/row/cell tags carry no data.
      $table = preg_replace('/<(?:table|tr|td)[^>]*>/i', '', $table);

      // Closing tags become row/cell markers. Matching is case-insensitive, like
      // the opening tags, so </TR> and </TD> are recognised too.
      $table = str_ireplace('</tr>', '{tr}', $table);
      $table = str_ireplace('</td>', '{td}', $table);

      // Strip every remaining HTML tag.
      $table = preg_replace('/<[^<>]*>/', '', $table);

      // Remove line breaks together with any indentation that follows them. The
      // earlier pattern required whitespace AFTER the line break, so bare "\n"
      // characters leaked into the cell text.
      $table = preg_replace('/[\r\n]\s*/', '', $table);

      // Remove spaces and the &nbsp; entity (the entity had degraded into a second,
      // redundant space removal).
      $table = str_replace(array(' ', '&nbsp;'), '', $table);

      $rows = explode('{tr}', $table);
      array_pop($rows); // drop the remainder after the last row marker

      // Initialised up front so that a table without rows yields an empty array
      // rather than an undefined variable.
      $td_array = array();
      foreach ($rows as $row) {
          $cells = explode('{td}', $row);
          array_pop($cells); // drop the remainder after the last cell marker
          $td_array[] = $cells;
      }

      return $td_array;
  }

  // Extract the English words (runs of the letters a-z / A-Z) from a string.
  //
  // $str       text to scan
  // $distinct  when true (the default) duplicate words are removed
  // returns    the words as a re-indexed list ordered by sort()
  function split_en_str($str,$distinct=true) {
      $found = array();
      preg_match_all('/([a-zA-Z]+)/', $str, $found);

      $words = ($distinct == true) ? array_unique($found[1]) : $found[1];
      sort($words);

      return $words;
  }
 
 
//获得当前的脚本网址 function get_php_url(){ if(!empty($_SERVER["REQUEST_URI"])){ $scriptName = $_SERVER["REQUEST_URI"]; $nowurl = $scriptName; }else{ $scriptName = $_SERVER["PHP_SELF"]; if(empty($_SERVER["QUERY_STRING"])) $nowurl = $scriptName; else $nowurl = $scriptName."?".$_SERVER["QUERY_STRING"]; } return $nowurl; } //把全角数字转为半角数字 function GetAlabNum($fnum){ $nums = array("0","1","2","3","4","5","6","7","8","9"); $fnums = "0123456789"; for($i=0;$i<=9;$i++) $fnum = str_replace($nums[$i],$fnums[$i],$fnum); $fnum = ereg_replace("[^0-9\.]|^0{1,}","",$fnum); if($fnum=="") $fnum=0; return $fnum; } //去除HTML标记 function Text2Html($txt){ $txt = str_replace(" "," ",$txt); $txt = str_replace("<","&lt;",$txt); $txt = str_replace(">","&gt;",$txt); $txt = preg_replace("/[\r\n]{1,}/isU","<br/>\r\n",$txt); return $txt; } //清除HTML标记 function ClearHtml($str){ $str = str_replace('<','&lt;',$str); $str = str_replace('>','&gt;',$str); return $str; } //相对路径转化成绝对路径 function relative_to_absolute($content, $feed_url) { preg_match('/(http|https|ftp):\/\//', $feed_url, $protocol); $server_url = preg_replace("/(http|https|ftp|news):\/\//", "", $feed_url); $server_url = preg_replace("/\/.*/", "", $server_url); if ($server_url == '') { return $content; } if (isset($protocol[0])) { $new_content = preg_replace('/href="\//', 'href="'.$protocol[0].$server_url.'/', $content); $new_content = preg_replace('/src="\//', 'src="'.$protocol[0].$server_url.'/', $new_content); } else { $new_content = $content; } return $new_content; } //取得所有链接 function get_all_url($code){ preg_match_all('/<a\s+href=["|\']?([^>"\' ]+)["|\']?\s*[^>]*>([^>]+)<\/a>/i',$code,$arr); return array('name'=>$arr[2],'url'=>$arr[1]); } //获取指定标记中的内容 function get_tag_data($str, $start, $end){ if ( $start == '' || $end == '' ){ return; } $str = explode($start, $str); $str = explode($end, $str[1]); return $str[0]; } //HTML表格的每行转为CSV格式数组 function get_tr_array($table) { $table = preg_replace("'<td[^>]*?>'si",'"',$table); $table = 
str_replace("</td>",'",',$table); $table = str_replace("</tr>","{tr}",$table); //去掉 HTML 标记 $table = preg_replace("'<[\/\!]*?[^<>]*?>'si","",$table); //去掉空白字符 $table = preg_replace("'([\r\n])[\s]+'","",$table); $table = str_replace(" ","",$table); $table = str_replace(" ","",$table); $table = explode(",{tr}",$table); array_pop($table); return $table; } //将HTML表格的每行每列转为数组,采集表格数据 function get_td_array($table) { $table = preg_replace("'<table[^>]*?>'si","",$table); $table = preg_replace("'<tr[^>]*?>'si","",$table); $table = preg_replace("'<td[^>]*?>'si","",$table); $table = str_replace("</tr>","{tr}",$table); $table = str_replace("</td>","{td}",$table); //去掉 HTML 标记 $table = preg_replace("'<[\/\!]*?[^<>]*?>'si","",$table); //去掉空白字符 $table = preg_replace("'([\r\n])[\s]+'","",$table); $table = str_replace(" ","",$table); $table = str_replace(" ","",$table); $table = explode('{tr}', $table); array_pop($table); foreach ($table as $key=>$tr) { $td = explode('{td}', $tr); array_pop($td); $td_array[] = $td; } return $td_array; } //返回字符串中的所有单词 $distinct=true 去除重复 function split_en_str($str,$distinct=true) { preg_match_all('/([a-zA-Z]+)/',$str,$match); if ($distinct == true) { $match[1] = array_unique($match[1]); } sort($match[1]); return $match[1]; }
󰈣󰈤
日版宠物情人插曲《Winding Road》歌词

日版宠物情人2017的插曲,很带节奏感,日语的,女生唱的。 最后听见是在第8集的时候女主手割伤了,然后男主用嘴帮她吸了一下,插曲就出来了。 歌手:Def...

兄弟共妻,我成了他们夜里的美食

老钟家的两个儿子很特别,就是跟其他的人不太一样,魔一般的执着。兄弟俩都到了要结婚的年龄了,不管自家老爹怎么磨破嘴皮子,兄弟俩说不娶就不娶,老父母为兄弟两操碎了心...

网络安全治理:国家安全保障的主要方向是打击犯罪,而不是处置和惩罚受害者

来源:中国青年报 新的攻击方法不断涌现,黑客几乎永远占据网络攻击的上风,我们不可能通过技术手段杜绝网络攻击。国家安全保障的主要方向是打击犯罪,而不是处置和惩罚...

 
 
 
>>返回首页<<
 为你推荐
 
 
 
 转载本文
 UBB代码 HTML代码
复制到剪贴板...
 
 
 热帖排行
 
纯美的她_仔婷
苏州河畔_秀气女生
痞子的甘南日记
疑是银河落九天
 
 
王朝网络微信公众号
微信扫码关注本站公众号wangchaonetcn
 
  免责声明:本文仅代表作者个人观点,与王朝网络无关。王朝网络登载此文出于传递更多信息之目的,并不意味着赞同其观点或证实其描述,其原创性以及文中陈述文字和内容未经本站证实,对本文以及其中全部或者部分内容、文字的真实性、完整性、及时性本站不作任何保证或承诺,请读者仅作参考,并请自行核实相关内容。
 
 
©2005- 王朝网络 版权所有