欢迎您访问程序员文章站!本站旨在为大家提供和分享程序员计算机编程知识!
您现在的位置是: 首页  >  php教程

很好用的php rss解析类

程序员文章站 2022-06-01 09:28:28
...

rss

/**
* Rss Parse Class ver0.1
*
* @link http://www.ugia.cn/?p=42
* @author: legend (PASiOcn@msn.com)
* @version 0.1
*/

/**
 * Minimal event-driven RSS reader built on PHP's expat-style XML parser.
 *
 * Usage: call parseRss($url), then read the results with getChannel(),
 * getItems(), getImages() or getAll(). Results accumulate across calls;
 * call rssReset() before parsing another feed with the same instance.
 *
 * Collected data:
 *  - channel: title, description, link, pubdate (Unix timestamp or false)
 *  - items:   list of arrays with title, description (tags stripped), link,
 *             pubdate (Unix timestamp or false)
 *  - images:  list of URLs taken from <image><url> elements
 */
class RssParse
{
    /** @var string Lower-cased charset named in the XML declaration ("utf-8" if none). */
    public $encoding = "utf-8";

    /** @var string Feed location used when parseRss() is called without an argument. */
    public $rssurl = "http://www.ugia.cn/wp-rss2.php";

    /** @var string Raw feed document (converted to UTF-8 by parseRss() when necessary). */
    public $resource = "";

    /** @var string Lower-cased name of the element whose text is currently being read. */
    public $tag = "";

    /** @var bool True while the parser is between <channel> and </channel>. */
    public $insidechannel = false;

    /** @var bool True while the parser is between <item> and </item>. */
    public $insideitem = false;

    /** @var bool True while the parser is between <image> and </image>. */
    public $insideimage = false;

    /** @var array Fields of the item currently being assembled. */
    public $item = array();

    /** @var array Channel-level fields. */
    public $channel = array();

    /** @var string Text of the image <url> element currently being assembled. */
    public $image = "";

    /** @var array Completed items, in document order. */
    public $items = array();

    /** @var array Completed image URLs, in document order. */
    public $images = array();

    /**
     * Per scope ("item" / "channel"): the date element that first supplied
     * the pubdate text. Other date elements in the same scope are ignored so
     * that e.g. <pubDate> and <lastBuildDate> are not glued together.
     *
     * @var array
     */
    private $dateTags = array();

    /** @var array Element name (lower case) => item field it feeds. */
    private static $itemFields = array(
        "title"       => "title",
        "description" => "description",
        "link"        => "link",
        "dc:date"     => "pubdate",
        "pubdate"     => "pubdate",
        "modified"    => "pubdate",
    );

    /** @var array Element name (lower case) => channel field it feeds. */
    private static $channelFields = array(
        "title"         => "title",
        "description"   => "description",
        "link"          => "link",
        "dc:date"       => "pubdate",
        "pubdate"       => "pubdate",
        "lastbuilddate" => "pubdate",
        "modified"      => "pubdate",
    );

    /**
     * Discards everything collected so far, including in-progress parser state.
     *
     * @return void
     */
    public function rssReset()
    {
        $this->item = array();
        $this->channel = array();
        $this->image = "";
        $this->items = array();
        $this->images = array();

        $this->tag = "";
        $this->insidechannel = false;
        $this->insideitem = false;
        $this->insideimage = false;
        $this->dateTags = array();
    }

    /**
     * Reads the document at $this->rssurl into $this->resource.
     *
     * @return bool True on success, false when the stream cannot be opened or read.
     */
    public function getResource()
    {
        // Failure is reported through the return value, so the open warning is suppressed.
        $fp = @fopen($this->rssurl, "rb");

        if (!is_resource($fp)) {
            return false;
        }

        $contents = stream_get_contents($fp);
        fclose($fp);

        if ($contents === false) {
            return false;
        }

        $this->resource = $contents;

        return true;
    }

    /**
     * Sets $this->encoding from the encoding pseudo-attribute of the XML
     * declaration in $this->resource, falling back to "utf-8".
     *
     * @return void
     */
    public function getEncoding()
    {
        $pattern = '/<\?xml\b[^>]*?\bencoding\s*=\s*(["\'])([^"\']+)\1/i';

        if (preg_match($pattern, $this->resource, $result)) {
            $this->encoding = strtolower($result[2]);
        } else {
            $this->encoding = "utf-8";
        }
    }

    /**
     * Downloads and parses a feed, appending to channel / items / images.
     *
     * Malformed XML is handled on a best-effort basis: whatever was read
     * before the parse error is kept and true is still returned.
     *
     * @param string $rssurl Feed location; when empty, $this->rssurl is used.
     * @return bool False if the feed could not be fetched or converted to UTF-8.
     */
    public function parseRss($rssurl = '')
    {
        if (!empty($rssurl)) {
            $this->rssurl = $rssurl;
        }

        if (!$this->getResource()) {
            return false;
        }

        $this->getEncoding();

        if ($this->encoding != "utf-8") {
            $converted = iconv($this->encoding, "UTF-8", $this->resource);
            if ($converted === false) {
                return false;
            }

            // The parser auto-detects the input charset from the XML
            // declaration, so the declaration must match the converted bytes.
            $relabelled = preg_replace(
                '/(<\?xml\b[^>]*?\bencoding\s*=\s*)(["\'])[^"\']+\2/i',
                '${1}"UTF-8"',
                $converted,
                1
            );
            $this->resource = ($relabelled === null) ? $converted : $relabelled;
        }

        $xml_parser = xml_parser_create("UTF-8");

        xml_parser_set_option($xml_parser, XML_OPTION_CASE_FOLDING, false);
        // Array callables bind the handlers to this object without the
        // deprecated xml_set_object().
        xml_set_element_handler(
            $xml_parser,
            array($this, "startElement"),
            array($this, "endElement")
        );
        xml_set_character_data_handler($xml_parser, array($this, "characterData"));

        xml_parse($xml_parser, $this->resource, true);

        // Parsers are resources before PHP 8 and self-freeing objects afterwards.
        if (is_resource($xml_parser)) {
            xml_parser_free($xml_parser);
        }

        if (count($this->channel) > 1) {
            $published = isset($this->channel['pubdate'])
                ? $this->mystrtotime($this->channel['pubdate'])
                : false;

            // No usable channel date: fall back to the first item's date.
            if (($published === false || $published <= 0) && isset($this->items[0]['pubdate'])) {
                $published = $this->items[0]['pubdate'];
            }

            $this->channel['pubdate'] = $published;
        }

        return true;
    }

    /**
     * @return array Array with the keys 'channel', 'items' and 'images'.
     */
    public function getAll()
    {
        return array(
            'channel' => $this->channel,
            'items' => $this->items,
            'images' => $this->images
        );
    }

    /**
     * @return array Channel fields collected so far.
     */
    public function getChannel()
    {
        return $this->channel;
    }

    /**
     * @return array Items collected so far.
     */
    public function getItems()
    {
        return $this->items;
    }

    /**
     * @return array Image URLs collected so far.
     */
    public function getImages()
    {
        return $this->images;
    }

    /**
     * XML parser callback: an element was opened.
     *
     * @param mixed  $parser Parser handle (unused).
     * @param string $name   Element name as written in the document.
     * @param array  $attrs  Element attributes (unused).
     * @return void
     */
    public function startElement($parser, $name, $attrs)
    {
        if ($this->insideitem || $this->insideimage || $this->insidechannel) {
            $this->tag = strtolower($name);
        }

        switch ($name) {
            case "channel":
                $this->insidechannel = true;
                break;
            case "item":
                $this->insideitem = true;
                break;
            case "image":
                $this->insideimage = true;
                break;
        }
    }

    /**
     * XML parser callback: an element was closed.
     *
     * @param mixed  $parser Parser handle (unused).
     * @param string $name   Element name as written in the document.
     * @return void
     */
    public function endElement($parser, $name)
    {
        switch ($name) {
            case "channel":
                $this->insidechannel = false;
                break;

            case "url":
                if ($this->insideimage) {
                    $this->images[] = trim($this->image);
                    $this->image = "";
                }
                break;

            case "image":
                // Stay "inside image" until the element really ends so its
                // <title>/<link> children are not mistaken for the channel's.
                $this->insideimage = false;
                $this->image = "";
                break;

            case "item":
                $rawDate = isset($this->item['pubdate']) ? $this->item['pubdate'] : "";
                $summary = isset($this->item['description']) ? $this->item['description'] : "";

                $this->item['pubdate'] = $this->mystrtotime($rawDate);

                $summary = trim(strip_tags($summary));
                // Drop non-breaking-space entities/characters left over from
                // HTML markup; ordinary spaces are kept.
                $summary = str_replace(array("&nbsp;", "\xC2\xA0"), "", $summary);
                // Note: the description is deliberately not truncated here.
                $this->item['description'] = $summary;

                $this->items[] = $this->item;
                $this->item = array();
                $this->insideitem = false;
                unset($this->dateTags['item']);
                break;
        }

        // Text between elements (indentation, newlines) belongs to no field.
        $this->tag = "";
    }

    /**
     * XML parser callback: character data inside the current element.
     * May be invoked several times for a single element.
     *
     * @param mixed  $parser Parser handle (unused).
     * @param string $data   Chunk of text.
     * @return void
     */
    public function characterData($parser, $data)
    {
        if ($this->insideitem) {
            $this->collect($this->item, 'item', self::$itemFields, $data);
        } elseif ($this->insideimage) {
            if ($this->tag == "url") {
                $this->image .= $data;
            }
        } elseif ($this->insidechannel) {
            $this->collect($this->channel, 'channel', self::$channelFields, $data);
        }
    }

    /**
     * Appends $data to the field of $target that the current tag maps to.
     *
     * @param array  $target   Array being filled ($this->item or $this->channel).
     * @param string $scope    "item" or "channel"; key into $this->dateTags.
     * @param array  $fieldMap Element name => field name.
     * @param string $data     Text chunk to append.
     * @return void
     */
    private function collect(array &$target, $scope, array $fieldMap, $data)
    {
        if (!isset($fieldMap[$this->tag])) {
            return;
        }

        $field = $fieldMap[$this->tag];

        if ($field === 'pubdate') {
            if (!isset($this->dateTags[$scope])) {
                $this->dateTags[$scope] = $this->tag;
            } elseif ($this->dateTags[$scope] !== $this->tag) {
                // A different date element already supplied this date.
                return;
            }
        }

        if (!isset($target[$field])) {
            $target[$field] = "";
        }
        $target[$field] .= $data;
    }

    /**
     * Converts a feed date to a Unix timestamp.
     *
     * There are too many date formats in the wild; beyond what strtotime()
     * understands, only one extra shape is recognised: an ISO-8601 date-time
     * with a colon-separated offset ("2005-01-01T12:00:00+08:00"), which is
     * rewritten to "2005-01-01 12:00:00 +0800" and retried.
     *
     * @param string $time Date text taken from the feed.
     * @return int|false Timestamp, or false when the text cannot be parsed.
     */
    public function mystrtotime($time)
    {
        $time = trim((string) $time);
        if ($time === '') {
            return false;
        }

        $curtime = strtotime($time);

        // false (PHP >= 5.1) or -1 (older versions) both signal failure.
        if ($curtime === false || $curtime <= 0) {
            $normalised = preg_replace(
                '/(\d{4}-\d{2}-\d{2})T(\d{2}:\d{2}:\d{2})([+-])(\d{2}):(\d{2})/',
                '$1 $2 $3$4$5',
                $time
            );
            if ($normalised !== null) {
                $time = $normalised;
            }

            $curtime = strtotime($time);
        }

        return $curtime;
    }

    /**
     * Terminates the script, printing $msg.
     *
     * @param string $msg Message to output before exiting.
     * @return void
     */
    public function getError($msg)
    {
        die($msg);
    }
}
?>