Файл: wordpress/wp-includes/SimplePie/Decode/HTML/Entities.php
Строк: 265
<?php
/**
* SimplePie
*
* A PHP-Based RSS and Atom Feed Framework.
* Takes the hard work out of managing a complete RSS/Atom solution.
*
* Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are
* permitted provided that the following conditions are met:
*
* * Redistributions of source code must retain the above copyright notice, this list of
* conditions and the following disclaimer.
*
* * Redistributions in binary form must reproduce the above copyright notice, this list
* of conditions and the following disclaimer in the documentation and/or other materials
* provided with the distribution.
*
* * Neither the name of the SimplePie Team nor the names of its contributors may be used
* to endorse or promote products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
*
* @package SimplePie
* @version 1.3.1
* @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
* @author Ryan Parman
* @author Geoffrey Sneddon
* @author Ryan McCue
* @link http://simplepie.org/ SimplePie
* @license http://www.opensource.org/licenses/bsd-license.php BSD License
*/
/**
* Decode HTML Entities
*
* This implements HTML5 as of revision 967 (2007-06-28)
*
* @deprecated Use DOMDocument instead!
* @package SimplePie
*/
class SimplePie_Decode_HTML_Entities
{
	/**
	 * Data to be parsed
	 *
	 * Character references are replaced in place, so after parse() this holds
	 * the decoded output.
	 *
	 * @access private
	 * @var string
	 */
	public $data = '';

	/**
	 * Currently consumed bytes
	 *
	 * Holds the '&' plus every byte consumed for the reference that is being
	 * examined; parse() clears it after each candidate.
	 *
	 * @access private
	 * @var string
	 */
	public $consumed = '';

	/**
	 * Position of the current byte being parsed
	 *
	 * @access private
	 * @var int
	 */
	public $position = 0;

	/**
	 * Create an instance of the class with the input data
	 *
	 * @access public
	 * @param string $data Input data
	 */
	public function __construct($data)
	{
		$this->data = $data;
	}

	/**
	 * Parse the input data
	 *
	 * Walks the data from the current position, handing every '&' to entity()
	 * and resuming the search wherever entity() left the cursor. The search
	 * result is kept in a local so that $position always stays an integer;
	 * as a consequence a second call simply finds nothing left to decode.
	 *
	 * @access public
	 * @return string Output data
	 */
	public function parse()
	{
		while (($ampersand = strpos($this->data, '&', $this->position)) !== false)
		{
			$this->position = $ampersand;
			$this->consume();
			$this->entity();
			$this->consumed = '';
		}
		return $this->data;
	}

	/**
	 * Consume the next byte
	 *
	 * @access private
	 * @return mixed The next byte, or false, if there is no more data
	 */
	public function consume()
	{
		if (!isset($this->data[$this->position]))
		{
			return false;
		}

		$byte = $this->data[$this->position];
		$this->consumed .= $byte;
		$this->position++;
		return $byte;
	}

	/**
	 * Consume a range of characters
	 *
	 * @access private
	 * @param string $chars Characters to consume
	 * @return mixed A series of characters that match the range, or false
	 */
	public function consume_range($chars)
	{
		$len = strspn($this->data, $chars, $this->position);
		if (!$len)
		{
			return false;
		}

		$data = substr($this->data, $this->position, $len);
		$this->consumed .= $data;
		$this->position += $len;
		return $data;
	}

	/**
	 * Unconsume one byte
	 *
	 * Must only be called after a consume() that actually returned a byte.
	 *
	 * @access private
	 */
	public function unconsume()
	{
		$this->consumed = substr($this->consumed, 0, -1);
		$this->position--;
	}

	/**
	 * Decode an entity
	 *
	 * Expects the '&' to have been consumed already. Looks at the following
	 * byte and dispatches to the numeric or the named reference handler. When
	 * the text is not a character reference the data is left untouched.
	 *
	 * @access private
	 */
	public function entity()
	{
		$next = $this->consume();
		if ($next === false)
		{
			// The '&' was the last byte of the input.
			return;
		}

		// TAB, LF, VT, FF, SPACE, '<' and '&' can never start a reference.
		// Hand the byte back so that a following '&' is examined by parse()
		// instead of being skipped.
		if (in_array($next, array("\x09", "\x0A", "\x0B", "\x0C", "\x20", "\x3C", "\x26"), true))
		{
			$this->unconsume();
			return;
		}

		if ($next === "\x23")
		{
			$this->numeric_reference();
		}
		else
		{
			$this->named_reference();
		}
	}

	/**
	 * Decode a numeric character reference ("&#123;" or "&#x7B;")
	 *
	 * Expects "&#" to have been consumed. A trailing ';' is optional. If no
	 * digits follow, nothing is replaced.
	 *
	 * @access private
	 */
	private function numeric_reference()
	{
		// Code points that are replaced by a fixed byte sequence instead of
		// their own UTF-8 encoding (CR plus the 0x80-0x9F range).
		static $windows_1252_specials = array(
			0x0D => "\x0A",
			0x80 => "\xE2\x82\xAC",
			0x81 => "\xEF\xBF\xBD",
			0x82 => "\xE2\x80\x9A",
			0x83 => "\xC6\x92",
			0x84 => "\xE2\x80\x9E",
			0x85 => "\xE2\x80\xA6",
			0x86 => "\xE2\x80\xA0",
			0x87 => "\xE2\x80\xA1",
			0x88 => "\xCB\x86",
			0x89 => "\xE2\x80\xB0",
			0x8A => "\xC5\xA0",
			0x8B => "\xE2\x80\xB9",
			0x8C => "\xC5\x92",
			0x8D => "\xEF\xBF\xBD",
			0x8E => "\xC5\xBD",
			0x8F => "\xEF\xBF\xBD",
			0x90 => "\xEF\xBF\xBD",
			0x91 => "\xE2\x80\x98",
			0x92 => "\xE2\x80\x99",
			0x93 => "\xE2\x80\x9C",
			0x94 => "\xE2\x80\x9D",
			0x95 => "\xE2\x80\xA2",
			0x96 => "\xE2\x80\x93",
			0x97 => "\xE2\x80\x94",
			0x98 => "\xCB\x9C",
			0x99 => "\xE2\x84\xA2",
			0x9A => "\xC5\xA1",
			0x9B => "\xE2\x80\xBA",
			0x9C => "\xC5\x93",
			0x9D => "\xEF\xBF\xBD",
			0x9E => "\xC5\xBE",
			0x9F => "\xC5\xB8"
		);

		$marker = $this->consume();
		if ($marker === "\x78" || $marker === "\x58")
		{
			$range = '0123456789ABCDEFabcdef';
			$hex = true;
		}
		else
		{
			$range = '0123456789';
			$hex = false;
			// Only give the byte back if one was actually read; at the end
			// of the input there is nothing to unconsume.
			if ($marker !== false)
			{
				$this->unconsume();
			}
		}

		// Compare against false explicitly: the digit string "0" is falsy.
		$digits = $this->consume_range($range);
		if ($digits === false)
		{
			return;
		}

		$codepoint = $hex ? hexdec($digits) : intval($digits);

		if ($codepoint < 1 || $codepoint > 0x10FFFF)
		{
			// NUL and values beyond the Unicode range become U+FFFD. This
			// also keeps an oversized hexdec() float from being used as an
			// array offset below.
			$replacement = "\xEF\xBF\xBD";
		}
		elseif (isset($windows_1252_specials[$codepoint]))
		{
			$replacement = $windows_1252_specials[$codepoint];
		}
		else
		{
			// NOTE(review): SimplePie_Misc is defined outside this file; it is
			// assumed to return the UTF-8 bytes for the code point - confirm.
			$replacement = (string) SimplePie_Misc::codepoint_to_utf8($codepoint);
		}

		// Swallow an optional terminating ';'. Anything else is handed back.
		$terminator = $this->consume();
		if ($terminator !== ';' && $terminator !== false)
		{
			$this->unconsume();
		}

		$consumed_length = strlen($this->consumed);
		$this->data = substr_replace($this->data, $replacement, $this->position - $consumed_length, $consumed_length);
		// Continue right behind the bytes that were just inserted.
		$this->position += strlen($replacement) - $consumed_length;
	}

	/**
	 * Decode a named character reference ("&amp;", "&copy", ...)
	 *
	 * Expects '&' and the first byte of the name to have been consumed. Reads
	 * up to nine further bytes and remembers the longest prefix that is a
	 * known name. If none matches, the cursor is rewound to just behind the
	 * '&' so that references inside the scanned bytes are not skipped.
	 *
	 * @access private
	 */
	private function named_reference()
	{
		// Names without a trailing ';' are the legacy forms that are also
		// recognised when the semicolon is missing.
		static $entities = array(
			'Aacute' => "\xC3\x81",
			'aacute' => "\xC3\xA1",
			'Aacute;' => "\xC3\x81",
			'aacute;' => "\xC3\xA1",
			'Acirc' => "\xC3\x82",
			'acirc' => "\xC3\xA2",
			'Acirc;' => "\xC3\x82",
			'acirc;' => "\xC3\xA2",
			'acute' => "\xC2\xB4",
			'acute;' => "\xC2\xB4",
			'AElig' => "\xC3\x86",
			'aelig' => "\xC3\xA6",
			'AElig;' => "\xC3\x86",
			'aelig;' => "\xC3\xA6",
			'Agrave' => "\xC3\x80",
			'agrave' => "\xC3\xA0",
			'Agrave;' => "\xC3\x80",
			'agrave;' => "\xC3\xA0",
			'alefsym;' => "\xE2\x84\xB5",
			'Alpha;' => "\xCE\x91",
			'alpha;' => "\xCE\xB1",
			'AMP' => "\x26",
			'amp' => "\x26",
			'AMP;' => "\x26",
			'amp;' => "\x26",
			'and;' => "\xE2\x88\xA7",
			'ang;' => "\xE2\x88\xA0",
			'apos;' => "\x27",
			'Aring' => "\xC3\x85",
			'aring' => "\xC3\xA5",
			'Aring;' => "\xC3\x85",
			'aring;' => "\xC3\xA5",
			'asymp;' => "\xE2\x89\x88",
			'Atilde' => "\xC3\x83",
			'atilde' => "\xC3\xA3",
			'Atilde;' => "\xC3\x83",
			'atilde;' => "\xC3\xA3",
			'Auml' => "\xC3\x84",
			'auml' => "\xC3\xA4",
			'Auml;' => "\xC3\x84",
			'auml;' => "\xC3\xA4",
			'bdquo;' => "\xE2\x80\x9E",
			'Beta;' => "\xCE\x92",
			'beta;' => "\xCE\xB2",
			'brvbar' => "\xC2\xA6",
			'brvbar;' => "\xC2\xA6",
			'bull;' => "\xE2\x80\xA2",
			'cap;' => "\xE2\x88\xA9",
			'Ccedil' => "\xC3\x87",
			'ccedil' => "\xC3\xA7",
			'Ccedil;' => "\xC3\x87",
			'ccedil;' => "\xC3\xA7",
			'cedil' => "\xC2\xB8",
			'cedil;' => "\xC2\xB8",
			'cent' => "\xC2\xA2",
			'cent;' => "\xC2\xA2",
			'Chi;' => "\xCE\xA7",
			'chi;' => "\xCF\x87",
			'circ;' => "\xCB\x86",
			'clubs;' => "\xE2\x99\xA3",
			'cong;' => "\xE2\x89\x85",
			'COPY' => "\xC2\xA9",
			'copy' => "\xC2\xA9",
			'COPY;' => "\xC2\xA9",
			'copy;' => "\xC2\xA9",
			'crarr;' => "\xE2\x86\xB5",
			'cup;' => "\xE2\x88\xAA",
			'curren' => "\xC2\xA4",
			'curren;' => "\xC2\xA4",
			'Dagger;' => "\xE2\x80\xA1",
			'dagger;' => "\xE2\x80\xA0",
			'dArr;' => "\xE2\x87\x93",
			'darr;' => "\xE2\x86\x93",
			'deg' => "\xC2\xB0",
			'deg;' => "\xC2\xB0",
			'Delta;' => "\xCE\x94",
			'delta;' => "\xCE\xB4",
			'diams;' => "\xE2\x99\xA6",
			'divide' => "\xC3\xB7",
			'divide;' => "\xC3\xB7",
			'Eacute' => "\xC3\x89",
			'eacute' => "\xC3\xA9",
			'Eacute;' => "\xC3\x89",
			'eacute;' => "\xC3\xA9",
			'Ecirc' => "\xC3\x8A",
			'ecirc' => "\xC3\xAA",
			'Ecirc;' => "\xC3\x8A",
			'ecirc;' => "\xC3\xAA",
			'Egrave' => "\xC3\x88",
			'egrave' => "\xC3\xA8",
			'Egrave;' => "\xC3\x88",
			'egrave;' => "\xC3\xA8",
			'empty;' => "\xE2\x88\x85",
			'emsp;' => "\xE2\x80\x83",
			'ensp;' => "\xE2\x80\x82",
			'Epsilon;' => "\xCE\x95",
			'epsilon;' => "\xCE\xB5",
			'equiv;' => "\xE2\x89\xA1",
			'Eta;' => "\xCE\x97",
			'eta;' => "\xCE\xB7",
			'ETH' => "\xC3\x90",
			'eth' => "\xC3\xB0",
			'ETH;' => "\xC3\x90",
			'eth;' => "\xC3\xB0",
			'Euml' => "\xC3\x8B",
			'euml' => "\xC3\xAB",
			'Euml;' => "\xC3\x8B",
			'euml;' => "\xC3\xAB",
			'euro;' => "\xE2\x82\xAC",
			'exist;' => "\xE2\x88\x83",
			'fnof;' => "\xC6\x92",
			'forall;' => "\xE2\x88\x80",
			'frac12' => "\xC2\xBD",
			'frac12;' => "\xC2\xBD",
			'frac14' => "\xC2\xBC",
			'frac14;' => "\xC2\xBC",
			'frac34' => "\xC2\xBE",
			'frac34;' => "\xC2\xBE",
			'frasl;' => "\xE2\x81\x84",
			'Gamma;' => "\xCE\x93",
			'gamma;' => "\xCE\xB3",
			'ge;' => "\xE2\x89\xA5",
			'GT' => "\x3E",
			'gt' => "\x3E",
			'GT;' => "\x3E",
			'gt;' => "\x3E",
			'hArr;' => "\xE2\x87\x94",
			'harr;' => "\xE2\x86\x94",
			'hearts;' => "\xE2\x99\xA5",
			'hellip;' => "\xE2\x80\xA6",
			'Iacute' => "\xC3\x8D",
			'iacute' => "\xC3\xAD",
			'Iacute;' => "\xC3\x8D",
			'iacute;' => "\xC3\xAD",
			'Icirc' => "\xC3\x8E",
			'icirc' => "\xC3\xAE",
			'Icirc;' => "\xC3\x8E",
			'icirc;' => "\xC3\xAE",
			'iexcl' => "\xC2\xA1",
			'iexcl;' => "\xC2\xA1",
			'Igrave' => "\xC3\x8C",
			'igrave' => "\xC3\xAC",
			'Igrave;' => "\xC3\x8C",
			'igrave;' => "\xC3\xAC",
			'image;' => "\xE2\x84\x91",
			'infin;' => "\xE2\x88\x9E",
			'int;' => "\xE2\x88\xAB",
			'Iota;' => "\xCE\x99",
			'iota;' => "\xCE\xB9",
			'iquest' => "\xC2\xBF",
			'iquest;' => "\xC2\xBF",
			'isin;' => "\xE2\x88\x88",
			'Iuml' => "\xC3\x8F",
			'iuml' => "\xC3\xAF",
			'Iuml;' => "\xC3\x8F",
			'iuml;' => "\xC3\xAF",
			'Kappa;' => "\xCE\x9A",
			'kappa;' => "\xCE\xBA",
			'Lambda;' => "\xCE\x9B",
			'lambda;' => "\xCE\xBB",
			'lang;' => "\xE3\x80\x88",
			'laquo' => "\xC2\xAB",
			'laquo;' => "\xC2\xAB",
			'lArr;' => "\xE2\x87\x90",
			'larr;' => "\xE2\x86\x90",
			'lceil;' => "\xE2\x8C\x88",
			'ldquo;' => "\xE2\x80\x9C",
			'le;' => "\xE2\x89\xA4",
			'lfloor;' => "\xE2\x8C\x8A",
			'lowast;' => "\xE2\x88\x97",
			'loz;' => "\xE2\x97\x8A",
			'lrm;' => "\xE2\x80\x8E",
			'lsaquo;' => "\xE2\x80\xB9",
			'lsquo;' => "\xE2\x80\x98",
			'LT' => "\x3C",
			'lt' => "\x3C",
			'LT;' => "\x3C",
			'lt;' => "\x3C",
			'macr' => "\xC2\xAF",
			'macr;' => "\xC2\xAF",
			'mdash;' => "\xE2\x80\x94",
			'micro' => "\xC2\xB5",
			'micro;' => "\xC2\xB5",
			'middot' => "\xC2\xB7",
			'middot;' => "\xC2\xB7",
			'minus;' => "\xE2\x88\x92",
			'Mu;' => "\xCE\x9C",
			'mu;' => "\xCE\xBC",
			'nabla;' => "\xE2\x88\x87",
			'nbsp' => "\xC2\xA0",
			'nbsp;' => "\xC2\xA0",
			'ndash;' => "\xE2\x80\x93",
			'ne;' => "\xE2\x89\xA0",
			'ni;' => "\xE2\x88\x8B",
			'not' => "\xC2\xAC",
			'not;' => "\xC2\xAC",
			'notin;' => "\xE2\x88\x89",
			'nsub;' => "\xE2\x8A\x84",
			'Ntilde' => "\xC3\x91",
			'ntilde' => "\xC3\xB1",
			'Ntilde;' => "\xC3\x91",
			'ntilde;' => "\xC3\xB1",
			'Nu;' => "\xCE\x9D",
			'nu;' => "\xCE\xBD",
			'Oacute' => "\xC3\x93",
			'oacute' => "\xC3\xB3",
			'Oacute;' => "\xC3\x93",
			'oacute;' => "\xC3\xB3",
			'Ocirc' => "\xC3\x94",
			'ocirc' => "\xC3\xB4",
			'Ocirc;' => "\xC3\x94",
			'ocirc;' => "\xC3\xB4",
			'OElig;' => "\xC5\x92",
			'oelig;' => "\xC5\x93",
			'Ograve' => "\xC3\x92",
			'ograve' => "\xC3\xB2",
			'Ograve;' => "\xC3\x92",
			'ograve;' => "\xC3\xB2",
			'oline;' => "\xE2\x80\xBE",
			'Omega;' => "\xCE\xA9",
			'omega;' => "\xCF\x89",
			'Omicron;' => "\xCE\x9F",
			'omicron;' => "\xCE\xBF",
			'oplus;' => "\xE2\x8A\x95",
			'or;' => "\xE2\x88\xA8",
			'ordf' => "\xC2\xAA",
			'ordf;' => "\xC2\xAA",
			'ordm' => "\xC2\xBA",
			'ordm;' => "\xC2\xBA",
			'Oslash' => "\xC3\x98",
			'oslash' => "\xC3\xB8",
			'Oslash;' => "\xC3\x98",
			'oslash;' => "\xC3\xB8",
			'Otilde' => "\xC3\x95",
			'otilde' => "\xC3\xB5",
			'Otilde;' => "\xC3\x95",
			'otilde;' => "\xC3\xB5",
			'otimes;' => "\xE2\x8A\x97",
			'Ouml' => "\xC3\x96",
			'ouml' => "\xC3\xB6",
			'Ouml;' => "\xC3\x96",
			'ouml;' => "\xC3\xB6",
			'para' => "\xC2\xB6",
			'para;' => "\xC2\xB6",
			'part;' => "\xE2\x88\x82",
			'permil;' => "\xE2\x80\xB0",
			'perp;' => "\xE2\x8A\xA5",
			'Phi;' => "\xCE\xA6",
			'phi;' => "\xCF\x86",
			'Pi;' => "\xCE\xA0",
			'pi;' => "\xCF\x80",
			'piv;' => "\xCF\x96",
			'plusmn' => "\xC2\xB1",
			'plusmn;' => "\xC2\xB1",
			'pound' => "\xC2\xA3",
			'pound;' => "\xC2\xA3",
			'Prime;' => "\xE2\x80\xB3",
			'prime;' => "\xE2\x80\xB2",
			'prod;' => "\xE2\x88\x8F",
			'prop;' => "\xE2\x88\x9D",
			'Psi;' => "\xCE\xA8",
			'psi;' => "\xCF\x88",
			'QUOT' => "\x22",
			'quot' => "\x22",
			'QUOT;' => "\x22",
			'quot;' => "\x22",
			'radic;' => "\xE2\x88\x9A",
			'rang;' => "\xE3\x80\x89",
			'raquo' => "\xC2\xBB",
			'raquo;' => "\xC2\xBB",
			'rArr;' => "\xE2\x87\x92",
			'rarr;' => "\xE2\x86\x92",
			'rceil;' => "\xE2\x8C\x89",
			'rdquo;' => "\xE2\x80\x9D",
			'real;' => "\xE2\x84\x9C",
			'REG' => "\xC2\xAE",
			'reg' => "\xC2\xAE",
			'REG;' => "\xC2\xAE",
			'reg;' => "\xC2\xAE",
			'rfloor;' => "\xE2\x8C\x8B",
			'Rho;' => "\xCE\xA1",
			'rho;' => "\xCF\x81",
			'rlm;' => "\xE2\x80\x8F",
			'rsaquo;' => "\xE2\x80\xBA",
			'rsquo;' => "\xE2\x80\x99",
			'sbquo;' => "\xE2\x80\x9A",
			'Scaron;' => "\xC5\xA0",
			'scaron;' => "\xC5\xA1",
			'sdot;' => "\xE2\x8B\x85",
			'sect' => "\xC2\xA7",
			'sect;' => "\xC2\xA7",
			'shy' => "\xC2\xAD",
			'shy;' => "\xC2\xAD",
			'Sigma;' => "\xCE\xA3",
			'sigma;' => "\xCF\x83",
			'sigmaf;' => "\xCF\x82",
			'sim;' => "\xE2\x88\xBC",
			'spades;' => "\xE2\x99\xA0",
			'sub;' => "\xE2\x8A\x82",
			'sube;' => "\xE2\x8A\x86",
			'sum;' => "\xE2\x88\x91",
			'sup;' => "\xE2\x8A\x83",
			'sup1' => "\xC2\xB9",
			'sup1;' => "\xC2\xB9",
			'sup2' => "\xC2\xB2",
			'sup2;' => "\xC2\xB2",
			'sup3' => "\xC2\xB3",
			'sup3;' => "\xC2\xB3",
			'supe;' => "\xE2\x8A\x87",
			'szlig' => "\xC3\x9F",
			'szlig;' => "\xC3\x9F",
			'Tau;' => "\xCE\xA4",
			'tau;' => "\xCF\x84",
			'there4;' => "\xE2\x88\xB4",
			'Theta;' => "\xCE\x98",
			'theta;' => "\xCE\xB8",
			'thetasym;' => "\xCF\x91",
			'thinsp;' => "\xE2\x80\x89",
			'THORN' => "\xC3\x9E",
			'thorn' => "\xC3\xBE",
			'THORN;' => "\xC3\x9E",
			'thorn;' => "\xC3\xBE",
			'tilde;' => "\xCB\x9C",
			'times' => "\xC3\x97",
			'times;' => "\xC3\x97",
			'TRADE;' => "\xE2\x84\xA2",
			'trade;' => "\xE2\x84\xA2",
			'Uacute' => "\xC3\x9A",
			'uacute' => "\xC3\xBA",
			'Uacute;' => "\xC3\x9A",
			'uacute;' => "\xC3\xBA",
			'uArr;' => "\xE2\x87\x91",
			'uarr;' => "\xE2\x86\x91",
			'Ucirc' => "\xC3\x9B",
			'ucirc' => "\xC3\xBB",
			'Ucirc;' => "\xC3\x9B",
			'ucirc;' => "\xC3\xBB",
			'Ugrave' => "\xC3\x99",
			'ugrave' => "\xC3\xB9",
			'Ugrave;' => "\xC3\x99",
			'ugrave;' => "\xC3\xB9",
			'uml' => "\xC2\xA8",
			'uml;' => "\xC2\xA8",
			'upsih;' => "\xCF\x92",
			'Upsilon;' => "\xCE\xA5",
			'upsilon;' => "\xCF\x85",
			'Uuml' => "\xC3\x9C",
			'uuml' => "\xC3\xBC",
			'Uuml;' => "\xC3\x9C",
			'uuml;' => "\xC3\xBC",
			'weierp;' => "\xE2\x84\x98",
			'Xi;' => "\xCE\x9E",
			'xi;' => "\xCE\xBE",
			'Yacute' => "\xC3\x9D",
			'yacute' => "\xC3\xBD",
			'Yacute;' => "\xC3\x9D",
			'yacute;' => "\xC3\xBD",
			'yen' => "\xC2\xA5",
			'yen;' => "\xC2\xA5",
			'yuml' => "\xC3\xBF",
			'Yuml;' => "\xC5\xB8",
			'yuml;' => "\xC3\xBF",
			'Zeta;' => "\xCE\x96",
			'zeta;' => "\xCE\xB6",
			'zwj;' => "\xE2\x80\x8D",
			'zwnj;' => "\xE2\x80\x8C"
		);

		// Offset of the '&' that introduced this candidate.
		$ampersand = $this->position - strlen($this->consumed);

		// Keep overwriting $match so that the longest known name wins
		// (e.g. "notin;" over "not").
		$match = null;
		for ($i = 0; $i < 9 && $this->consume() !== false; $i++)
		{
			$candidate = substr($this->consumed, 1);
			if (isset($entities[$candidate]))
			{
				$match = $candidate;
			}
		}

		if ($match !== null)
		{
			// Replace '&' plus the matched name; bytes read beyond the match
			// stay in the data and are rescanned by parse().
			$this->data = substr_replace($this->data, $entities[$match], $ampersand, strlen($match) + 1);
			$this->position = $ampersand + strlen($entities[$match]);
		}
		else
		{
			// Nothing matched: forget the look-ahead so that a reference
			// starting inside it is still found.
			$this->position = $ampersand + 1;
			$this->consumed = '&';
		}
	}
}