Файл: forsoc.ru/includes/utf/utf_tools.php
Строк: 1289
<?php
/**
*
* This file is part of the phpBB Forum Software package.
*
* @copyright (c) phpBB Limited <https://www.phpbb.com>
* @license GNU General Public License, version 2 (GPL-2.0)
*
* For full copyright and license information, please see
* the docs/CREDITS.txt file.
*
*/
/**
*/
// Stop immediately unless the IN_PHPBB constant has been defined beforehand.
if (!defined('IN_PHPBB'))
{
exit;
}
// Enforce ASCII only string handling: switch the character-classification
// (LC_CTYPE) locale to "C".
setlocale(LC_CTYPE, 'C');
/**
* UTF-8 tools
*
* Whenever possible, these functions will try to use PHP's built-in functions or
* extensions, otherwise they will default to custom routines.
*
*/
if (!extension_loaded('xml'))
{
/**
 * Implementation of PHP's native utf8_encode for people without XML support
 *
 * ISO-8859-1 code points are identical to the first 256 Unicode code points,
 * so every byte >= 0x80 becomes a two-byte UTF-8 sequence whose lead byte is
 * either 0xC2 (source byte 0x80-0xBF) or 0xC3 (source byte 0xC0-0xFF).
 *
 * @param string $str ISO-8859-1 encoded data
 * @return string UTF-8 encoded data
 */
function utf8_encode($str)
{
	$out = '';
	for ($i = 0, $len = strlen($str); $i < $len; $i++)
	{
		$letter = $str[$i];
		$num = ord($letter);
		if ($num < 0x80)
		{
			// Plain ASCII is copied through unchanged
			$out .= $letter;
		}
		else if ($num < 0xC0)
		{
			// 0x80-0xBF: lead byte 0xC2, trail byte equals the source byte
			$out .= "\xC2" . $letter;
		}
		else
		{
			// 0xC0-0xFF: lead byte 0xC3, trail byte is the source byte minus 0x40
			$out .= "\xC3" . chr($num - 64);
		}
	}
	return $out;
}
/**
 * Implementation of PHP's native utf8_decode for people without XML support
 *
 * Two-byte sequences that decode to a value below 256 are converted to the
 * matching single byte; every other multi-byte sequence is replaced by '?'.
 * Bytes that do not start a multi-byte sequence are copied unchanged.
 *
 * @param string $str UTF-8 encoded data
 * @return string ISO-8859-1 encoded data
 */
function utf8_decode($str)
{
	$pos = 0;
	$len = strlen($str);
	$ret = '';
	while ($pos < $len)
	{
		$byte = ord($str[$pos]);
		// The high nibble of the lead byte tells us the sequence length
		$ord = $byte & 0xF0;
		if ($ord === 0xC0 || $ord === 0xD0)
		{
			if ($pos + 1 >= $len)
			{
				// Truncated two-byte sequence at the very end of the input:
				// there is no trail byte to read, so emit a placeholder
				$ret .= '?';
				break;
			}
			$charval = (($byte & 0x1F) << 6) | (ord($str[$pos + 1]) & 0x3F);
			$pos += 2;
			$ret .= (($charval < 256) ? chr($charval) : '?');
		}
		else if ($ord === 0xE0)
		{
			// Three-byte sequence: never representable in ISO-8859-1
			$ret .= '?';
			$pos += 3;
		}
		else if ($ord === 0xF0)
		{
			// Four-byte sequence: never representable in ISO-8859-1
			$ret .= '?';
			$pos += 4;
		}
		else
		{
			$ret .= $str[$pos];
			++$pos;
		}
	}
	return $ret;
}
}
// mbstring is old and has its functions around for older versions of PHP.
// If mbstring is not loaded, we go into native mode.
if (extension_loaded('mbstring'))
{
mb_internal_encoding('UTF-8');
/**
 * UTF-8 aware alternative to strrpos
 * Find position of last occurrence of a char in a string
 *
 * @param string $str haystack
 * @param string $needle needle
 * @param int|null $offset (optional) offset forwarded to mb_strrpos()
 * @return int|false position in characters, or false when not found
 * @ignore
 */
function utf8_strrpos($str, $needle, $offset = null)
{
	// Emulate behaviour of strrpos rather than raising warning.
	// empty() is deliberately not used: it would also reject the
	// perfectly valid haystack "0".
	if ($str === null || $str === false || $str === '' || $str === array())
	{
		return false;
	}

	if ($offset === null)
	{
		return mb_strrpos($str, $needle);
	}

	return mb_strrpos($str, $needle, $offset);
}
/**
 * UTF-8 aware alternative to strpos
 *
 * Delegates to mb_strpos(); the offset is only forwarded when the caller
 * supplied one.
 *
 * @param string $str haystack
 * @param string $needle needle
 * @param int|null $offset (optional) offset forwarded to mb_strpos()
 * @return int|false result of mb_strpos()
 * @ignore
 */
function utf8_strpos($str, $needle, $offset = null)
{
	return ($offset === null)
		? mb_strpos($str, $needle)
		: mb_strpos($str, $needle, $offset);
}
/**
* UTF-8 aware alternative to strtolower
*
* Thin wrapper that passes the string straight to mb_strtolower().
*
* @param string $str string to convert
* @return string result of mb_strtolower()
* @ignore
*/
function utf8_strtolower($str)
{
return mb_strtolower($str);
}
/**
* UTF-8 aware alternative to strtoupper
*
* Thin wrapper that passes the string straight to mb_strtoupper().
*
* @param string $str string to convert
* @return string result of mb_strtoupper()
* @ignore
*/
function utf8_strtoupper($str)
{
return mb_strtoupper($str);
}
/**
 * UTF-8 aware alternative to substr
 *
 * Delegates to mb_substr(); the length is only forwarded when the caller
 * supplied one.
 *
 * @param string $str source string
 * @param int $offset offset forwarded to mb_substr()
 * @param int|null $length (optional) length forwarded to mb_substr()
 * @return string result of mb_substr()
 * @ignore
 */
function utf8_substr($str, $offset, $length = null)
{
	if ($length !== null)
	{
		return mb_substr($str, $offset, $length);
	}

	return mb_substr($str, $offset);
}
/**
* Return the length (in characters) of a UTF-8 string
*
* Calls mb_strlen() with the encoding given explicitly as 'utf-8'.
*
* @param string $text UTF-8 string
* @return int result of mb_strlen()
* @ignore
*/
function utf8_strlen($text)
{
return mb_strlen($text, 'utf-8');
}
}
else
{
/**
 * UTF-8 aware alternative to strrpos
 * Find position of last occurrence of a char in a string
 *
 * Without an offset the haystack is split on the needle; the character
 * length of everything before the last piece is the position. With an
 * offset the search is repeated on the substring starting at that offset
 * and the offset is added back onto the result.
 *
 * @author Harry Fuecks
 * @param string $str haystack
 * @param string $needle needle
 * @param integer $offset (optional) offset (from left)
 * @return mixed integer position or FALSE on failure
 */
function utf8_strrpos($str, $needle, $offset = null)
{
	if ($offset !== null)
	{
		if (!is_int($offset))
		{
			trigger_error('utf8_strrpos expects parameter 3 to be long', E_USER_ERROR);
			return false;
		}

		$found = utf8_strrpos(utf8_substr($str, $offset), $needle);

		return ($found === false) ? false : $found + $offset;
	}

	$pieces = explode($needle, $str);
	if (count($pieces) <= 1)
	{
		// Needle does not occur at all
		return false;
	}

	// Drop the tail that follows the last match, then measure what is left
	array_pop($pieces);

	return utf8_strlen(implode($needle, $pieces));
}
/**
 * UTF-8 aware alternative to strpos
 * Find position of first occurrence of a string
 *
 * Without an offset the haystack is split on the needle and the character
 * length of the first piece is the position. With an offset the search is
 * repeated on the substring starting at that offset and the offset is added
 * back onto the result.
 *
 * @author Harry Fuecks
 * @param string $str haystack
 * @param string $needle needle
 * @param integer $offset offset in characters (from left)
 * @return mixed integer position or FALSE on failure
 */
function utf8_strpos($str, $needle, $offset = null)
{
	if ($offset !== null)
	{
		if (!is_int($offset))
		{
			trigger_error('utf8_strpos: Offset must be an integer', E_USER_ERROR);
			return false;
		}

		$found = utf8_strpos(utf8_substr($str, $offset), $needle);

		return ($found === false) ? false : $found + $offset;
	}

	$pieces = explode($needle, $str);

	// More than one piece means the needle occurred at least once
	return (count($pieces) > 1) ? utf8_strlen($pieces[0]) : false;
}
/**
 * UTF-8 aware alternative to strtolower
 * Make a string lowercase
 * Note: The concept of a character's "case" only exists in some alphabets
 * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
 * not exist in the Chinese alphabet, for example. See Unicode Standard
 * Annex #21: Case Mappings
 *
 * ASCII letters are handled by strtolower(); the remaining characters are
 * mapped through a static table of UTF-8 byte sequences. The table entries
 * must be written with "\x" escapes so that they are real byte sequences.
 *
 * @param string $string UTF-8 string
 * @return string string in lowercase
 */
function utf8_strtolower($string)
{
	static $utf8_upper_to_lower = array(
		"\xC3\x80" => "\xC3\xA0", "\xC3\x81" => "\xC3\xA1",
		"\xC3\x82" => "\xC3\xA2", "\xC3\x83" => "\xC3\xA3", "\xC3\x84" => "\xC3\xA4", "\xC3\x85" => "\xC3\xA5",
		"\xC3\x86" => "\xC3\xA6", "\xC3\x87" => "\xC3\xA7", "\xC3\x88" => "\xC3\xA8", "\xC3\x89" => "\xC3\xA9",
		"\xC3\x8A" => "\xC3\xAA", "\xC3\x8B" => "\xC3\xAB", "\xC3\x8C" => "\xC3\xAC", "\xC3\x8D" => "\xC3\xAD",
		"\xC3\x8E" => "\xC3\xAE", "\xC3\x8F" => "\xC3\xAF", "\xC3\x90" => "\xC3\xB0", "\xC3\x91" => "\xC3\xB1",
		"\xC3\x92" => "\xC3\xB2", "\xC3\x93" => "\xC3\xB3", "\xC3\x94" => "\xC3\xB4", "\xC3\x95" => "\xC3\xB5",
		"\xC3\x96" => "\xC3\xB6", "\xC3\x98" => "\xC3\xB8", "\xC3\x99" => "\xC3\xB9", "\xC3\x9A" => "\xC3\xBA",
		"\xC3\x9B" => "\xC3\xBB", "\xC3\x9C" => "\xC3\xBC", "\xC3\x9D" => "\xC3\xBD", "\xC3\x9E" => "\xC3\xBE",
		"\xC4\x80" => "\xC4\x81", "\xC4\x82" => "\xC4\x83", "\xC4\x84" => "\xC4\x85", "\xC4\x86" => "\xC4\x87",
		"\xC4\x88" => "\xC4\x89", "\xC4\x8A" => "\xC4\x8B", "\xC4\x8C" => "\xC4\x8D", "\xC4\x8E" => "\xC4\x8F",
		"\xC4\x90" => "\xC4\x91", "\xC4\x92" => "\xC4\x93", "\xC4\x96" => "\xC4\x97", "\xC4\x98" => "\xC4\x99",
		"\xC4\x9A" => "\xC4\x9B", "\xC4\x9C" => "\xC4\x9D", "\xC4\x9E" => "\xC4\x9F", "\xC4\xA0" => "\xC4\xA1",
		"\xC4\xA2" => "\xC4\xA3", "\xC4\xA4" => "\xC4\xA5", "\xC4\xA6" => "\xC4\xA7", "\xC4\xA8" => "\xC4\xA9",
		"\xC4\xAA" => "\xC4\xAB", "\xC4\xAE" => "\xC4\xAF", "\xC4\xB4" => "\xC4\xB5", "\xC4\xB6" => "\xC4\xB7",
		"\xC4\xB9" => "\xC4\xBA", "\xC4\xBB" => "\xC4\xBC", "\xC4\xBD" => "\xC4\xBE", "\xC5\x81" => "\xC5\x82",
		"\xC5\x83" => "\xC5\x84", "\xC5\x85" => "\xC5\x86", "\xC5\x87" => "\xC5\x88", "\xC5\x8A" => "\xC5\x8B",
		"\xC5\x8C" => "\xC5\x8D", "\xC5\x90" => "\xC5\x91", "\xC5\x94" => "\xC5\x95", "\xC5\x96" => "\xC5\x97",
		"\xC5\x98" => "\xC5\x99", "\xC5\x9A" => "\xC5\x9B", "\xC5\x9C" => "\xC5\x9D", "\xC5\x9E" => "\xC5\x9F",
		"\xC5\xA0" => "\xC5\xA1", "\xC5\xA2" => "\xC5\xA3", "\xC5\xA4" => "\xC5\xA5", "\xC5\xA6" => "\xC5\xA7",
		"\xC5\xA8" => "\xC5\xA9", "\xC5\xAA" => "\xC5\xAB", "\xC5\xAC" => "\xC5\xAD", "\xC5\xAE" => "\xC5\xAF",
		"\xC5\xB0" => "\xC5\xB1", "\xC5\xB2" => "\xC5\xB3", "\xC5\xB4" => "\xC5\xB5", "\xC5\xB6" => "\xC5\xB7",
		"\xC5\xB8" => "\xC3\xBF", "\xC5\xB9" => "\xC5\xBA", "\xC5\xBB" => "\xC5\xBC", "\xC5\xBD" => "\xC5\xBE",
		"\xC6\xA0" => "\xC6\xA1", "\xC6\xAF" => "\xC6\xB0", "\xC8\x98" => "\xC8\x99", "\xC8\x9A" => "\xC8\x9B",
		"\xCE\x86" => "\xCE\xAC", "\xCE\x88" => "\xCE\xAD", "\xCE\x89" => "\xCE\xAE", "\xCE\x8A" => "\xCE\xAF",
		"\xCE\x8C" => "\xCF\x8C", "\xCE\x8E" => "\xCF\x8D", "\xCE\x8F" => "\xCF\x8E", "\xCE\x91" => "\xCE\xB1",
		"\xCE\x92" => "\xCE\xB2", "\xCE\x93" => "\xCE\xB3", "\xCE\x94" => "\xCE\xB4", "\xCE\x95" => "\xCE\xB5",
		"\xCE\x96" => "\xCE\xB6", "\xCE\x97" => "\xCE\xB7", "\xCE\x98" => "\xCE\xB8", "\xCE\x99" => "\xCE\xB9",
		"\xCE\x9A" => "\xCE\xBA", "\xCE\x9B" => "\xCE\xBB", "\xCE\x9C" => "\xCE\xBC", "\xCE\x9D" => "\xCE\xBD",
		"\xCE\x9E" => "\xCE\xBE", "\xCE\x9F" => "\xCE\xBF", "\xCE\xA0" => "\xCF\x80", "\xCE\xA1" => "\xCF\x81",
		"\xCE\xA3" => "\xCF\x83", "\xCE\xA4" => "\xCF\x84", "\xCE\xA5" => "\xCF\x85", "\xCE\xA6" => "\xCF\x86",
		"\xCE\xA7" => "\xCF\x87", "\xCE\xA8" => "\xCF\x88", "\xCE\xA9" => "\xCF\x89", "\xCE\xAA" => "\xCF\x8A",
		"\xCE\xAB" => "\xCF\x8B", "\xD0\x81" => "\xD1\x91", "\xD0\x82" => "\xD1\x92", "\xD0\x83" => "\xD1\x93",
		"\xD0\x84" => "\xD1\x94", "\xD0\x85" => "\xD1\x95", "\xD0\x86" => "\xD1\x96", "\xD0\x87" => "\xD1\x97",
		"\xD0\x88" => "\xD1\x98", "\xD0\x89" => "\xD1\x99", "\xD0\x8A" => "\xD1\x9A", "\xD0\x8B" => "\xD1\x9B",
		"\xD0\x8C" => "\xD1\x9C", "\xD0\x8E" => "\xD1\x9E", "\xD0\x8F" => "\xD1\x9F", "\xD0\x90" => "\xD0\xB0",
		"\xD0\x91" => "\xD0\xB1", "\xD0\x92" => "\xD0\xB2", "\xD0\x93" => "\xD0\xB3", "\xD0\x94" => "\xD0\xB4",
		"\xD0\x95" => "\xD0\xB5", "\xD0\x96" => "\xD0\xB6", "\xD0\x97" => "\xD0\xB7", "\xD0\x98" => "\xD0\xB8",
		"\xD0\x99" => "\xD0\xB9", "\xD0\x9A" => "\xD0\xBA", "\xD0\x9B" => "\xD0\xBB", "\xD0\x9C" => "\xD0\xBC",
		"\xD0\x9D" => "\xD0\xBD", "\xD0\x9E" => "\xD0\xBE", "\xD0\x9F" => "\xD0\xBF", "\xD0\xA0" => "\xD1\x80",
		"\xD0\xA1" => "\xD1\x81", "\xD0\xA2" => "\xD1\x82", "\xD0\xA3" => "\xD1\x83", "\xD0\xA4" => "\xD1\x84",
		"\xD0\xA5" => "\xD1\x85", "\xD0\xA6" => "\xD1\x86", "\xD0\xA7" => "\xD1\x87", "\xD0\xA8" => "\xD1\x88",
		"\xD0\xA9" => "\xD1\x89", "\xD0\xAA" => "\xD1\x8A", "\xD0\xAB" => "\xD1\x8B", "\xD0\xAC" => "\xD1\x8C",
		"\xD0\xAD" => "\xD1\x8D", "\xD0\xAE" => "\xD1\x8E", "\xD0\xAF" => "\xD1\x8F", "\xD2\x90" => "\xD2\x91",
		"\xE1\xB8\x82" => "\xE1\xB8\x83", "\xE1\xB8\x8A" => "\xE1\xB8\x8B", "\xE1\xB8\x9E" => "\xE1\xB8\x9F", "\xE1\xB9\x80" => "\xE1\xB9\x81",
		"\xE1\xB9\x96" => "\xE1\xB9\x97", "\xE1\xB9\xA0" => "\xE1\xB9\xA1", "\xE1\xB9\xAA" => "\xE1\xB9\xAB", "\xE1\xBA\x80" => "\xE1\xBA\x81",
		"\xE1\xBA\x82" => "\xE1\xBA\x83", "\xE1\xBA\x84" => "\xE1\xBA\x85", "\xE1\xBB\xB2" => "\xE1\xBB\xB3"
	);

	return strtr(strtolower($string), $utf8_upper_to_lower);
}
/**
 * UTF-8 aware alternative to strtoupper
 * Make a string uppercase
 * Note: The concept of a character's "case" only exists in some alphabets
 * such as Latin, Greek, Cyrillic, Armenian and archaic Georgian - it does
 * not exist in the Chinese alphabet, for example. See Unicode Standard
 * Annex #21: Case Mappings
 *
 * ASCII letters are handled by strtoupper(); the remaining characters are
 * mapped through a static table of UTF-8 byte sequences. The table entries
 * must be written with "\x" escapes so that they are real byte sequences.
 *
 * @param string $string UTF-8 string
 * @return string string in uppercase
 */
function utf8_strtoupper($string)
{
	static $utf8_lower_to_upper = array(
		"\xC3\xA0" => "\xC3\x80", "\xC3\xA1" => "\xC3\x81",
		"\xC3\xA2" => "\xC3\x82", "\xC3\xA3" => "\xC3\x83", "\xC3\xA4" => "\xC3\x84", "\xC3\xA5" => "\xC3\x85",
		"\xC3\xA6" => "\xC3\x86", "\xC3\xA7" => "\xC3\x87", "\xC3\xA8" => "\xC3\x88", "\xC3\xA9" => "\xC3\x89",
		"\xC3\xAA" => "\xC3\x8A", "\xC3\xAB" => "\xC3\x8B", "\xC3\xAC" => "\xC3\x8C", "\xC3\xAD" => "\xC3\x8D",
		"\xC3\xAE" => "\xC3\x8E", "\xC3\xAF" => "\xC3\x8F", "\xC3\xB0" => "\xC3\x90", "\xC3\xB1" => "\xC3\x91",
		"\xC3\xB2" => "\xC3\x92", "\xC3\xB3" => "\xC3\x93", "\xC3\xB4" => "\xC3\x94", "\xC3\xB5" => "\xC3\x95",
		"\xC3\xB6" => "\xC3\x96", "\xC3\xB8" => "\xC3\x98", "\xC3\xB9" => "\xC3\x99", "\xC3\xBA" => "\xC3\x9A",
		"\xC3\xBB" => "\xC3\x9B", "\xC3\xBC" => "\xC3\x9C", "\xC3\xBD" => "\xC3\x9D", "\xC3\xBE" => "\xC3\x9E",
		"\xC3\xBF" => "\xC5\xB8", "\xC4\x81" => "\xC4\x80", "\xC4\x83" => "\xC4\x82", "\xC4\x85" => "\xC4\x84",
		"\xC4\x87" => "\xC4\x86", "\xC4\x89" => "\xC4\x88", "\xC4\x8B" => "\xC4\x8A", "\xC4\x8D" => "\xC4\x8C",
		"\xC4\x8F" => "\xC4\x8E", "\xC4\x91" => "\xC4\x90", "\xC4\x93" => "\xC4\x92", "\xC4\x97" => "\xC4\x96",
		"\xC4\x99" => "\xC4\x98", "\xC4\x9B" => "\xC4\x9A", "\xC4\x9D" => "\xC4\x9C", "\xC4\x9F" => "\xC4\x9E",
		"\xC4\xA1" => "\xC4\xA0", "\xC4\xA3" => "\xC4\xA2", "\xC4\xA5" => "\xC4\xA4", "\xC4\xA7" => "\xC4\xA6",
		"\xC4\xA9" => "\xC4\xA8", "\xC4\xAB" => "\xC4\xAA", "\xC4\xAF" => "\xC4\xAE", "\xC4\xB5" => "\xC4\xB4",
		"\xC4\xB7" => "\xC4\xB6", "\xC4\xBA" => "\xC4\xB9", "\xC4\xBC" => "\xC4\xBB", "\xC4\xBE" => "\xC4\xBD",
		"\xC5\x82" => "\xC5\x81", "\xC5\x84" => "\xC5\x83", "\xC5\x86" => "\xC5\x85", "\xC5\x88" => "\xC5\x87",
		"\xC5\x8B" => "\xC5\x8A", "\xC5\x8D" => "\xC5\x8C", "\xC5\x91" => "\xC5\x90", "\xC5\x95" => "\xC5\x94",
		"\xC5\x97" => "\xC5\x96", "\xC5\x99" => "\xC5\x98", "\xC5\x9B" => "\xC5\x9A", "\xC5\x9D" => "\xC5\x9C",
		"\xC5\x9F" => "\xC5\x9E", "\xC5\xA1" => "\xC5\xA0", "\xC5\xA3" => "\xC5\xA2", "\xC5\xA5" => "\xC5\xA4",
		"\xC5\xA7" => "\xC5\xA6", "\xC5\xA9" => "\xC5\xA8", "\xC5\xAB" => "\xC5\xAA", "\xC5\xAD" => "\xC5\xAC",
		"\xC5\xAF" => "\xC5\xAE", "\xC5\xB1" => "\xC5\xB0", "\xC5\xB3" => "\xC5\xB2", "\xC5\xB5" => "\xC5\xB4",
		"\xC5\xB7" => "\xC5\xB6", "\xC5\xBA" => "\xC5\xB9", "\xC5\xBC" => "\xC5\xBB", "\xC5\xBE" => "\xC5\xBD",
		"\xC6\xA1" => "\xC6\xA0", "\xC6\xB0" => "\xC6\xAF", "\xC8\x99" => "\xC8\x98", "\xC8\x9B" => "\xC8\x9A",
		"\xCE\xAC" => "\xCE\x86", "\xCE\xAD" => "\xCE\x88", "\xCE\xAE" => "\xCE\x89", "\xCE\xAF" => "\xCE\x8A",
		"\xCE\xB1" => "\xCE\x91", "\xCE\xB2" => "\xCE\x92", "\xCE\xB3" => "\xCE\x93", "\xCE\xB4" => "\xCE\x94",
		"\xCE\xB5" => "\xCE\x95", "\xCE\xB6" => "\xCE\x96", "\xCE\xB7" => "\xCE\x97", "\xCE\xB8" => "\xCE\x98",
		"\xCE\xB9" => "\xCE\x99", "\xCE\xBA" => "\xCE\x9A", "\xCE\xBB" => "\xCE\x9B", "\xCE\xBC" => "\xCE\x9C",
		"\xCE\xBD" => "\xCE\x9D", "\xCE\xBE" => "\xCE\x9E", "\xCE\xBF" => "\xCE\x9F", "\xCF\x80" => "\xCE\xA0",
		"\xCF\x81" => "\xCE\xA1", "\xCF\x83" => "\xCE\xA3", "\xCF\x84" => "\xCE\xA4", "\xCF\x85" => "\xCE\xA5",
		"\xCF\x86" => "\xCE\xA6", "\xCF\x87" => "\xCE\xA7", "\xCF\x88" => "\xCE\xA8", "\xCF\x89" => "\xCE\xA9",
		"\xCF\x8A" => "\xCE\xAA", "\xCF\x8B" => "\xCE\xAB", "\xCF\x8C" => "\xCE\x8C", "\xCF\x8D" => "\xCE\x8E",
		"\xCF\x8E" => "\xCE\x8F", "\xD0\xB0" => "\xD0\x90", "\xD0\xB1" => "\xD0\x91", "\xD0\xB2" => "\xD0\x92",
		"\xD0\xB3" => "\xD0\x93", "\xD0\xB4" => "\xD0\x94", "\xD0\xB5" => "\xD0\x95", "\xD0\xB6" => "\xD0\x96",
		"\xD0\xB7" => "\xD0\x97", "\xD0\xB8" => "\xD0\x98", "\xD0\xB9" => "\xD0\x99", "\xD0\xBA" => "\xD0\x9A",
		"\xD0\xBB" => "\xD0\x9B", "\xD0\xBC" => "\xD0\x9C", "\xD0\xBD" => "\xD0\x9D", "\xD0\xBE" => "\xD0\x9E",
		"\xD0\xBF" => "\xD0\x9F", "\xD1\x80" => "\xD0\xA0", "\xD1\x81" => "\xD0\xA1", "\xD1\x82" => "\xD0\xA2",
		"\xD1\x83" => "\xD0\xA3", "\xD1\x84" => "\xD0\xA4", "\xD1\x85" => "\xD0\xA5", "\xD1\x86" => "\xD0\xA6",
		"\xD1\x87" => "\xD0\xA7", "\xD1\x88" => "\xD0\xA8", "\xD1\x89" => "\xD0\xA9", "\xD1\x8A" => "\xD0\xAA",
		"\xD1\x8B" => "\xD0\xAB", "\xD1\x8C" => "\xD0\xAC", "\xD1\x8D" => "\xD0\xAD", "\xD1\x8E" => "\xD0\xAE",
		"\xD1\x8F" => "\xD0\xAF", "\xD1\x91" => "\xD0\x81", "\xD1\x92" => "\xD0\x82", "\xD1\x93" => "\xD0\x83",
		"\xD1\x94" => "\xD0\x84", "\xD1\x95" => "\xD0\x85", "\xD1\x96" => "\xD0\x86", "\xD1\x97" => "\xD0\x87",
		"\xD1\x98" => "\xD0\x88", "\xD1\x99" => "\xD0\x89", "\xD1\x9A" => "\xD0\x8A", "\xD1\x9B" => "\xD0\x8B",
		"\xD1\x9C" => "\xD0\x8C", "\xD1\x9E" => "\xD0\x8E", "\xD1\x9F" => "\xD0\x8F", "\xD2\x91" => "\xD2\x90",
		"\xE1\xB8\x83" => "\xE1\xB8\x82", "\xE1\xB8\x8B" => "\xE1\xB8\x8A", "\xE1\xB8\x9F" => "\xE1\xB8\x9E", "\xE1\xB9\x81" => "\xE1\xB9\x80",
		"\xE1\xB9\x97" => "\xE1\xB9\x96", "\xE1\xB9\xA1" => "\xE1\xB9\xA0", "\xE1\xB9\xAB" => "\xE1\xB9\xAA", "\xE1\xBA\x81" => "\xE1\xBA\x80",
		"\xE1\xBA\x83" => "\xE1\xBA\x82", "\xE1\xBA\x85" => "\xE1\xBA\x84", "\xE1\xBB\xB3" => "\xE1\xBB\xB2"
	);

	return strtr(strtoupper($string), $utf8_lower_to_upper);
}
/**
* UTF-8 aware alternative to substr
* Return part of a string given character offset (and optionally length)
*
* The substring is extracted with a single anchored regular expression
* (modifiers "u" and "s"): a non-capturing prefix that skips $offset
* characters, followed by a capturing group for the requested part.
*
* Note on arguments: compared to substr - if offset or length are
* not integers, this version will not complain but rather casts them
* to an integer.
*
* Note on returned values: substr documentation states false can be
* returned in some cases (e.g. offset > string length)
* mb_substr never returns false, it will return an empty string instead.
* This adopts the mb_substr approach
*
* Note on implementation: PCRE only supports repetitions of less than
* 65536, in order to accept up to MAXINT values for offset and length,
* we'll repeat a group of 65535 characters when needed.
*
* Note on implementation: calculating the number of characters in the
* string is a relatively expensive operation, so we only carry it out when
* necessary. It isn't necessary for positive offsets and no specified length
*
* @author Chris Smith<chris@jalakai.co.uk>
* @param string $str
* @param integer $offset number of UTF-8 characters offset (from left)
* @param integer $length (optional) length in UTF-8 characters from offset
* @return string the requested part, or an empty string when nothing matches
*/
function utf8_substr($str, $offset, $length = NULL)
{
// generates E_NOTICE
// for PHP4 objects, but not PHP5 objects
$str = (string) $str;
$offset = (int) $offset;
// a null length means "up to the end of the string" and must stay null
if (!is_null($length))
{
$length = (int) $length;
}
// handle trivial cases
if ($length === 0 || ($offset < 0 && $length < 0 && $length < $offset))
{
return '';
}
// normalise negative offsets (we could use a tail
// anchored pattern, but they are horribly slow!)
if ($offset < 0)
{
// see notes
$strlen = utf8_strlen($str);
$offset = $strlen + $offset;
// an offset further left than the start of the string is clamped to 0
if ($offset < 0)
{
$offset = 0;
}
}
// $op: pattern part that skips the offset, $lp: pattern part that captures
$op = '';
$lp = '';
// establish a pattern for offset, a
// non-captured group equal in length to offset
if ($offset > 0)
{
// $ox full groups of 65535 characters plus $oy remaining characters
$ox = (int) ($offset / 65535);
$oy = $offset % 65535;
if ($ox)
{
$op = '(?:.{65535}){' . $ox . '}';
}
$op = '^(?:' . $op . '.{' . $oy . '})';
}
else
{
// offset == 0; just anchor the pattern
$op = '^';
}
// establish a pattern for length
if (is_null($length))
{
// the rest of the string
$lp = '(.*)$';
}
else
{
// $strlen is only set already if a negative offset was normalised above
if (!isset($strlen))
{
// see notes
$strlen = utf8_strlen($str);
}
// another trivial case
if ($offset > $strlen)
{
return '';
}
if ($length > 0)
{
// reduce any length that would
// go past the end of the string
$length = min($strlen - $offset, $length);
$lx = (int) ($length / 65535);
$ly = $length % 65535;
// positive length requires a captured group
// of length characters
if ($lx)
{
$lp = '(?:.{65535}){' . $lx . '}';
}
$lp = '(' . $lp . '.{'. $ly . '})';
}
else if ($length < 0)
{
// more characters to cut off the end than remain after the offset
if ($length < ($offset - $strlen))
{
return '';
}
$lx = (int) ((-$length) / 65535);
$ly = (-$length) % 65535;
// negative length requires ... capture everything
// except a group of -length characters
// anchored at the tail-end of the string
if ($lx)
{
$lp = '(?:.{65535}){' . $lx . '}';
}
$lp = '(.*)(?:' . $lp . '.{' . $ly . '})$';
}
}
// no match (preg_match returned 0 or false) yields an empty string
if (!preg_match('#' . $op . $lp . '#us', $str, $match))
{
return '';
}
return $match[1];
}
/**
* Return the length (in characters) of a UTF-8 string
*
* The string is passed through utf8_decode() and the byte length of the
* result is returned.
*
* @param string $text UTF-8 string
* @return integer Length (in chars) of given string
*/
function utf8_strlen($text)
{
// Since utf8_decode is replacing multibyte characters to ? strlen works fine
return strlen(utf8_decode($text));
}
}
/**
 * UTF-8 aware alternative to str_split
 * Convert a string to an array
 *
 * @author Harry Fuecks
 * @param string $str UTF-8 encoded
 * @param int $split_len number of characters per chunk
 * @return array|false list of chunks of at most $split_len characters, or
 *                     false if $split_len is not an integer >= 1
 */
function utf8_str_split($str, $split_len = 1)
{
	if (!is_int($split_len) || $split_len < 1)
	{
		return false;
	}

	$len = utf8_strlen($str);
	if ($len <= $split_len)
	{
		// The whole string fits into a single chunk
		return array($str);
	}

	// First alternative: a full chunk of $split_len characters.
	// Second alternative: the shorter remainder at the end of the string.
	// The class must be written as [^\x00] (anything but NUL); without the
	// backslash it would exclude the letters "x" and "0" and silently drop
	// any remainder containing them.
	preg_match_all('/.{' . $split_len . '}|[^\x00]{1,' . $split_len . '}$/us', $str, $ar);

	return $ar[0];
}
/**
 * UTF-8 aware alternative to strspn
 * Find length of initial segment matching the mask
 *
 * @author Harry Fuecks
 * @param string $str UTF-8 string to examine
 * @param string $mask list of allowed characters (taken literally)
 * @param int|null $start (optional) character offset to start at
 * @param int|null $length (optional) number of characters to examine
 * @return int number of leading characters of $str that are all in $mask
 */
function utf8_strspn($str, $mask, $start = null, $length = null)
{
	if ($start !== null || $length !== null)
	{
		// A missing start with a given length means "from the beginning"
		$str = utf8_substr($str, ($start === null) ? 0 : $start, $length);
	}

	$mask = (string) $mask;
	if ($mask === '')
	{
		// An empty character class is not a valid pattern; nothing can match
		return 0;
	}

	// The mask is a plain list of characters, so anything that is special
	// inside a character class (], ^, -, \) or equal to the delimiter must
	// be escaped before it is embedded in the pattern.
	if (preg_match('/^[' . preg_quote($mask, '/') . ']+/u', $str, $matches))
	{
		return utf8_strlen($matches[0]);
	}

	return 0;
}
/**
 * UTF-8 aware alternative to ucfirst
 * Make a string's first character uppercase
 *
 * Only the first character is passed to utf8_strtoupper(); the rest of
 * the string is appended untouched.
 *
 * @author Harry Fuecks
 * @param string $str UTF-8 string
 * @return string with first character as upper case (if applicable)
 */
function utf8_ucfirst($str)
{
	$char_count = utf8_strlen($str);

	if ($char_count == 0)
	{
		return '';
	}

	if ($char_count == 1)
	{
		return utf8_strtoupper($str);
	}

	// Split into the first character and everything after it
	preg_match('/^(.{1})(.*)$/us', $str, $parts);

	return utf8_strtoupper($parts[1]) . $parts[2];
}
/**
 * Recode a string to UTF-8
 *
 * The conversion is attempted with iconv(), then mb_convert_encoding()
 * (for a fixed list of encodings), then recode_string(). If none of them
 * yields a non-empty result, the bundled transcoders from
 * includes/utf/data/ are used. An encoding that cannot be handled at all
 * triggers an E_USER_ERROR.
 *
 * Strings that are empty, not strings, or already declared as utf-8 are
 * returned unchanged.
 *
 * @param string $string Original string
 * @param string $encoding Original encoding (compared case-insensitively)
 * @return string The string, encoded in UTF-8
 */
function utf8_recode($string, $encoding)
{
	$encoding = strtolower($encoding);

	if ($encoding == 'utf-8' || !is_string($string) || empty($string))
	{
		return $string;
	}

	// we force iso-8859-1 to be cp1252
	if ($encoding == 'iso-8859-1')
	{
		$encoding = 'cp1252';
	}
	// convert iso-8859-8-i to iso-8859-8
	else if ($encoding == 'iso-8859-8-i')
	{
		$encoding = 'iso-8859-8';
		$string = hebrev($string);
	}

	// First, try iconv()
	if (function_exists('iconv'))
	{
		$ret = @iconv($encoding, 'utf-8', $string);

		if (!empty($ret))
		{
			return $ret;
		}
	}

	// Try the mb_string extension
	if (function_exists('mb_convert_encoding'))
	{
		// mbstring is nasty on PHP4, we must make *sure* that we send a good encoding
		switch ($encoding)
		{
			case 'iso-8859-1':
			case 'iso-8859-2':
			case 'iso-8859-4':
			case 'iso-8859-7':
			case 'iso-8859-9':
			case 'iso-8859-15':
			case 'windows-1251':
			case 'windows-1252':
			case 'cp1252':
			case 'shift_jis':
			case 'euc-kr':
			case 'big5':
			case 'gb2312':
				$ret = @mb_convert_encoding($string, 'utf-8', $encoding);

				if (!empty($ret))
				{
					return $ret;
				}
		}
	}

	// Try the recode extension
	if (function_exists('recode_string'))
	{
		$ret = @recode_string($encoding . '..utf-8', $string);

		if (!empty($ret))
		{
			return $ret;
		}
	}

	// If nothing works, check if we have a custom transcoder available
	if (!preg_match('#^[a-z0-9_ \-]+$#', $encoding))
	{
		// Make sure the encoding name is alphanumeric, we don't want it to be abused into loading arbitrary files
		trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
	}

	global $phpbb_root_path, $phpEx;

	// Locations of the bundled transcoder libraries
	$basic_file = $phpbb_root_path . 'includes/utf/data/recode_basic.' . $phpEx;
	$cjk_file = $phpbb_root_path . 'includes/utf/data/recode_cjk.' . $phpEx;

	// iso-8859-* character encoding
	if (preg_match('/iso[_ -]?8859[_ -]?(\d+)/', $encoding, $array))
	{
		switch ($array[1])
		{
			case '1':
			case '2':
			case '4':
			case '7':
			case '8':
			case '9':
			case '15':
				if (!function_exists('iso_8859_' . $array[1]))
				{
					if (!file_exists($basic_file))
					{
						trigger_error('Basic reencoder file is missing', E_USER_ERROR);
					}
					include($basic_file);
				}
				return call_user_func('iso_8859_' . $array[1], $string);

			default:
				trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
			break;
		}
	}

	// CP/WIN character encoding
	// The hyphen has to be escaped inside the class: "[_- ]" would be read
	// as a range from "_" to " ", which PCRE rejects as out of order.
	if (preg_match('/(?:cp|windows)[_\- ]?(\d+)/', $encoding, $array))
	{
		switch ($array[1])
		{
			case '932':
				// Handled by the SJIS transcoder further down
			break;

			case '1250':
			case '1251':
			case '1252':
			case '1254':
			case '1255':
			case '1256':
			case '1257':
			case '874':
				if (!function_exists('cp' . $array[1]))
				{
					if (!file_exists($basic_file))
					{
						trigger_error('Basic reencoder file is missing', E_USER_ERROR);
					}
					include($basic_file);
				}
				return call_user_func('cp' . $array[1], $string);

			default:
				trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
			break;
		}
	}

	// TIS-620
	if (preg_match('/tis[_ -]?620/', $encoding))
	{
		if (!function_exists('tis_620'))
		{
			if (!file_exists($basic_file))
			{
				trigger_error('Basic reencoder file is missing', E_USER_ERROR);
			}
			include($basic_file);
		}
		return tis_620($string);
	}

	// SJIS
	if (preg_match('/sjis(?:[_ -]?win)?|(?:cp|ibm)[_ -]?932|shift[_ -]?jis/', $encoding))
	{
		if (!function_exists('sjis'))
		{
			if (!file_exists($cjk_file))
			{
				trigger_error('CJK reencoder file is missing', E_USER_ERROR);
			}
			include($cjk_file);
		}
		return sjis($string);
	}

	// EUC_KR
	if (preg_match('/euc[_ -]?kr/', $encoding))
	{
		if (!function_exists('euc_kr'))
		{
			if (!file_exists($cjk_file))
			{
				trigger_error('CJK reencoder file is missing', E_USER_ERROR);
			}
			include($cjk_file);
		}
		return euc_kr($string);
	}

	// BIG-5
	if (preg_match('/big[_ -]?5/', $encoding))
	{
		if (!function_exists('big5'))
		{
			if (!file_exists($cjk_file))
			{
				trigger_error('CJK reencoder file is missing', E_USER_ERROR);
			}
			include($cjk_file);
		}
		return big5($string);
	}

	// GB2312
	if (preg_match('/gb[_ -]?2312/', $encoding))
	{
		if (!function_exists('gb2312'))
		{
			if (!file_exists($cjk_file))
			{
				trigger_error('CJK reencoder file is missing', E_USER_ERROR);
			}
			include($cjk_file);
		}
		return gb2312($string);
	}

	// Trigger an error?! Fow now just give bad data :-(
	trigger_error('Unknown encoding: ' . $encoding, E_USER_ERROR);
	//return $string; // use utf_normalizer::cleanup() ?
}
/**
* Replace all UTF-8 chars that are not in ASCII with their NCR
*
* Every byte in the range 0xC2-0xF4 followed by one to three bytes in the
* range 0x80-0xBF is handed to utf8_encode_ncr_callback() and replaced by
* whatever that callback returns.
*
* @param string $text UTF-8 string in NFC
* @return string ASCII string using NCRs for non-ASCII chars
*/
function utf8_encode_ncr($text)
{
return preg_replace_callback('#[\xC2-\xF4][\x80-\xBF]{1,3}#', 'utf8_encode_ncr_callback', $text);
}
/**
* Callback used in encode_ncr()
*
* Takes a UTF-8 char and replaces it with its NCR. Attention, $m is an array
*
* @param array $m 0-based numerically indexed array passed by preg_replace_callback()
* @return string '&#' followed by the result of utf8_ord() for $m[0] and a closing ';'
*/
function utf8_encode_ncr_callback($m)
{
return '&#' . utf8_ord($m[0]) . ';';
}
/**
 * Converts a UTF-8 char to its code point
 *
 * The byte length of the input decides how the bits are assembled: the
 * payload bits of the lead byte are combined with the low six bits of each
 * following byte. Input that is not one to four bytes long is returned
 * unchanged.
 *
 * @param string $chr UTF-8 char
 * @return integer UNICODE code point
 */
function utf8_ord($chr)
{
	$size = strlen($chr);

	if ($size == 1)
	{
		return ord($chr);
	}

	if ($size == 2)
	{
		$lead = (ord($chr[0]) & 0x1F) << 6;
		return $lead | (ord($chr[1]) & 0x3F);
	}

	if ($size == 3)
	{
		$lead = (ord($chr[0]) & 0x0F) << 12;
		$mid = (ord($chr[1]) & 0x3F) << 6;
		return $lead | $mid | (ord($chr[2]) & 0x3F);
	}

	if ($size == 4)
	{
		$lead = (ord($chr[0]) & 0x07) << 18;
		$upper = (ord($chr[1]) & 0x3F) << 12;
		$lower = (ord($chr[2]) & 0x3F) << 6;
		return $lead | $upper | $lower | (ord($chr[3]) & 0x3F);
	}

	return $chr;
}
/**
 * Converts a code point to a UTF-8 char
 *
 * The size of the code point selects a four-, three-, two- or one-byte
 * encoding; each continuation byte carries six bits of the value.
 *
 * @param int $cp UNICODE code point
 * @return string UTF-8 char
 */
function utf8_chr($cp)
{
	if ($cp > 0xFFFF)
	{
		$lead = chr(0xF0 | ($cp >> 18));
		$tail = chr(0x80 | (($cp >> 12) & 0x3F)) . chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));

		return $lead . $tail;
	}

	if ($cp > 0x7FF)
	{
		$lead = chr(0xE0 | ($cp >> 12));
		$tail = chr(0x80 | (($cp >> 6) & 0x3F)) . chr(0x80 | ($cp & 0x3F));

		return $lead . $tail;
	}

	if ($cp > 0x7F)
	{
		$lead = chr(0xC0 | ($cp >> 6));

		return $lead . chr(0x80 | ($cp & 0x3F));
	}

	// Plain ASCII: a single byte
	return chr($cp);
}
/**
* Convert Numeric Character References to UTF-8 chars
*
* Matches "&#" followed by either one to six decimal digits or "x" plus one
* to five hexadecimal digits, then ";" (case-insensitive), and replaces each
* match with the result of utf8_decode_ncr_callback().
*
* Notes:
* - we do not convert NCRs recursively, if you pass &#38; it will return &
* - we DO NOT check for the existence of the Unicode characters, therefore an entity may be converted to an inexistent codepoint
*
* @param string $text String to convert, encoded in UTF-8 (no normal form required)
* @return string UTF-8 string where NCRs have been replaced with the actual chars
*/
function utf8_decode_ncr($text)
{
return preg_replace_callback('/&#([0-9]{1,6}|x[0-9A-F]{1,5});/i', 'utf8_decode_ncr_callback', $text);
}
/**
 * Callback used in decode_ncr()
 *
 * Takes a NCR (in decimal or hexadecimal) and returns a UTF-8 char. Attention, $m is an array.
 * It will ignore most of invalid NCRs, but not all!
 *
 * @param array $m 0-based numerically indexed array passed by preg_replace_callback()
 * @return string UTF-8 char
 */
function utf8_decode_ncr_callback($m)
{
	$reference = $m[1];

	if (strncasecmp($reference, 'x', 1) === 0)
	{
		// Hexadecimal form: strip the leading "x" and convert
		$cp = hexdec(substr($reference, 1));
	}
	else
	{
		// Decimal form: passed on as captured
		$cp = $reference;
	}

	return utf8_chr($cp);
}
/**
 * Case folds a unicode string as per Unicode 5.0, section 3.13
 *
 * The "common" mapping table is always applied first, followed by either
 * the "full" table ($option === 'full') or the "simple" table (any other
 * value). Each table is included from includes/utf/data/ on first use and
 * kept in a static array afterwards.
 *
 * @param string $text text to be case folded
 * @param string $option determines how we will fold the cases
 * @return string case folded text
 */
function utf8_case_fold($text, $option = 'full')
{
	static $uniarray = array();
	global $phpbb_root_path, $phpEx;

	$table_files = array(
		'c' => 'includes/utf/data/case_fold_c.',
		'f' => 'includes/utf/data/case_fold_f.',
		's' => 'includes/utf/data/case_fold_s.',
	);

	// full replaces a character with multiple characters,
	// simple replaces a character with another character
	$variant = ($option === 'full') ? 'f' : 's';

	// common is always needed; the variant table only when requested
	foreach (array('c', $variant) as $table)
	{
		if (!isset($uniarray[$table]))
		{
			$uniarray[$table] = include($phpbb_root_path . $table_files[$table] . $phpEx);
		}
	}

	$text = strtr($text, $uniarray['c']);
	$text = strtr($text, $uniarray[$variant]);

	return $text;
}
/**
* Takes the input and does a "special" case fold. It does minor normalization
* and returns NFKC compatible text
*
* The text is case folded with utf8_case_fold(), normalized in place by
* utf_normalizer::nfkc() (the class file is included on demand) and finally
* run through the FC_NFKC_Closure replacement table below.
*
* @param string $text text to be case folded
* @param string $option determines how we will fold the cases
* @return string case folded text
*/
function utf8_case_fold_nfkc($text, $option = 'full')
{
	// Keys and values are raw UTF-8 byte sequences, hence the \x escapes
	static $fc_nfkc_closure = array(
		"\xCD\xBA" => "\x20\xCE\xB9",
		"\xCF\x92" => "\xCF\x85",
		"\xCF\x93" => "\xCF\x8D",
		"\xCF\x94" => "\xCF\x8B",
		"\xCF\xB2" => "\xCF\x83",
		"\xCF\xB9" => "\xCF\x83",
		"\xE1\xB4\xAC" => "\x61",
		"\xE1\xB4\xAD" => "\xC3\xA6",
		"\xE1\xB4\xAE" => "\x62",
		"\xE1\xB4\xB0" => "\x64",
		"\xE1\xB4\xB1" => "\x65",
		"\xE1\xB4\xB2" => "\xC7\x9D",
		"\xE1\xB4\xB3" => "\x67",
		"\xE1\xB4\xB4" => "\x68",
		"\xE1\xB4\xB5" => "\x69",
		"\xE1\xB4\xB6" => "\x6A",
		"\xE1\xB4\xB7" => "\x6B",
		"\xE1\xB4\xB8" => "\x6C",
		"\xE1\xB4\xB9" => "\x6D",
		"\xE1\xB4\xBA" => "\x6E",
		"\xE1\xB4\xBC" => "\x6F",
		"\xE1\xB4\xBD" => "\xC8\xA3",
		"\xE1\xB4\xBE" => "\x70",
		"\xE1\xB4\xBF" => "\x72",
		"\xE1\xB5\x80" => "\x74",
		"\xE1\xB5\x81" => "\x75",
		"\xE1\xB5\x82" => "\x77",
		"\xE2\x82\xA8" => "\x72\x73",
		"\xE2\x84\x82" => "\x63",
		"\xE2\x84\x83" => "\xC2\xB0\x63",
		"\xE2\x84\x87" => "\xC9\x9B",
		"\xE2\x84\x89" => "\xC2\xB0\x66",
		"\xE2\x84\x8B" => "\x68",
		"\xE2\x84\x8C" => "\x68",
		"\xE2\x84\x8D" => "\x68",
		"\xE2\x84\x90" => "\x69",
		"\xE2\x84\x91" => "\x69",
		"\xE2\x84\x92" => "\x6C",
		"\xE2\x84\x95" => "\x6E",
		"\xE2\x84\x96" => "\x6E\x6F",
		"\xE2\x84\x99" => "\x70",
		"\xE2\x84\x9A" => "\x71",
		"\xE2\x84\x9B" => "\x72",
		"\xE2\x84\x9C" => "\x72",
		"\xE2\x84\x9D" => "\x72",
		"\xE2\x84\xA0" => "\x73\x6D",
		"\xE2\x84\xA1" => "\x74\x65\x6C",
		"\xE2\x84\xA2" => "\x74\x6D",
		"\xE2\x84\xA4" => "\x7A",
		"\xE2\x84\xA8" => "\x7A",
		"\xE2\x84\xAC" => "\x62",
		"\xE2\x84\xAD" => "\x63",
		"\xE2\x84\xB0" => "\x65",
		"\xE2\x84\xB1" => "\x66",
		"\xE2\x84\xB3" => "\x6D",
		"\xE2\x84\xBB" => "\x66\x61\x78",
		"\xE2\x84\xBE" => "\xCE\xB3",
		"\xE2\x84\xBF" => "\xCF\x80",
		"\xE2\x85\x85" => "\x64",
		"\xE3\x89\x90" => "\x70\x74\x65",
		"\xE3\x8B\x8C" => "\x68\x67",
		"\xE3\x8B\x8E" => "\x65\x76",
		"\xE3\x8B\x8F" => "\x6C\x74\x64",
		"\xE3\x8D\xB1" => "\x68\x70\x61",
		"\xE3\x8D\xB3" => "\x61\x75",
		"\xE3\x8D\xB5" => "\x6F\x76",
		"\xE3\x8D\xBA" => "\x69\x75",
		"\xE3\x8E\x80" => "\x70\x61",
		"\xE3\x8E\x81" => "\x6E\x61",
		"\xE3\x8E\x82" => "\xCE\xBC\x61",
		"\xE3\x8E\x83" => "\x6D\x61",
		"\xE3\x8E\x84" => "\x6B\x61",
		"\xE3\x8E\x85" => "\x6B\x62",
		"\xE3\x8E\x86" => "\x6D\x62",
		"\xE3\x8E\x87" => "\x67\x62",
		"\xE3\x8E\x8A" => "\x70\x66",
		"\xE3\x8E\x8B" => "\x6E\x66",
		"\xE3\x8E\x8C" => "\xCE\xBC\x66",
		"\xE3\x8E\x90" => "\x68\x7A",
		"\xE3\x8E\x91" => "\x6B\x68\x7A",
		"\xE3\x8E\x92" => "\x6D\x68\x7A",
		"\xE3\x8E\x93" => "\x67\x68\x7A",
		"\xE3\x8E\x94" => "\x74\x68\x7A",
		"\xE3\x8E\xA9" => "\x70\x61",
		"\xE3\x8E\xAA" => "\x6B\x70\x61",
		"\xE3\x8E\xAB" => "\x6D\x70\x61",
		"\xE3\x8E\xAC" => "\x67\x70\x61",
		"\xE3\x8E\xB4" => "\x70\x76",
		"\xE3\x8E\xB5" => "\x6E\x76",
		"\xE3\x8E\xB6" => "\xCE\xBC\x76",
		"\xE3\x8E\xB7" => "\x6D\x76",
		"\xE3\x8E\xB8" => "\x6B\x76",
		"\xE3\x8E\xB9" => "\x6D\x76",
		"\xE3\x8E\xBA" => "\x70\x77",
		"\xE3\x8E\xBB" => "\x6E\x77",
		"\xE3\x8E\xBC" => "\xCE\xBC\x77",
		"\xE3\x8E\xBD" => "\x6D\x77",
		"\xE3\x8E\xBE" => "\x6B\x77",
		"\xE3\x8E\xBF" => "\x6D\x77",
		"\xE3\x8F\x80" => "\x6B\xCF\x89",
		"\xE3\x8F\x81" => "\x6D\xCF\x89",
		"\xE3\x8F\x83" => "\x62\x71",
		"\xE3\x8F\x86" => "\x63\xE2\x88\x95\x6B\x67",
		"\xE3\x8F\x87" => "\x63\x6F\x2E",
		"\xE3\x8F\x88" => "\x64\x62",
		"\xE3\x8F\x89" => "\x67\x79",
		"\xE3\x8F\x8B" => "\x68\x70",
		"\xE3\x8F\x8D" => "\x6B\x6B",
		"\xE3\x8F\x8E" => "\x6B\x6D",
		"\xE3\x8F\x97" => "\x70\x68",
		"\xE3\x8F\x99" => "\x70\x70\x6D",
		"\xE3\x8F\x9A" => "\x70\x72",
		"\xE3\x8F\x9C" => "\x73\x76",
		"\xE3\x8F\x9D" => "\x77\x62",
		"\xE3\x8F\x9E" => "\x76\xE2\x88\x95\x6D",
		"\xE3\x8F\x9F" => "\x61\xE2\x88\x95\x6D",
		"\xF0\x9D\x90\x80" => "\x61",
		"\xF0\x9D\x90\x81" => "\x62",
		"\xF0\x9D\x90\x82" => "\x63",
		"\xF0\x9D\x90\x83" => "\x64",
		"\xF0\x9D\x90\x84" => "\x65",
		"\xF0\x9D\x90\x85" => "\x66",
		"\xF0\x9D\x90\x86" => "\x67",
		"\xF0\x9D\x90\x87" => "\x68",
		"\xF0\x9D\x90\x88" => "\x69",
		"\xF0\x9D\x90\x89" => "\x6A",
		"\xF0\x9D\x90\x8A" => "\x6B",
		"\xF0\x9D\x90\x8B" => "\x6C",
		"\xF0\x9D\x90\x8C" => "\x6D",
		"\xF0\x9D\x90\x8D" => "\x6E",
		"\xF0\x9D\x90\x8E" => "\x6F",
		"\xF0\x9D\x90\x8F" => "\x70",
		"\xF0\x9D\x90\x90" => "\x71",
		"\xF0\x9D\x90\x91" => "\x72",
		"\xF0\x9D\x90\x92" => "\x73",
		"\xF0\x9D\x90\x93" => "\x74",
		"\xF0\x9D\x90\x94" => "\x75",
		"\xF0\x9D\x90\x95" => "\x76",
		"\xF0\x9D\x90\x96" => "\x77",
		"\xF0\x9D\x90\x97" => "\x78",
		"\xF0\x9D\x90\x98" => "\x79",
		"\xF0\x9D\x90\x99" => "\x7A",
		"\xF0\x9D\x90\xB4" => "\x61",
		"\xF0\x9D\x90\xB5" => "\x62",
		"\xF0\x9D\x90\xB6" => "\x63",
		"\xF0\x9D\x90\xB7" => "\x64",
		"\xF0\x9D\x90\xB8" => "\x65",
		"\xF0\x9D\x90\xB9" => "\x66",
		"\xF0\x9D\x90\xBA" => "\x67",
		"\xF0\x9D\x90\xBB" => "\x68",
		"\xF0\x9D\x90\xBC" => "\x69",
		"\xF0\x9D\x90\xBD" => "\x6A",
		"\xF0\x9D\x90\xBE" => "\x6B",
		"\xF0\x9D\x90\xBF" => "\x6C",
		"\xF0\x9D\x91\x80" => "\x6D",
		"\xF0\x9D\x91\x81" => "\x6E",
		"\xF0\x9D\x91\x82" => "\x6F",
		"\xF0\x9D\x91\x83" => "\x70",
		"\xF0\x9D\x91\x84" => "\x71",
		"\xF0\x9D\x91\x85" => "\x72",
		"\xF0\x9D\x91\x86" => "\x73",
		"\xF0\x9D\x91\x87" => "\x74",
		"\xF0\x9D\x91\x88" => "\x75",
		"\xF0\x9D\x91\x89" => "\x76",
		"\xF0\x9D\x91\x8A" => "\x77",
		"\xF0\x9D\x91\x8B" => "\x78",
		"\xF0\x9D\x91\x8C" => "\x79",
		"\xF0\x9D\x91\x8D" => "\x7A",
		"\xF0\x9D\x91\xA8" => "\x61",
		"\xF0\x9D\x91\xA9" => "\x62",
		"\xF0\x9D\x91\xAA" => "\x63",
		"\xF0\x9D\x91\xAB" => "\x64",
		"\xF0\x9D\x91\xAC" => "\x65",
		"\xF0\x9D\x91\xAD" => "\x66",
		"\xF0\x9D\x91\xAE" => "\x67",
		"\xF0\x9D\x91\xAF" => "\x68",
		"\xF0\x9D\x91\xB0" => "\x69",
		"\xF0\x9D\x91\xB1" => "\x6A",
		"\xF0\x9D\x91\xB2" => "\x6B",
		"\xF0\x9D\x91\xB3" => "\x6C",
		"\xF0\x9D\x91\xB4" => "\x6D",
		"\xF0\x9D\x91\xB5" => "\x6E",
		"\xF0\x9D\x91\xB6" => "\x6F",
		"\xF0\x9D\x91\xB7" => "\x70",
		"\xF0\x9D\x91\xB8" => "\x71",
		"\xF0\x9D\x91\xB9" => "\x72",
		"\xF0\x9D\x91\xBA" => "\x73",
		"\xF0\x9D\x91\xBB" => "\x74",
		"\xF0\x9D\x91\xBC" => "\x75",
		"\xF0\x9D\x91\xBD" => "\x76",
		"\xF0\x9D\x91\xBE" => "\x77",
		"\xF0\x9D\x91\xBF" => "\x78",
		"\xF0\x9D\x92\x80" => "\x79",
		"\xF0\x9D\x92\x81" => "\x7A",
		"\xF0\x9D\x92\x9C" => "\x61",
		"\xF0\x9D\x92\x9E" => "\x63",
		"\xF0\x9D\x92\x9F" => "\x64",
		"\xF0\x9D\x92\xA2" => "\x67",
		"\xF0\x9D\x92\xA5" => "\x6A",
		"\xF0\x9D\x92\xA6" => "\x6B",
		"\xF0\x9D\x92\xA9" => "\x6E",
		"\xF0\x9D\x92\xAA" => "\x6F",
		"\xF0\x9D\x92\xAB" => "\x70",
		"\xF0\x9D\x92\xAC" => "\x71",
		"\xF0\x9D\x92\xAE" => "\x73",
		"\xF0\x9D\x92\xAF" => "\x74",
		"\xF0\x9D\x92\xB0" => "\x75",
		"\xF0\x9D\x92\xB1" => "\x76",
		"\xF0\x9D\x92\xB2" => "\x77",
		"\xF0\x9D\x92\xB3" => "\x78",
		"\xF0\x9D\x92\xB4" => "\x79",
		"\xF0\x9D\x92\xB5" => "\x7A",
		"\xF0\x9D\x93\x90" => "\x61",
		"\xF0\x9D\x93\x91" => "\x62",
		"\xF0\x9D\x93\x92" => "\x63",
		"\xF0\x9D\x93\x93" => "\x64",
		"\xF0\x9D\x93\x94" => "\x65",
		"\xF0\x9D\x93\x95" => "\x66",
		"\xF0\x9D\x93\x96" => "\x67",
		"\xF0\x9D\x93\x97" => "\x68",
		"\xF0\x9D\x93\x98" => "\x69",
		"\xF0\x9D\x93\x99" => "\x6A",
		"\xF0\x9D\x93\x9A" => "\x6B",
		"\xF0\x9D\x93\x9B" => "\x6C",
		"\xF0\x9D\x93\x9C" => "\x6D",
		"\xF0\x9D\x93\x9D" => "\x6E",
		"\xF0\x9D\x93\x9E" => "\x6F",
		"\xF0\x9D\x93\x9F" => "\x70",
		"\xF0\x9D\x93\xA0" => "\x71",
		"\xF0\x9D\x93\xA1" => "\x72",
		"\xF0\x9D\x93\xA2" => "\x73",
		"\xF0\x9D\x93\xA3" => "\x74",
		"\xF0\x9D\x93\xA4" => "\x75",
		"\xF0\x9D\x93\xA5" => "\x76",
		"\xF0\x9D\x93\xA6" => "\x77",
		"\xF0\x9D\x93\xA7" => "\x78",
		"\xF0\x9D\x93\xA8" => "\x79",
		"\xF0\x9D\x93\xA9" => "\x7A",
		"\xF0\x9D\x94\x84" => "\x61",
		"\xF0\x9D\x94\x85" => "\x62",
		"\xF0\x9D\x94\x87" => "\x64",
		"\xF0\x9D\x94\x88" => "\x65",
		"\xF0\x9D\x94\x89" => "\x66",
		"\xF0\x9D\x94\x8A" => "\x67",
		"\xF0\x9D\x94\x8D" => "\x6A",
		"\xF0\x9D\x94\x8E" => "\x6B",
		"\xF0\x9D\x94\x8F" => "\x6C",
		"\xF0\x9D\x94\x90" => "\x6D",
		"\xF0\x9D\x94\x91" => "\x6E",
		"\xF0\x9D\x94\x92" => "\x6F",
		"\xF0\x9D\x94\x93" => "\x70",
		"\xF0\x9D\x94\x94" => "\x71",
		"\xF0\x9D\x94\x96" => "\x73",
		"\xF0\x9D\x94\x97" => "\x74",
		"\xF0\x9D\x94\x98" => "\x75",
		"\xF0\x9D\x94\x99" => "\x76",
		"\xF0\x9D\x94\x9A" => "\x77",
		"\xF0\x9D\x94\x9B" => "\x78",
		"\xF0\x9D\x94\x9C" => "\x79",
		"\xF0\x9D\x94\xB8" => "\x61",
		"\xF0\x9D\x94\xB9" => "\x62",
		"\xF0\x9D\x94\xBB" => "\x64",
		"\xF0\x9D\x94\xBC" => "\x65",
		"\xF0\x9D\x94\xBD" => "\x66",
		"\xF0\x9D\x94\xBE" => "\x67",
		"\xF0\x9D\x95\x80" => "\x69",
		"\xF0\x9D\x95\x81" => "\x6A",
		"\xF0\x9D\x95\x82" => "\x6B",
		"\xF0\x9D\x95\x83" => "\x6C",
		"\xF0\x9D\x95\x84" => "\x6D",
		"\xF0\x9D\x95\x86" => "\x6F",
		"\xF0\x9D\x95\x8A" => "\x73",
		"\xF0\x9D\x95\x8B" => "\x74",
		"\xF0\x9D\x95\x8C" => "\x75",
		"\xF0\x9D\x95\x8D" => "\x76",
		"\xF0\x9D\x95\x8E" => "\x77",
		"\xF0\x9D\x95\x8F" => "\x78",
		"\xF0\x9D\x95\x90" => "\x79",
		"\xF0\x9D\x95\xAC" => "\x61",
		"\xF0\x9D\x95\xAD" => "\x62",
		"\xF0\x9D\x95\xAE" => "\x63",
		"\xF0\x9D\x95\xAF" => "\x64",
		"\xF0\x9D\x95\xB0" => "\x65",
		"\xF0\x9D\x95\xB1" => "\x66",
		"\xF0\x9D\x95\xB2" => "\x67",
		"\xF0\x9D\x95\xB3" => "\x68",
		"\xF0\x9D\x95\xB4" => "\x69",
		"\xF0\x9D\x95\xB5" => "\x6A",
		"\xF0\x9D\x95\xB6" => "\x6B",
		"\xF0\x9D\x95\xB7" => "\x6C",
		"\xF0\x9D\x95\xB8" => "\x6D",
		"\xF0\x9D\x95\xB9" => "\x6E",
		"\xF0\x9D\x95\xBA" => "\x6F",
		"\xF0\x9D\x95\xBB" => "\x70",
		"\xF0\x9D\x95\xBC" => "\x71",
		"\xF0\x9D\x95\xBD" => "\x72",
		"\xF0\x9D\x95\xBE" => "\x73",
		"\xF0\x9D\x95\xBF" => "\x74",
		"\xF0\x9D\x96\x80" => "\x75",
		"\xF0\x9D\x96\x81" => "\x76",
		"\xF0\x9D\x96\x82" => "\x77",
		"\xF0\x9D\x96\x83" => "\x78",
		"\xF0\x9D\x96\x84" => "\x79",
		"\xF0\x9D\x96\x85" => "\x7A",
		"\xF0\x9D\x96\xA0" => "\x61",
		"\xF0\x9D\x96\xA1" => "\x62",
		"\xF0\x9D\x96\xA2" => "\x63",
		"\xF0\x9D\x96\xA3" => "\x64",
		"\xF0\x9D\x96\xA4" => "\x65",
		"\xF0\x9D\x96\xA5" => "\x66",
		"\xF0\x9D\x96\xA6" => "\x67",
		"\xF0\x9D\x96\xA7" => "\x68",
		"\xF0\x9D\x96\xA8" => "\x69",
		"\xF0\x9D\x96\xA9" => "\x6A",
		"\xF0\x9D\x96\xAA" => "\x6B",
		"\xF0\x9D\x96\xAB" => "\x6C",
		"\xF0\x9D\x96\xAC" => "\x6D",
		"\xF0\x9D\x96\xAD" => "\x6E",
		"\xF0\x9D\x96\xAE" => "\x6F",
		"\xF0\x9D\x96\xAF" => "\x70",
		"\xF0\x9D\x96\xB0" => "\x71",
		"\xF0\x9D\x96\xB1" => "\x72",
		"\xF0\x9D\x96\xB2" => "\x73",
		"\xF0\x9D\x96\xB3" => "\x74",
		"\xF0\x9D\x96\xB4" => "\x75",
		"\xF0\x9D\x96\xB5" => "\x76",
		"\xF0\x9D\x96\xB6" => "\x77",
		"\xF0\x9D\x96\xB7" => "\x78",
		"\xF0\x9D\x96\xB8" => "\x79",
		"\xF0\x9D\x96\xB9" => "\x7A",
		"\xF0\x9D\x97\x94" => "\x61",
		"\xF0\x9D\x97\x95" => "\x62",
		"\xF0\x9D\x97\x96" => "\x63",
		"\xF0\x9D\x97\x97" => "\x64",
		"\xF0\x9D\x97\x98" => "\x65",
		"\xF0\x9D\x97\x99" => "\x66",
		"\xF0\x9D\x97\x9A" => "\x67",
		"\xF0\x9D\x97\x9B" => "\x68",
		"\xF0\x9D\x97\x9C" => "\x69",
		"\xF0\x9D\x97\x9D" => "\x6A",
		"\xF0\x9D\x97\x9E" => "\x6B",
		"\xF0\x9D\x97\x9F" => "\x6C",
		"\xF0\x9D\x97\xA0" => "\x6D",
		"\xF0\x9D\x97\xA1" => "\x6E",
		"\xF0\x9D\x97\xA2" => "\x6F",
		"\xF0\x9D\x97\xA3" => "\x70",
		"\xF0\x9D\x97\xA4" => "\x71",
		"\xF0\x9D\x97\xA5" => "\x72",
		"\xF0\x9D\x97\xA6" => "\x73",
		"\xF0\x9D\x97\xA7" => "\x74",
		"\xF0\x9D\x97\xA8" => "\x75",
		"\xF0\x9D\x97\xA9" => "\x76",
		"\xF0\x9D\x97\xAA" => "\x77",
		"\xF0\x9D\x97\xAB" => "\x78",
		"\xF0\x9D\x97\xAC" => "\x79",
		"\xF0\x9D\x97\xAD" => "\x7A",
		"\xF0\x9D\x98\x88" => "\x61",
		"\xF0\x9D\x98\x89" => "\x62",
		"\xF0\x9D\x98\x8A" => "\x63",
		"\xF0\x9D\x98\x8B" => "\x64",
		"\xF0\x9D\x98\x8C" => "\x65",
		"\xF0\x9D\x98\x8D" => "\x66",
		"\xF0\x9D\x98\x8E" => "\x67",
		"\xF0\x9D\x98\x8F" => "\x68",
		"\xF0\x9D\x98\x90" => "\x69",
		"\xF0\x9D\x98\x91" => "\x6A",
		"\xF0\x9D\x98\x92" => "\x6B",
		"\xF0\x9D\x98\x93" => "\x6C",
		"\xF0\x9D\x98\x94" => "\x6D",
		"\xF0\x9D\x98\x95" => "\x6E",
		"\xF0\x9D\x98\x96" => "\x6F",
		"\xF0\x9D\x98\x97" => "\x70",
		"\xF0\x9D\x98\x98" => "\x71",
		"\xF0\x9D\x98\x99" => "\x72",
		"\xF0\x9D\x98\x9A" => "\x73",
		"\xF0\x9D\x98\x9B" => "\x74",
		"\xF0\x9D\x98\x9C" => "\x75",
		"\xF0\x9D\x98\x9D" => "\x76",
		"\xF0\x9D\x98\x9E" => "\x77",
		"\xF0\x9D\x98\x9F" => "\x78",
		"\xF0\x9D\x98\xA0" => "\x79",
		"\xF0\x9D\x98\xA1" => "\x7A",
		"\xF0\x9D\x98\xBC" => "\x61",
		"\xF0\x9D\x98\xBD" => "\x62",
		"\xF0\x9D\x98\xBE" => "\x63",
		"\xF0\x9D\x98\xBF" => "\x64",
		"\xF0\x9D\x99\x80" => "\x65",
		"\xF0\x9D\x99\x81" => "\x66",
		"\xF0\x9D\x99\x82" => "\x67",
		"\xF0\x9D\x99\x83" => "\x68",
		"\xF0\x9D\x99\x84" => "\x69",
		"\xF0\x9D\x99\x85" => "\x6A",
		"\xF0\x9D\x99\x86" => "\x6B",
		"\xF0\x9D\x99\x87" => "\x6C",
		"\xF0\x9D\x99\x88" => "\x6D",
		"\xF0\x9D\x99\x89" => "\x6E",
		"\xF0\x9D\x99\x8A" => "\x6F",
		"\xF0\x9D\x99\x8B" => "\x70",
		"\xF0\x9D\x99\x8C" => "\x71",
		"\xF0\x9D\x99\x8D" => "\x72",
		"\xF0\x9D\x99\x8E" => "\x73",
		"\xF0\x9D\x99\x8F" => "\x74",
		"\xF0\x9D\x99\x90" => "\x75",
		"\xF0\x9D\x99\x91" => "\x76",
		"\xF0\x9D\x99\x92" => "\x77",
		"\xF0\x9D\x99\x93" => "\x78",
		"\xF0\x9D\x99\x94" => "\x79",
		"\xF0\x9D\x99\x95" => "\x7A",
		"\xF0\x9D\x99\xB0" => "\x61",
		"\xF0\x9D\x99\xB1" => "\x62",
		"\xF0\x9D\x99\xB2" => "\x63",
		"\xF0\x9D\x99\xB3" => "\x64",
		"\xF0\x9D\x99\xB4" => "\x65",
		"\xF0\x9D\x99\xB5" => "\x66",
		"\xF0\x9D\x99\xB6" => "\x67",
		"\xF0\x9D\x99\xB7" => "\x68",
		"\xF0\x9D\x99\xB8" => "\x69",
		"\xF0\x9D\x99\xB9" => "\x6A",
		"\xF0\x9D\x99\xBA" => "\x6B",
		"\xF0\x9D\x99\xBB" => "\x6C",
		"\xF0\x9D\x99\xBC" => "\x6D",
		"\xF0\x9D\x99\xBD" => "\x6E",
		"\xF0\x9D\x99\xBE" => "\x6F",
		"\xF0\x9D\x99\xBF" => "\x70",
		"\xF0\x9D\x9A\x80" => "\x71",
		"\xF0\x9D\x9A\x81" => "\x72",
		"\xF0\x9D\x9A\x82" => "\x73",
		"\xF0\x9D\x9A\x83" => "\x74",
		"\xF0\x9D\x9A\x84" => "\x75",
		"\xF0\x9D\x9A\x85" => "\x76",
		"\xF0\x9D\x9A\x86" => "\x77",
		"\xF0\x9D\x9A\x87" => "\x78",
		"\xF0\x9D\x9A\x88" => "\x79",
		"\xF0\x9D\x9A\x89" => "\x7A",
		"\xF0\x9D\x9A\xA8" => "\xCE\xB1",
		"\xF0\x9D\x9A\xA9" => "\xCE\xB2",
		"\xF0\x9D\x9A\xAA" => "\xCE\xB3",
		"\xF0\x9D\x9A\xAB" => "\xCE\xB4",
		"\xF0\x9D\x9A\xAC" => "\xCE\xB5",
		"\xF0\x9D\x9A\xAD" => "\xCE\xB6",
		"\xF0\x9D\x9A\xAE" => "\xCE\xB7",
		"\xF0\x9D\x9A\xAF" => "\xCE\xB8",
		"\xF0\x9D\x9A\xB0" => "\xCE\xB9",
		"\xF0\x9D\x9A\xB1" => "\xCE\xBA",
		"\xF0\x9D\x9A\xB2" => "\xCE\xBB",
		"\xF0\x9D\x9A\xB3" => "\xCE\xBC",
		"\xF0\x9D\x9A\xB4" => "\xCE\xBD",
		"\xF0\x9D\x9A\xB5" => "\xCE\xBE",
		"\xF0\x9D\x9A\xB6" => "\xCE\xBF",
		"\xF0\x9D\x9A\xB7" => "\xCF\x80",
		"\xF0\x9D\x9A\xB8" => "\xCF\x81",
		"\xF0\x9D\x9A\xB9" => "\xCE\xB8",
		"\xF0\x9D\x9A\xBA" => "\xCF\x83",
		"\xF0\x9D\x9A\xBB" => "\xCF\x84",
		"\xF0\x9D\x9A\xBC" => "\xCF\x85",
		"\xF0\x9D\x9A\xBD" => "\xCF\x86",
		"\xF0\x9D\x9A\xBE" => "\xCF\x87",
		"\xF0\x9D\x9A\xBF" => "\xCF\x88",
		"\xF0\x9D\x9B\x80" => "\xCF\x89",
		"\xF0\x9D\x9B\x93" => "\xCF\x83",
		"\xF0\x9D\x9B\xA2" => "\xCE\xB1",
		"\xF0\x9D\x9B\xA3" => "\xCE\xB2",
		"\xF0\x9D\x9B\xA4" => "\xCE\xB3",
		"\xF0\x9D\x9B\xA5" => "\xCE\xB4",
		"\xF0\x9D\x9B\xA6" => "\xCE\xB5",
		"\xF0\x9D\x9B\xA7" => "\xCE\xB6",
		"\xF0\x9D\x9B\xA8" => "\xCE\xB7",
		"\xF0\x9D\x9B\xA9" => "\xCE\xB8",
		"\xF0\x9D\x9B\xAA" => "\xCE\xB9",
		"\xF0\x9D\x9B\xAB" => "\xCE\xBA",
		"\xF0\x9D\x9B\xAC" => "\xCE\xBB",
		"\xF0\x9D\x9B\xAD" => "\xCE\xBC",
		"\xF0\x9D\x9B\xAE" => "\xCE\xBD",
		"\xF0\x9D\x9B\xAF" => "\xCE\xBE",
		"\xF0\x9D\x9B\xB0" => "\xCE\xBF",
		"\xF0\x9D\x9B\xB1" => "\xCF\x80",
		"\xF0\x9D\x9B\xB2" => "\xCF\x81",
		"\xF0\x9D\x9B\xB3" => "\xCE\xB8",
		"\xF0\x9D\x9B\xB4" => "\xCF\x83",
		"\xF0\x9D\x9B\xB5" => "\xCF\x84",
		"\xF0\x9D\x9B\xB6" => "\xCF\x85",
		"\xF0\x9D\x9B\xB7" => "\xCF\x86",
		"\xF0\x9D\x9B\xB8" => "\xCF\x87",
		"\xF0\x9D\x9B\xB9" => "\xCF\x88",
		"\xF0\x9D\x9B\xBA" => "\xCF\x89",
		"\xF0\x9D\x9C\x8D" => "\xCF\x83",
		"\xF0\x9D\x9C\x9C" => "\xCE\xB1",
		"\xF0\x9D\x9C\x9D" => "\xCE\xB2",
		"\xF0\x9D\x9C\x9E" => "\xCE\xB3",
		"\xF0\x9D\x9C\x9F" => "\xCE\xB4",
		"\xF0\x9D\x9C\xA0" => "\xCE\xB5",
		"\xF0\x9D\x9C\xA1" => "\xCE\xB6",
		"\xF0\x9D\x9C\xA2" => "\xCE\xB7",
		"\xF0\x9D\x9C\xA3" => "\xCE\xB8",
		"\xF0\x9D\x9C\xA4" => "\xCE\xB9",
		"\xF0\x9D\x9C\xA5" => "\xCE\xBA",
		"\xF0\x9D\x9C\xA6" => "\xCE\xBB",
		"\xF0\x9D\x9C\xA7" => "\xCE\xBC",
		"\xF0\x9D\x9C\xA8" => "\xCE\xBD",
		"\xF0\x9D\x9C\xA9" => "\xCE\xBE",
		"\xF0\x9D\x9C\xAA" => "\xCE\xBF",
		"\xF0\x9D\x9C\xAB" => "\xCF\x80",
		"\xF0\x9D\x9C\xAC" => "\xCF\x81",
		"\xF0\x9D\x9C\xAD" => "\xCE\xB8",
		"\xF0\x9D\x9C\xAE" => "\xCF\x83",
		"\xF0\x9D\x9C\xAF" => "\xCF\x84",
		"\xF0\x9D\x9C\xB0" => "\xCF\x85",
		"\xF0\x9D\x9C\xB1" => "\xCF\x86",
		"\xF0\x9D\x9C\xB2" => "\xCF\x87",
		"\xF0\x9D\x9C\xB3" => "\xCF\x88",
		"\xF0\x9D\x9C\xB4" => "\xCF\x89",
		"\xF0\x9D\x9D\x87" => "\xCF\x83",
		"\xF0\x9D\x9D\x96" => "\xCE\xB1",
		"\xF0\x9D\x9D\x97" => "\xCE\xB2",
		"\xF0\x9D\x9D\x98" => "\xCE\xB3",
		"\xF0\x9D\x9D\x99" => "\xCE\xB4",
		"\xF0\x9D\x9D\x9A" => "\xCE\xB5",
		"\xF0\x9D\x9D\x9B" => "\xCE\xB6",
		"\xF0\x9D\x9D\x9C" => "\xCE\xB7",
		"\xF0\x9D\x9D\x9D" => "\xCE\xB8",
		"\xF0\x9D\x9D\x9E" => "\xCE\xB9",
		"\xF0\x9D\x9D\x9F" => "\xCE\xBA",
		"\xF0\x9D\x9D\xA0" => "\xCE\xBB",
		"\xF0\x9D\x9D\xA1" => "\xCE\xBC",
		"\xF0\x9D\x9D\xA2" => "\xCE\xBD",
		"\xF0\x9D\x9D\xA3" => "\xCE\xBE",
		"\xF0\x9D\x9D\xA4" => "\xCE\xBF",
		"\xF0\x9D\x9D\xA5" => "\xCF\x80",
		"\xF0\x9D\x9D\xA6" => "\xCF\x81",
		"\xF0\x9D\x9D\xA7" => "\xCE\xB8",
		"\xF0\x9D\x9D\xA8" => "\xCF\x83",
		"\xF0\x9D\x9D\xA9" => "\xCF\x84",
		"\xF0\x9D\x9D\xAA" => "\xCF\x85",
		"\xF0\x9D\x9D\xAB" => "\xCF\x86",
		"\xF0\x9D\x9D\xAC" => "\xCF\x87",
		"\xF0\x9D\x9D\xAD" => "\xCF\x88",
		"\xF0\x9D\x9D\xAE" => "\xCF\x89",
		"\xF0\x9D\x9E\x81" => "\xCF\x83",
		"\xF0\x9D\x9E\x90" => "\xCE\xB1",
		"\xF0\x9D\x9E\x91" => "\xCE\xB2",
		"\xF0\x9D\x9E\x92" => "\xCE\xB3",
		"\xF0\x9D\x9E\x93" => "\xCE\xB4",
		"\xF0\x9D\x9E\x94" => "\xCE\xB5",
		"\xF0\x9D\x9E\x95" => "\xCE\xB6",
		"\xF0\x9D\x9E\x96" => "\xCE\xB7",
		"\xF0\x9D\x9E\x97" => "\xCE\xB8",
		"\xF0\x9D\x9E\x98" => "\xCE\xB9",
		"\xF0\x9D\x9E\x99" => "\xCE\xBA",
		"\xF0\x9D\x9E\x9A" => "\xCE\xBB",
		"\xF0\x9D\x9E\x9B" => "\xCE\xBC",
		"\xF0\x9D\x9E\x9C" => "\xCE\xBD",
		"\xF0\x9D\x9E\x9D" => "\xCE\xBE",
		"\xF0\x9D\x9E\x9E" => "\xCE\xBF",
		"\xF0\x9D\x9E\x9F" => "\xCF\x80",
		"\xF0\x9D\x9E\xA0" => "\xCF\x81",
		"\xF0\x9D\x9E\xA1" => "\xCE\xB8",
		"\xF0\x9D\x9E\xA2" => "\xCF\x83",
		"\xF0\x9D\x9E\xA3" => "\xCF\x84",
		"\xF0\x9D\x9E\xA4" => "\xCF\x85",
		"\xF0\x9D\x9E\xA5" => "\xCF\x86",
		"\xF0\x9D\x9E\xA6" => "\xCF\x87",
		"\xF0\x9D\x9E\xA7" => "\xCF\x88",
		"\xF0\x9D\x9E\xA8" => "\xCF\x89",
		"\xF0\x9D\x9E\xBB" => "\xCF\x83",
		"\xF0\x9D\x9F\x8A" => "\xCF\x9D",
	);

	// do the case fold
	$text = utf8_case_fold($text, $option);

	// load the normalizer class only when nothing has defined it yet
	if (!class_exists('utf_normalizer'))
	{
		global $phpbb_root_path, $phpEx;
		include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
	}

	// convert to NFKC (the normalizer modifies $text in place)
	utf_normalizer::nfkc($text);

	// FC_NFKC_Closure, http://www.unicode.org/Public/5.0.0/ucd/DerivedNormalizationProps.txt
	$text = strtr($text, $fc_nfkc_closure);

	return $text;
}
/**
* Assume the input is NFC:
* Takes the input and does a "special" case fold. It does minor normalization as well.
*
* Before folding, every composed code point listed in the table below is
* replaced by its base character followed by U+0345 (\xCD\x85), then the
* result is passed to utf8_case_fold().
*
* @param string $text text to be case folded
* @param string $option determines how we will fold the cases
* @return string case folded text
*/
function utf8_case_fold_nfc($text, $option = 'full')
{
	// Keys and values are raw UTF-8 byte sequences, hence the \x escapes
	static $ypogegrammeni = array(
		"\xCD\xBA" => "\x20\xCD\x85",
		"\xE1\xBE\x80" => "\xE1\xBC\x80\xCD\x85",
		"\xE1\xBE\x81" => "\xE1\xBC\x81\xCD\x85",
		"\xE1\xBE\x82" => "\xE1\xBC\x82\xCD\x85",
		"\xE1\xBE\x83" => "\xE1\xBC\x83\xCD\x85",
		"\xE1\xBE\x84" => "\xE1\xBC\x84\xCD\x85",
		"\xE1\xBE\x85" => "\xE1\xBC\x85\xCD\x85",
		"\xE1\xBE\x86" => "\xE1\xBC\x86\xCD\x85",
		"\xE1\xBE\x87" => "\xE1\xBC\x87\xCD\x85",
		"\xE1\xBE\x88" => "\xE1\xBC\x88\xCD\x85",
		"\xE1\xBE\x89" => "\xE1\xBC\x89\xCD\x85",
		"\xE1\xBE\x8A" => "\xE1\xBC\x8A\xCD\x85",
		"\xE1\xBE\x8B" => "\xE1\xBC\x8B\xCD\x85",
		"\xE1\xBE\x8C" => "\xE1\xBC\x8C\xCD\x85",
		"\xE1\xBE\x8D" => "\xE1\xBC\x8D\xCD\x85",
		"\xE1\xBE\x8E" => "\xE1\xBC\x8E\xCD\x85",
		"\xE1\xBE\x8F" => "\xE1\xBC\x8F\xCD\x85",
		"\xE1\xBE\x90" => "\xE1\xBC\xA0\xCD\x85",
		"\xE1\xBE\x91" => "\xE1\xBC\xA1\xCD\x85",
		"\xE1\xBE\x92" => "\xE1\xBC\xA2\xCD\x85",
		"\xE1\xBE\x93" => "\xE1\xBC\xA3\xCD\x85",
		"\xE1\xBE\x94" => "\xE1\xBC\xA4\xCD\x85",
		"\xE1\xBE\x95" => "\xE1\xBC\xA5\xCD\x85",
		"\xE1\xBE\x96" => "\xE1\xBC\xA6\xCD\x85",
		"\xE1\xBE\x97" => "\xE1\xBC\xA7\xCD\x85",
		"\xE1\xBE\x98" => "\xE1\xBC\xA8\xCD\x85",
		"\xE1\xBE\x99" => "\xE1\xBC\xA9\xCD\x85",
		"\xE1\xBE\x9A" => "\xE1\xBC\xAA\xCD\x85",
		"\xE1\xBE\x9B" => "\xE1\xBC\xAB\xCD\x85",
		"\xE1\xBE\x9C" => "\xE1\xBC\xAC\xCD\x85",
		"\xE1\xBE\x9D" => "\xE1\xBC\xAD\xCD\x85",
		"\xE1\xBE\x9E" => "\xE1\xBC\xAE\xCD\x85",
		"\xE1\xBE\x9F" => "\xE1\xBC\xAF\xCD\x85",
		"\xE1\xBE\xA0" => "\xE1\xBD\xA0\xCD\x85",
		"\xE1\xBE\xA1" => "\xE1\xBD\xA1\xCD\x85",
		"\xE1\xBE\xA2" => "\xE1\xBD\xA2\xCD\x85",
		"\xE1\xBE\xA3" => "\xE1\xBD\xA3\xCD\x85",
		"\xE1\xBE\xA4" => "\xE1\xBD\xA4\xCD\x85",
		"\xE1\xBE\xA5" => "\xE1\xBD\xA5\xCD\x85",
		"\xE1\xBE\xA6" => "\xE1\xBD\xA6\xCD\x85",
		"\xE1\xBE\xA7" => "\xE1\xBD\xA7\xCD\x85",
		"\xE1\xBE\xA8" => "\xE1\xBD\xA8\xCD\x85",
		"\xE1\xBE\xA9" => "\xE1\xBD\xA9\xCD\x85",
		"\xE1\xBE\xAA" => "\xE1\xBD\xAA\xCD\x85",
		"\xE1\xBE\xAB" => "\xE1\xBD\xAB\xCD\x85",
		"\xE1\xBE\xAC" => "\xE1\xBD\xAC\xCD\x85",
		"\xE1\xBE\xAD" => "\xE1\xBD\xAD\xCD\x85",
		"\xE1\xBE\xAE" => "\xE1\xBD\xAE\xCD\x85",
		"\xE1\xBE\xAF" => "\xE1\xBD\xAF\xCD\x85",
		"\xE1\xBE\xB2" => "\xE1\xBD\xB0\xCD\x85",
		"\xE1\xBE\xB3" => "\xCE\xB1\xCD\x85",
		"\xE1\xBE\xB4" => "\xCE\xAC\xCD\x85",
		"\xE1\xBE\xB7" => "\xE1\xBE\xB6\xCD\x85",
		"\xE1\xBE\xBC" => "\xCE\x91\xCD\x85",
		"\xE1\xBF\x82" => "\xE1\xBD\xB4\xCD\x85",
		"\xE1\xBF\x83" => "\xCE\xB7\xCD\x85",
		"\xE1\xBF\x84" => "\xCE\xAE\xCD\x85",
		"\xE1\xBF\x87" => "\xE1\xBF\x86\xCD\x85",
		"\xE1\xBF\x8C" => "\xCE\x97\xCD\x85",
		"\xE1\xBF\xB2" => "\xE1\xBD\xBC\xCD\x85",
		"\xE1\xBF\xB3" => "\xCF\x89\xCD\x85",
		"\xE1\xBF\xB4" => "\xCF\x8E\xCD\x85",
		"\xE1\xBF\xB7" => "\xE1\xBF\xB6\xCD\x85",
		"\xE1\xBF\xBC" => "\xCE\xA9\xCD\x85",
	);

	// perform a small trick, avoid further normalization on composed points that contain U+0345 in their decomposition
	$text = strtr($text, $ypogegrammeni);

	// do the case fold
	$text = utf8_case_fold($text, $option);

	return $text;
}
if (extension_loaded('intl'))
{
/**
* wrapper around PHP's native normalizer from intl
* previously a PECL extension, included in the core since PHP 5.3.0
* http://php.net/manual/en/normalizer.normalize.php
*
* Empty input is returned untouched. A scalar, each element of an array and
* each element of an array nested one level deep is left as is when
* Normalizer::isNormalized() accepts it; otherwise it is replaced by the
* result of Normalizer::normalize() cast to string.
*
* @param mixed $strings a string or an array of strings to normalize
* @return mixed the normalized content, preserving array keys if array given.
*/
function utf8_normalize_nfc($strings)
{
	if (empty($strings))
	{
		return $strings;
	}

	// Keeps already normalized values untouched, normalizes everything else
	$to_nfc = function ($value)
	{
		return Normalizer::isNormalized($value) ? $value : (string) Normalizer::normalize($value);
	};

	if (!is_array($strings))
	{
		return $to_nfc($strings);
	}

	foreach ($strings as $key => $string)
	{
		if (!is_array($string))
		{
			$strings[$key] = $to_nfc($string);
			continue;
		}

		// one level of nesting is supported
		foreach ($string as $_key => $_string)
		{
			$strings[$key][$_key] = $to_nfc($_string);
		}
	}

	return $strings;
}
}
else
{
/**
* A wrapper function for the normalizer which takes care of including the class if
* required and modifies the passed strings to be in NFC (Normalization Form Composition).
*
* Empty input is returned untouched. Otherwise utf_normalizer::nfc() is applied
* in place to a scalar, to each element of an array and to each element of an
* array nested one level deep.
*
* @param mixed $strings a string or an array of strings to normalize
* @return mixed the normalized content, preserving array keys if array given.
*/
function utf8_normalize_nfc($strings)
{
	if (empty($strings))
	{
		return $strings;
	}

	// load the normalizer class only when nothing has defined it yet
	if (!class_exists('utf_normalizer'))
	{
		global $phpbb_root_path, $phpEx;
		include($phpbb_root_path . 'includes/utf/utf_normalizer.' . $phpEx);
	}

	if (!is_array($strings))
	{
		utf_normalizer::nfc($strings);
		return $strings;
	}

	foreach ($strings as $key => $string)
	{
		if (!is_array($string))
		{
			utf_normalizer::nfc($strings[$key]);
			continue;
		}

		// one level of nesting is supported
		foreach (array_keys($string) as $_key)
		{
			utf_normalizer::nfc($strings[$key][$_key]);
		}
	}

	return $strings;
}
}
/**
* This function is used to generate a "clean" version of a string.
* Clean means that it is a case insensitive form (case folding) and that it is normalized (NFC).
* Additionally homographs of one character are transformed into one specific character (preferably ASCII
* if it is an ASCII character).
*
* Please be aware that if you change something within this function or within
* functions used here you need to rebuild/update the username_clean column in the users table. And all other
* columns that store a clean string otherwise you will break this functionality.
*
* @param string $text An unclean string, maybe user input (has to be valid UTF-8!)
* @return string Cleaned up version of the input string
*/
function utf8_clean_string($text)
{
	global $phpbb_root_path, $phpEx;
	static $homographs = array();

	// the confusables table is loaded once and cached for later calls
	if (empty($homographs))
	{
		$homographs = include($phpbb_root_path . 'includes/utf/data/confusables.' . $phpEx);
	}

	$text = utf8_case_fold_nfkc($text);
	$text = strtr($text, $homographs);

	// Other control characters: the bytes 0x00-0x1F and 0x7F, plus the
	// two-byte sequences \xC2\x80-\xC2\x9F (U+0080-U+009F in UTF-8).
	// The backslashes are required: without them the bracket expressions
	// would match ordinary ASCII digits and letters.
	$text = preg_replace('#(?:[\x00-\x1F\x7F]+|(?:\xC2[\x80-\x9F])+)#', '', $text);

	// we need to reduce multiple spaces to a single one
	$text = preg_replace('# {2,}#', ' ', $text);

	// we can use trim here as all the other space characters should have been turned
	// into normal ASCII spaces by now
	return trim($text);
}
/**
* Shorthand for htmlspecialchars() with the ENT_COMPAT flag and UTF-8 charset
*
* @param string $value text to escape
* @return string the escaped text
*/
function utf8_htmlspecialchars($value)
{
	$escaped = htmlspecialchars($value, ENT_COMPAT, 'UTF-8');

	return $escaped;
}
/**
* Trying to convert returned system message to utf8
*
* PHP assumes such messages are ISO-8859-1 so we'll do that too
* and if it breaks messages we'll blame it on them ;-)
*
* @param string $message the system message
* @return string the message passed through utf8_htmlspecialchars(), recoded
*	from ISO-8859-1 first when it contains any byte in the range 0x80-0xFF
*/
function utf8_convert_message($message)
{
	// First of all check if conversion is needed at all, as there is no point
	// in converting ASCII messages from ISO-8859-1 to UTF-8.
	// The backslashes are required: without them the bracket expression
	// would match ordinary ASCII digits and letters.
	if (!preg_match('/[\x80-\xFF]/', $message))
	{
		return utf8_htmlspecialchars($message);
	}

	// else we need to convert some part of the message
	return utf8_htmlspecialchars(utf8_recode($message, 'ISO-8859-1'));
}
/**
* UTF8-compatible wordwrap replacement
*
* The string is first split on $break so that existing breaks survive, each
* resulting line is then split on single spaces and re-assembled word by word.
*
* @param string $string The input string
* @param int $width The column width. Defaults to 75.
* @param string $break The line is broken using the optional break parameter. Defaults to "\n".
* @param bool $cut If the cut is set to TRUE, the string is always wrapped at the specified width. So if you have a word that is larger than the given width, it is broken apart.
*
* @return string the given string wrapped at the specified column.
*
*/
function utf8_wordwrap($string, $width = 75, $break = "\n", $cut = false)
{
	// We first need to explode on $break, not destroying existing (intended) breaks
	$lines = explode($break, $string);
	$new_lines = array(0 => '');
	$index = 0;

	foreach ($lines as $line)
	{
		$words = explode(' ', $line);

		for ($i = 0, $size = count($words); $i < $size; $i++)
		{
			$word = $words[$i];

			// If cut is true we need to cut the word if it is > width chars
			if ($cut && utf8_strlen($word) > $width)
			{
				// keep the remainder in place and revisit the same slot on the next pass
				$words[$i] = utf8_substr($word, $width);
				$word = utf8_substr($word, 0, $width);
				$i--;
			}

			if (utf8_strlen($new_lines[$index] . $word) > $width)
			{
				// drop the trailing space of the finished line and open a new one
				$new_lines[$index] = substr($new_lines[$index], 0, -1);
				$index++;
				$new_lines[$index] = '';
			}

			$new_lines[$index] .= $word . ' ';
		}

		// end of an input line: strip the trailing space and start the next output line
		$new_lines[$index] = substr($new_lines[$index], 0, -1);
		$index++;
		$new_lines[$index] = '';
	}

	// the last opened line is always an unused placeholder
	unset($new_lines[$index]);

	return implode($break, $new_lines);
}
/**
* UTF8-safe basename() function
*
* basename() has some limitations and is dependent on the locale setting
* according to the PHP manual. Therefore we provide our own locale independent
* basename function.
*
* Everything up to and including the last forward slash is removed first,
* then everything up to and including the last backslash of what remains.
*
* @param string $filename The filename basename() should be applied to
* @return string The basenamed filename
*/
function utf8_basename($filename)
{
	// We always check for forward slash AND backward slash
	// because they could be mixed or "sneaked" in. ;)
	// You know, never trust user input...
	if (strpos($filename, '/') !== false)
	{
		$filename = utf8_substr($filename, utf8_strrpos($filename, '/') + 1);
	}

	// '\\' is a single backslash; a lone '\' would escape the closing quote
	if (strpos($filename, '\\') !== false)
	{
		$filename = utf8_substr($filename, utf8_strrpos($filename, '\\') + 1);
	}

	return $filename;
}
/**
* UTF8-safe str_replace() function
*
* Search values are processed in order. A replacement array shorter than the
* search array is padded with empty strings, and a single replacement string
* is used for every search value. Empty search values are skipped, as nothing
* can sensibly be replaced for them.
*
* @param string|array $search The value (or list of values) to search for
* @param string|array $replace The replacement string (or list of strings)
* @param string $subject The target string
* @return string The resultant string
*/
function utf8_str_replace($search, $replace, $subject)
{
	if (!is_array($search))
	{
		$search = array($search);

		// a single search value cannot be paired with a list of replacements
		if (is_array($replace))
		{
			$replace = (string) $replace;
			trigger_error('Array to string conversion', E_USER_NOTICE);
		}
	}

	$length = count($search);

	if (!is_array($replace))
	{
		$replace = array_fill(0, $length, $replace);
	}
	else
	{
		$replace = array_pad($replace, $length, '');
	}

	for ($i = 0; $i < $length; $i++)
	{
		$search_length = utf8_strlen($search[$i]);

		// An empty needle would be "found" at every offset on PHP 8 and the
		// loop below would keep inserting the replacement forever
		if ($search_length < 1)
		{
			continue;
		}

		$replace_length = utf8_strlen($replace[$i]);
		$offset = 0;

		while (($start = utf8_strpos($subject, $search[$i], $offset)) !== false)
		{
			$subject = utf8_substr($subject, 0, $start) . $replace[$i] . utf8_substr($subject, $start + $search_length);

			// continue behind the inserted text so it is never searched again
			$offset = $start + $replace_length;
		}
	}

	return $subject;
}