Вход Регистрация
Файл: concrete5.7.5.6/concrete/vendor/patchwork/utf8/class/Patchwork/PHP/Shim/Normalizer.php
Строк: 119
<?php // vi: set fenc=utf-8 ts=4 sw=4 et:
/*
 * Copyright (C) 2013 Nicolas Grekas - p@tchwork.com
 *
 * This library is free software; you can redistribute it and/or modify it
 * under the terms of the (at your option):
 * Apache License v2.0 (http://apache.org/licenses/LICENSE-2.0.txt), or
 * GNU General Public License v2.0 (http://gnu.org/licenses/gpl-2.0.txt).
 */

// Namespace mirrors the PSR-0 path class/Patchwork/PHP/Shim/Normalizer.php.
namespace Patchwork\PHP\Shim;

/**
 * Normalizer is a PHP fallback implementation of the Normalizer class provided by the intl extension.
 *
 * It has been validated with Unicode 6.3 Normalization Conformance Test.
 * See http://www.unicode.org/reports/tr15/ for detailed info about Unicode normalizations.
 */
class Normalizer
{
    // Normalization form identifiers; each FORM_* constant has a short alias.
    const NONE    = 1;
    const FORM_D  = 2;
    const NFD     = 2;
    const FORM_KD = 3;
    const NFKD    = 3;
    const FORM_C  = 4;
    const NFC     = 4;
    const FORM_KC = 5;
    const NFKC    = 5;

    /** Canonical composition map, loaded on first use by normalize(). */
    protected static $C;

    /** Canonical decomposition map, loaded on first use by normalize(). */
    protected static $D;

    /** Compatibility decomposition map, loaded on first use by normalize(). */
    protected static $KD;

    /** Combining class map (UTF-8 character => class), loaded on first use by normalize(). */
    protected static $cC;

    /**
     * Byte length of a multi-byte UTF-8 sequence, keyed by its lead byte
     * masked with "\xF0".
     */
    protected static $ulen_mask = array("\xC0" => 2, "\xD0" => 2, "\xE0" => 3, "\xF0" => 4);

    /**
     * Every byte from 0x00 to 0x7F, used as the strspn() mask that detects
     * runs of ASCII. strspn() ignores the order of the mask characters.
     */
    protected static $ASCII = "\x20\x65\x69\x61\x73\x6E\x74\x72\x6F\x6C\x75\x64\x5D\x5B\x63\x6D\x70\x27\x0A\x67\x7C\x68\x76\x2E\x66\x62\x2C\x3A\x3D\x2D\x71\x31\x30\x43\x32\x2A\x79\x78\x29\x28\x4C\x39\x41\x53\x2F\x50\x22\x45\x6A\x4D\x49\x6B\x33\x3E\x35\x54\x3C\x44\x34\x7D\x42\x7B\x38\x46\x77\x52\x36\x37\x55\x47\x4E\x3B\x4A\x7A\x56\x23\x48\x4F\x57\x5F\x26\x21\x4B\x3F\x58\x51\x25\x59\x5C\x09\x5A\x2B\x7E\x5E\x24\x40\x60\x7F\x00\x01\x02\x03\x04\x05\x06\x07\x08\x0B\x0C\x0D\x0E\x0F\x10\x11\x12\x13\x14\x15\x16\x17\x18\x19\x1A\x1B\x1C\x1D\x1E\x1F";

    /**
     * Quick check telling whether a string is already normalized.
     *
     * Only two cheap cases answer true: pure ASCII input (any form), and,
     * for NFC, valid UTF-8 made only of code points up to U+02FF.
     * Everything else is reported as not normalized without further analysis.
     *
     * @param string $s    The string to check (cast to string).
     * @param int    $form One of the form constants, NFC by default.
     *
     * @return bool
     */
    public static function isNormalized($s, $form = self::NFC)
    {
        $s .= '';

        if (strspn($s, self::$ASCII) === strlen($s)) {
            return true;
        }

        if (self::NFC === $form && preg_match('//u', $s) && !preg_match('/[^\x00-\x{2FF}]/u', $s)) {
            return true;
        }

        // Pretend false as quick checks implemented in PHP won't be so quick
        return false;
    }

    /**
     * Normalizes a UTF-8 string to the requested form.
     *
     * @param string $s    The string to normalize (cast to string).
     * @param int    $form One of the form constants, NFC by default.
     *
     * @return string|false The normalized string, or false when $s is not
     *                      valid UTF-8 or $form is not a known form.
     */
    public static function normalize($s, $form = self::NFC)
    {
        $s .= '';

        // An empty pattern with the "u" modifier only validates the UTF-8 encoding.
        if (!preg_match('//u', $s)) {
            return false;
        }

        // $C: recompose after decomposing; $K: use compatibility mappings.
        switch ($form) {
            case self::NONE: return $s;
            case self::NFC:  $C = true;  $K = false; break;
            case self::NFD:  $C = false; $K = false; break;
            case self::NFKC: $C = true;  $K = true;  break;
            case self::NFKD: $C = false; $K = true;  break;
            default: return false;
        }

        if ('' === $s) {
            return '';
        }

        // Data tables are loaded lazily and cached in static properties.
        if ($K && empty(self::$KD)) {
            self::$KD = static::getData('compatibilityDecomposition');
        }

        if (empty(self::$D)) {
            self::$D = static::getData('canonicalDecomposition');
            self::$cC = static::getData('combiningClass');
        }

        if ($C) {
            if (empty(self::$C)) {
                self::$C = static::getData('canonicalComposition');
            }

            return self::recompose(self::decompose($s, $K));
        }

        return self::decompose($s, $K);
    }

    /**
     * Composes an already decomposed, non-empty UTF-8 string.
     *
     * Pairs are merged through the canonical composition map; conjoining
     * Hangul jamos (L + V, optionally followed by T) are composed
     * arithmetically instead of through the map.
     *
     * @param string $s A decomposed, non-empty UTF-8 string.
     *
     * @return string
     */
    protected static function recompose($s)
    {
        $ASCII = self::$ASCII;
        $compMap = self::$C;
        $combClass = self::$cC;
        $ulen_mask = self::$ulen_mask;

        $result = $tail = '';

        // Byte length of the first character.
        $i = $s[0] < "\x80" ? 1 : $ulen_mask[$s[0] & "\xF0"];
        $len = strlen($s);

        // $last_uchr is the pending character that following ones may compose with.
        $last_uchr = substr($s, 0, $i);
        $last_ucls = isset($combClass[$last_uchr]) ? 256 : 0;

        while ($i < $len) {
            if ($s[$i] < "\x80") {
                // ASCII chars

                if ($tail) {
                    $last_uchr .= $tail;
                    $tail = '';
                }

                // Swallow the whole ASCII run except its last byte, which becomes the pending character.
                if ($j = strspn($s, $ASCII, $i + 1)) {
                    $last_uchr .= substr($s, $i, $j);
                    $i += $j;
                }

                $result .= $last_uchr;
                $last_uchr = $s[$i];
                ++$i;
            } else {
                $ulen = $ulen_mask[$s[$i] & "\xF0"];
                $uchr = substr($s, $i, $ulen);

                if ($last_uchr < "\xE1\x84\x80" || "\xE1\x84\x92" < $last_uchr
                    || $uchr < "\xE1\x85\xA1" || "\xE1\x85\xB5" < $uchr
                    || $last_ucls
                ) {
                    // Table lookup and combining chars composition

                    $ucls = isset($combClass[$uchr]) ? $combClass[$uchr] : 0;

                    if (isset($compMap[$last_uchr . $uchr]) && (!$last_ucls || $last_ucls < $ucls)) {
                        $last_uchr = $compMap[$last_uchr . $uchr];
                    } elseif ($last_ucls = $ucls) {
                        // Intentional assignment: remember the class of this combining char.
                        $tail .= $uchr;
                    } else {
                        if ($tail) {
                            $last_uchr .= $tail;
                            $tail = '';
                        }

                        $result .= $last_uchr;
                        $last_uchr = $uchr;
                    }
                } else {
                    // Hangul chars: $last_uchr is in U+1100..U+1112, $uchr in U+1161..U+1175

                    $L = ord($last_uchr[2]) - 0x80;
                    $V = ord($uchr[2]) - 0xA1;
                    $T = 0;

                    // Look ahead for an optional trailing consonant.
                    $uchr = substr($s, $i + $ulen, 3);

                    if ("\xE1\x86\xA7" <= $uchr && $uchr <= "\xE1\x87\x82") {
                        $T = ord($uchr[2]) - 0xA7;

                        // The range spans two second bytes (0x86, 0x87); fix the wrapped last byte.
                        if (0 > $T) {
                            $T += 0x40;
                        }

                        $ulen += 3;
                    }

                    // Code point of the composed syllable, then its 3-byte UTF-8 encoding.
                    $L = 0xAC00 + ($L * 21 + $V) * 28 + $T;
                    $last_uchr = chr(0xE0 | $L >> 12) . chr(0x80 | $L >> 6 & 0x3F) . chr(0x80 | $L & 0x3F);
                }

                $i += $ulen;
            }
        }

        return $result . $last_uchr . $tail;
    }

    /**
     * Decomposes a UTF-8 string and orders combining characters by class.
     *
     * @param string $s The valid UTF-8 string to decompose.
     * @param bool   $c True to also apply compatibility decompositions.
     *
     * @return string
     */
    protected static function decompose($s, $c)
    {
        $result = '';

        $ASCII = self::$ASCII;
        $decompMap = self::$D;
        $combClass = self::$cC;
        $ulen_mask = self::$ulen_mask;

        // $compatMap stays undefined for canonical forms; it is only read through isset().
        if ($c) {
            $compatMap = self::$KD;
        }

        // From here on $c is reused as the buffer of pending combining chars, keyed by class.
        $c = array();
        $i = 0;
        $len = strlen($s);

        while ($i < $len) {
            if ($s[$i] < "\x80") {
                // ASCII chars

                if ($c) {
                    ksort($c);
                    $result .= implode('', $c);
                    $c = array();
                }

                // Copy the current byte plus the ASCII run that follows it.
                $j = 1 + strspn($s, $ASCII, $i + 1);
                $result .= substr($s, $i, $j);
                $i += $j;
            } else {
                $ulen = $ulen_mask[$s[$i] & "\xF0"];
                $uchr = substr($s, $i, $ulen);
                $i += $ulen;

                if (isset($combClass[$uchr])) {
                    // Combining chars, for sorting

                    if (!isset($c[$combClass[$uchr]])) {
                        $c[$combClass[$uchr]] = '';
                    }

                    $c[$combClass[$uchr]] .= isset($compatMap[$uchr])
                        ? $compatMap[$uchr]
                        : (isset($decompMap[$uchr]) ? $decompMap[$uchr] : $uchr);
                } else {
                    if ($c) {
                        ksort($c);
                        $result .= implode('', $c);
                        $c = array();
                    }

                    if ($uchr < "\xEA\xB0\x80" || "\xED\x9E\xA3" < $uchr) {
                        // Table lookup

                        $j = isset($compatMap[$uchr])
                            ? $compatMap[$uchr]
                            : (isset($decompMap[$uchr]) ? $decompMap[$uchr] : $uchr);

                        if ($uchr != $j) {
                            $uchr = $j;

                            $j = strlen($uchr);
                            $ulen = $uchr[0] < "\x80" ? 1 : $ulen_mask[$uchr[0] & "\xF0"];

                            if ($ulen != $j) {
                                // Put trailing chars in $s so the main loop processes them next

                                $j -= $ulen;
                                $i -= $j;

                                if (0 > $i) {
                                    // Not enough consumed bytes to overwrite: grow $s on the left.
                                    $s = str_repeat(' ', -$i) . $s;
                                    $len -= $i;
                                    $i = 0;
                                }

                                while ($j--) {
                                    $s[$i + $j] = $uchr[$ulen + $j];
                                }

                                $uchr = substr($uchr, 0, $ulen);
                            }
                        }
                    } else {
                        // Hangul chars (U+AC00..U+D7A3), decomposed arithmetically

                        $uchr = unpack('C*', $uchr);

                        // Syllable index, i.e. code point minus 0xAC00 (0xAC80 also cancels the last byte's 0x80).
                        $j = (($uchr[1] - 224) << 12) + (($uchr[2] - 128) << 6) + $uchr[3] - 0xAC80;

                        $uchr = "\xE1\x84" . chr(0x80 + (int) ($j / 588))
                              . "\xE1\x85" . chr(0xA1 + (int) (($j % 588) / 28));

                        if ($j %= 28) {
                            // Trailing consonants span two second bytes: 0x86 up to index 24, then 0x87.
                            $uchr .= $j < 25
                                ? ("\xE1\x86" . chr(0xA7 + $j))
                                : ("\xE1\x87" . chr(0x67 + $j));
                        }
                    }

                    $result .= $uchr;
                }
            }
        }

        if ($c) {
            ksort($c);
            $result .= implode('', $c);
        }

        return $result;
    }

    /**
     * Loads one serialized data table from the unidata/ directory next to this file.
     *
     * @param string $file Base name of the table, without directory or extension.
     *
     * @return mixed The unserialized content, or false when the file does not exist.
     */
    protected static function getData($file)
    {
        $file = __DIR__ . '/unidata/' . $file . '.ser';

        if (file_exists($file)) {
            return unserialize(file_get_contents($file));
        }

        return false;
    }
}
Онлайн: 2
Реклама