Вход Регистрация
Файл: includes/library/aws/JmesPath/Lexer.php
Строк: 298
<?php
namespace JmesPath;

/**
 * Tokenizes JMESPath expressions.
 *
 * The lexer walks the input one byte at a time. The first character of each
 * token selects a state from a static transition table, and the state decides
 * how many further characters are consumed. Characters that are not in the
 * table, unterminated delimited tokens, and values that fail JSON decoding
 * are emitted as T_UNKNOWN tokens rather than raising an error.
 */
class Lexer
{
    // Token types. These strings are the 'type' values of emitted tokens.
    const T_DOT = 'dot';
    const T_STAR = 'star';
    const T_COMMA = 'comma';
    const T_COLON = 'colon';
    const T_CURRENT = 'current';
    const T_EXPREF = 'expref';
    const T_LPAREN = 'lparen';
    const T_RPAREN = 'rparen';
    const T_LBRACE = 'lbrace';
    const T_RBRACE = 'rbrace';
    const T_LBRACKET = 'lbracket';
    const T_RBRACKET = 'rbracket';
    const T_FLATTEN = 'flatten';
    const T_IDENTIFIER = 'identifier';
    const T_NUMBER = 'number';
    const T_QUOTED_IDENTIFIER = 'quoted_identifier';
    const T_UNKNOWN = 'unknown';
    const T_PIPE = 'pipe';
    const T_OR = 'or';
    const T_FILTER = 'filter';
    const T_LITERAL = 'literal';
    const T_EOF = 'eof';
    const T_COMPARATOR = 'comparator';

    // Lexer states, selected by the first character of a token.
    const STATE_IDENTIFIER = 0;
    const STATE_NUMBER = 1;
    const STATE_SINGLE_CHAR = 2;
    const STATE_WHITESPACE = 3;
    const STATE_STRING_LITERAL = 4;
    const STATE_QUOTED_STRING = 5;
    const STATE_JSON_LITERAL = 6;
    const STATE_LBRACKET = 7;
    const STATE_PIPE = 8;
    const STATE_LT = 9;
    const STATE_GT = 10;
    const STATE_EQ = 11;
    const STATE_NOT = 12;

    /** @var array We know what token we are consuming based on each char */
    private static $transitionTable = [
        '<'  => self::STATE_LT,
        '>'  => self::STATE_GT,
        '='  => self::STATE_EQ,
        '!'  => self::STATE_NOT,
        '['  => self::STATE_LBRACKET,
        '|'  => self::STATE_PIPE,
        '`'  => self::STATE_JSON_LITERAL,
        '"'  => self::STATE_QUOTED_STRING,
        "'"  => self::STATE_STRING_LITERAL,
        '-'  => self::STATE_NUMBER,
        '0'  => self::STATE_NUMBER,
        '1'  => self::STATE_NUMBER,
        '2'  => self::STATE_NUMBER,
        '3'  => self::STATE_NUMBER,
        '4'  => self::STATE_NUMBER,
        '5'  => self::STATE_NUMBER,
        '6'  => self::STATE_NUMBER,
        '7'  => self::STATE_NUMBER,
        '8'  => self::STATE_NUMBER,
        '9'  => self::STATE_NUMBER,
        // Whitespace keys must be double-quoted escapes; plain 't', 'n' and
        // 'r' are identifier characters further down in this table.
        ' '  => self::STATE_WHITESPACE,
        "\t" => self::STATE_WHITESPACE,
        "\n" => self::STATE_WHITESPACE,
        "\r" => self::STATE_WHITESPACE,
        '.'  => self::STATE_SINGLE_CHAR,
        '*'  => self::STATE_SINGLE_CHAR,
        ']'  => self::STATE_SINGLE_CHAR,
        ','  => self::STATE_SINGLE_CHAR,
        ':'  => self::STATE_SINGLE_CHAR,
        '@'  => self::STATE_SINGLE_CHAR,
        '&'  => self::STATE_SINGLE_CHAR,
        '('  => self::STATE_SINGLE_CHAR,
        ')'  => self::STATE_SINGLE_CHAR,
        '{'  => self::STATE_SINGLE_CHAR,
        '}'  => self::STATE_SINGLE_CHAR,
        '_'  => self::STATE_IDENTIFIER,
        'A'  => self::STATE_IDENTIFIER,
        'B'  => self::STATE_IDENTIFIER,
        'C'  => self::STATE_IDENTIFIER,
        'D'  => self::STATE_IDENTIFIER,
        'E'  => self::STATE_IDENTIFIER,
        'F'  => self::STATE_IDENTIFIER,
        'G'  => self::STATE_IDENTIFIER,
        'H'  => self::STATE_IDENTIFIER,
        'I'  => self::STATE_IDENTIFIER,
        'J'  => self::STATE_IDENTIFIER,
        'K'  => self::STATE_IDENTIFIER,
        'L'  => self::STATE_IDENTIFIER,
        'M'  => self::STATE_IDENTIFIER,
        'N'  => self::STATE_IDENTIFIER,
        'O'  => self::STATE_IDENTIFIER,
        'P'  => self::STATE_IDENTIFIER,
        'Q'  => self::STATE_IDENTIFIER,
        'R'  => self::STATE_IDENTIFIER,
        'S'  => self::STATE_IDENTIFIER,
        'T'  => self::STATE_IDENTIFIER,
        'U'  => self::STATE_IDENTIFIER,
        'V'  => self::STATE_IDENTIFIER,
        'W'  => self::STATE_IDENTIFIER,
        'X'  => self::STATE_IDENTIFIER,
        'Y'  => self::STATE_IDENTIFIER,
        'Z'  => self::STATE_IDENTIFIER,
        'a'  => self::STATE_IDENTIFIER,
        'b'  => self::STATE_IDENTIFIER,
        'c'  => self::STATE_IDENTIFIER,
        'd'  => self::STATE_IDENTIFIER,
        'e'  => self::STATE_IDENTIFIER,
        'f'  => self::STATE_IDENTIFIER,
        'g'  => self::STATE_IDENTIFIER,
        'h'  => self::STATE_IDENTIFIER,
        'i'  => self::STATE_IDENTIFIER,
        'j'  => self::STATE_IDENTIFIER,
        'k'  => self::STATE_IDENTIFIER,
        'l'  => self::STATE_IDENTIFIER,
        'm'  => self::STATE_IDENTIFIER,
        'n'  => self::STATE_IDENTIFIER,
        'o'  => self::STATE_IDENTIFIER,
        'p'  => self::STATE_IDENTIFIER,
        'q'  => self::STATE_IDENTIFIER,
        'r'  => self::STATE_IDENTIFIER,
        's'  => self::STATE_IDENTIFIER,
        't'  => self::STATE_IDENTIFIER,
        'u'  => self::STATE_IDENTIFIER,
        'v'  => self::STATE_IDENTIFIER,
        'w'  => self::STATE_IDENTIFIER,
        'x'  => self::STATE_IDENTIFIER,
        'y'  => self::STATE_IDENTIFIER,
        'z'  => self::STATE_IDENTIFIER,
    ];

    /** @var array Valid identifier characters after first character */
    private $validIdentifier = [
        'A' => true, 'B' => true, 'C' => true, 'D' => true, 'E' => true,
        'F' => true, 'G' => true, 'H' => true, 'I' => true, 'J' => true,
        'K' => true, 'L' => true, 'M' => true, 'N' => true, 'O' => true,
        'P' => true, 'Q' => true, 'R' => true, 'S' => true, 'T' => true,
        'U' => true, 'V' => true, 'W' => true, 'X' => true, 'Y' => true,
        'Z' => true, 'a' => true, 'b' => true, 'c' => true, 'd' => true,
        'e' => true, 'f' => true, 'g' => true, 'h' => true, 'i' => true,
        'j' => true, 'k' => true, 'l' => true, 'm' => true, 'n' => true,
        'o' => true, 'p' => true, 'q' => true, 'r' => true, 's' => true,
        't' => true, 'u' => true, 'v' => true, 'w' => true, 'x' => true,
        'y' => true, 'z' => true, '_' => true, '0' => true, '1' => true,
        '2' => true, '3' => true, '4' => true, '5' => true, '6' => true,
        '7' => true, '8' => true, '9' => true,
    ];

    /** @var array Valid number characters after the first character */
    private $numbers = [
        '0' => true, '1' => true, '2' => true, '3' => true, '4' => true,
        '5' => true, '6' => true, '7' => true, '8' => true, '9' => true
    ];

    /** @var array Map of simple single character tokens */
    private $simpleTokens = [
        '.' => self::T_DOT,
        '*' => self::T_STAR,
        ']' => self::T_RBRACKET,
        ',' => self::T_COMMA,
        ':' => self::T_COLON,
        '@' => self::T_CURRENT,
        '&' => self::T_EXPREF,
        '(' => self::T_LPAREN,
        ')' => self::T_RPAREN,
        '{' => self::T_LBRACE,
        '}' => self::T_RBRACE,
    ];

    /**
     * Tokenize the JMESPath expression into an array of token hashes that
     * contain a 'type', 'value', and 'pos'.
     *
     * 'pos' is the byte offset of the first character of the token in
     * $input. The returned list always ends with a T_EOF token whose 'pos'
     * is strlen($input). Input that cannot be tokenized is reported through
     * T_UNKNOWN tokens; this method itself does not throw.
     *
     * @param string $input JMESPath input
     *
     * @return array
     */
    public function tokenize($input)
    {
        $tokens = [];

        // An empty expression produces only the EOF token. It is special-cased
        // because str_split('') returned [''] before PHP 8.2.
        $chars = $input === '' ? [] : str_split($input);

        // The array's internal pointer is the lexer cursor: current() peeks,
        // next() advances, key() is the byte offset.
        while (false !== ($current = current($chars))) {

            // Every character must be in the transition character table.
            if (!isset(self::$transitionTable[$current])) {
                $tokens[] = [
                    'type'  => self::T_UNKNOWN,
                    'pos'   => key($chars),
                    'value' => $current
                ];
                next($chars);
                continue;
            }

            $state = self::$transitionTable[$current];

            if ($state === self::STATE_SINGLE_CHAR) {

                // Consume simple tokens like ".", ",", "@", etc.
                $tokens[] = [
                    'type'  => $this->simpleTokens[$current],
                    'pos'   => key($chars),
                    'value' => $current
                ];
                next($chars);

            } elseif ($state === self::STATE_IDENTIFIER) {

                // Consume identifiers
                $start = key($chars);
                $buffer = '';
                do {
                    $buffer .= $current;
                    $current = next($chars);
                } while ($current !== false && isset($this->validIdentifier[$current]));
                $tokens[] = [
                    'type'  => self::T_IDENTIFIER,
                    'value' => $buffer,
                    'pos'   => $start
                ];

            } elseif ($state === self::STATE_WHITESPACE) {

                // Skip whitespace
                next($chars);

            } elseif ($state === self::STATE_LBRACKET) {

                // Consume "[", "[?", and "[]"
                $position = key($chars);
                $actual = next($chars);
                if ($actual === ']') {
                    next($chars);
                    $tokens[] = [
                        'type'  => self::T_FLATTEN,
                        'pos'   => $position,
                        'value' => '[]'
                    ];
                } elseif ($actual === '?') {
                    next($chars);
                    $tokens[] = [
                        'type'  => self::T_FILTER,
                        'pos'   => $position,
                        'value' => '[?'
                    ];
                } else {
                    // Plain "[": the cursor already sits on the following
                    // character, so it must not be advanced again.
                    $tokens[] = [
                        'type'  => self::T_LBRACKET,
                        'pos'   => $position,
                        'value' => '['
                    ];
                }

            } elseif ($state === self::STATE_STRING_LITERAL) {

                // Consume raw string literals
                $tokens[] = $this->inside($chars, "'", self::T_LITERAL);

            } elseif ($state === self::STATE_PIPE) {

                // Consume pipe and OR
                $tokens[] = $this->matchOr($chars, '|', '|', self::T_OR, self::T_PIPE);

            } elseif ($state === self::STATE_JSON_LITERAL) {

                // Consume JSON literals
                $token = $this->inside($chars, '`', self::T_LITERAL);
                if ($token['type'] === self::T_LITERAL) {
                    // Unescape backticks before handing the text to the
                    // JSON decoder.
                    $token['value'] = str_replace('\\`', '`', $token['value']);
                    $token = $this->parseJson($token);
                }
                $tokens[] = $token;

            } elseif ($state === self::STATE_NUMBER) {

                // Consume numbers
                // NOTE(review): a lone "-" is emitted as T_NUMBER 0 because
                // of the (int) cast below; kept as-is.
                $start = key($chars);
                $buffer = '';
                do {
                    $buffer .= $current;
                    $current = next($chars);
                } while ($current !== false && isset($this->numbers[$current]));
                $tokens[] = [
                    'type'  => self::T_NUMBER,
                    'value' => (int)$buffer,
                    'pos'   => $start
                ];

            } elseif ($state === self::STATE_QUOTED_STRING) {

                // Consume quoted identifiers
                $token = $this->inside($chars, '"', self::T_QUOTED_IDENTIFIER);
                if ($token['type'] === self::T_QUOTED_IDENTIFIER) {
                    // Re-wrap in quotes so the JSON decoder resolves the
                    // escape sequences of the identifier.
                    $token['value'] = '"' . $token['value'] . '"';
                    $token = $this->parseJson($token);
                }
                $tokens[] = $token;

            } elseif ($state === self::STATE_EQ) {

                // Consume equals
                $tokens[] = $this->matchOr($chars, '=', '=', self::T_COMPARATOR, self::T_UNKNOWN);

            } elseif ($state === self::STATE_NOT) {

                // Consume not equal
                $tokens[] = $this->matchOr($chars, '!', '=', self::T_COMPARATOR, self::T_UNKNOWN);

            } else {

                // either '<' or '>'
                // Consume less than and greater than
                $tokens[] = $this->matchOr($chars, $current, '=', self::T_COMPARATOR, self::T_COMPARATOR);

            }
        }

        $tokens[] = [
            'type'  => self::T_EOF,
            'pos'   => strlen($input),
            'value' => null
        ];

        return $tokens;
    }

    /**
     * Returns a token based on whether or not the next token matches the
     * expected value. If it does, a token of "$type" is returned. Otherwise,
     * a token of "$orElse" type is returned.
     *
     * The cursor is left on the first character after the returned token.
     * The token's 'pos' is the offset of $current, i.e. where the token
     * starts.
     *
     * @param array  $chars    Array of characters by reference.
     * @param string $current  The current character.
     * @param string $expected Expected character.
     * @param string $type     Expected result type.
     * @param string $orElse   Otherwise return a token of this type.
     *
     * @return array Returns a conditional token.
     */
    private function matchOr(array &$chars, $current, $expected, $type, $orElse)
    {
        // Record the offset before advancing: once the cursor runs past the
        // end of the array key() returns null, and after a two-character
        // match it no longer points at the start of the token.
        $position = key($chars);

        if (next($chars) === $expected) {
            next($chars);
            return [
                'type'  => $type,
                'pos'   => $position,
                'value' => $current . $expected
            ];
        }

        return [
            'type'  => $orElse,
            'pos'   => $position,
            'value' => $current
        ];
    }

    /**
     * Returns a token that is the result of consuming inside of delimiter
     * characters. A backslash and the character following it are copied
     * verbatim into the value, so an escaped delimiter does not terminate
     * the token. If the token is not closed, "unknown" is returned.
     *
     * @param array  $chars Array of characters by reference.
     * @param string $delim The delimiter character.
     * @param string $type  Token type.
     *
     * @return array Returns the consumed token.
     */
    private function inside(array &$chars, $delim, $type)
    {
        $position = key($chars);
        $current = next($chars);
        $buffer = '';

        while ($current !== $delim) {
            if ($current === '\\') {
                // Keep the backslash and step onto the escaped character so
                // it is appended below without being tested as a delimiter.
                $buffer .= '\\';
                $current = next($chars);
            }
            if ($current === false) {
                // Unclosed delimiter
                return [
                    'type'  => self::T_UNKNOWN,
                    'value' => $buffer,
                    'pos'   => $position
                ];
            }
            $buffer .= $current;
            $current = next($chars);
        }

        // Step over the closing delimiter.
        next($chars);

        return ['type' => $type, 'value' => $buffer, 'pos' => $position];
    }

    /**
     * Parses a JSON token or sets the token type to "unknown" on error.
     *
     * @param array $token Token that needs parsing.
     *
     * @return array Returns a token with a parsed value.
     */
    private function parseJson(array $token)
    {
        $value = json_decode($token['value'], true);

        if (json_last_error() !== JSON_ERROR_NONE) {
            // Legacy support for elided quotes. Try to parse again by adding
            // quotes around the bad input value.
            $value = json_decode('"' . $token['value'] . '"', true);
            if (json_last_error() !== JSON_ERROR_NONE) {
                $token['type'] = self::T_UNKNOWN;
                return $token;
            }
        }

        $token['value'] = $value;
        return $token;
    }
}
Онлайн: 0
Реклама