httpie-cli/httpie/cli/nested_json/parse.py

194 lines
5.3 KiB
Python
Raw Normal View History

from typing import Iterator
from .errors import NestedJSONSyntaxError
from .tokens import (
EMPTY_STRING,
BACKSLASH,
TokenKind,
OPERATORS,
SPECIAL_CHARS,
LITERAL_TOKENS,
Token,
PathAction,
Path,
)
__all__ = [
'parse',
'assert_cant_happen',
]
def parse(source: str) -> Iterator[Path]:
"""
start: root_path path*
root_path: (literal | index_path | append_path)
literal: TEXT | NUMBER
path:
key_path
| index_path
| append_path
key_path: LEFT_BRACKET TEXT RIGHT_BRACKET
index_path: LEFT_BRACKET NUMBER RIGHT_BRACKET
append_path: LEFT_BRACKET RIGHT_BRACKET
"""
tokens = list(tokenize(source))
cursor = 0
def can_advance():
return cursor < len(tokens)
# noinspection PyShadowingNames
def expect(*kinds):
nonlocal cursor
assert kinds
if can_advance():
token = tokens[cursor]
cursor += 1
if token.kind in kinds:
return token
elif tokens:
token = tokens[-1]._replace(
start=tokens[-1].end + 0,
end=tokens[-1].end + 1,
)
else:
token = None
if len(kinds) == 1:
suffix = kinds[0].to_name()
else:
suffix = ', '.join(kind.to_name() for kind in kinds[:-1])
suffix += ' or ' + kinds[-1].to_name()
message = f'Expecting {suffix}'
raise NestedJSONSyntaxError(source, token, message)
# noinspection PyShadowingNames
def parse_root():
tokens = []
if not can_advance():
return Path(
kind=PathAction.KEY,
accessor=EMPTY_STRING,
is_root=True
)
# (literal | index_path | append_path)?
token = expect(*LITERAL_TOKENS, TokenKind.LEFT_BRACKET)
tokens.append(token)
if token.kind in LITERAL_TOKENS:
action = PathAction.KEY
value = str(token.value)
elif token.kind is TokenKind.LEFT_BRACKET:
token = expect(TokenKind.NUMBER, TokenKind.RIGHT_BRACKET)
tokens.append(token)
if token.kind is TokenKind.NUMBER:
action = PathAction.INDEX
value = token.value
tokens.append(expect(TokenKind.RIGHT_BRACKET))
elif token.kind is TokenKind.RIGHT_BRACKET:
action = PathAction.APPEND
value = None
else:
assert_cant_happen()
else:
assert_cant_happen()
# noinspection PyUnboundLocalVariable
return Path(
kind=action,
accessor=value,
tokens=tokens,
is_root=True
)
yield parse_root()
# path*
while can_advance():
path_tokens = [expect(TokenKind.LEFT_BRACKET)]
token = expect(TokenKind.TEXT, TokenKind.NUMBER, TokenKind.RIGHT_BRACKET)
path_tokens.append(token)
if token.kind is TokenKind.RIGHT_BRACKET:
path = Path(PathAction.APPEND, tokens=path_tokens)
elif token.kind is TokenKind.TEXT:
path = Path(PathAction.KEY, token.value, tokens=path_tokens)
path_tokens.append(expect(TokenKind.RIGHT_BRACKET))
elif token.kind is TokenKind.NUMBER:
path = Path(PathAction.INDEX, token.value, tokens=path_tokens)
path_tokens.append(expect(TokenKind.RIGHT_BRACKET))
else:
assert_cant_happen()
# noinspection PyUnboundLocalVariable
yield path
def tokenize(source: str) -> Iterator[Token]:
cursor = 0
backslashes = 0
buffer = []
def send_buffer() -> Iterator[Token]:
nonlocal backslashes
if not buffer:
return None
value = ''.join(buffer)
kind = TokenKind.TEXT
if not backslashes:
for variation, kind in [
(int, TokenKind.NUMBER),
(check_escaped_int, TokenKind.TEXT),
]:
try:
value = variation(value)
except ValueError:
continue
else:
break
yield Token(
kind=kind,
value=value,
start=cursor - (len(buffer) + backslashes),
end=cursor,
)
buffer.clear()
backslashes = 0
def can_advance() -> bool:
return cursor < len(source)
while can_advance():
index = source[cursor]
if index in OPERATORS:
yield from send_buffer()
yield Token(OPERATORS[index], index, cursor, cursor + 1)
elif index == BACKSLASH and can_advance():
if source[cursor + 1] in SPECIAL_CHARS:
backslashes += 1
else:
buffer.append(index)
buffer.append(source[cursor + 1])
cursor += 1
else:
buffer.append(index)
cursor += 1
yield from send_buffer()
def check_escaped_int(value: str) -> str:
if not value.startswith(BACKSLASH):
raise ValueError('Not an escaped int')
try:
int(value[1:])
except ValueError as exc:
raise ValueError('Not an escaped int') from exc
else:
return value[1:]
def assert_cant_happen():
raise ValueError('Unexpected value')