Coverage for /usr/lib/python3/dist-packages/yaml/scanner.py: 11%

100

101

102

103

# Scanner produces tokens of the following types:

# STREAM-START

# STREAM-END

# DIRECTIVE(name, value)

# DOCUMENT-START

# DOCUMENT-END

# BLOCK-SEQUENCE-START

# BLOCK-MAPPING-START

# BLOCK-END

# FLOW-SEQUENCE-START

# FLOW-MAPPING-START

# FLOW-SEQUENCE-END

# FLOW-MAPPING-END

# BLOCK-ENTRY

# FLOW-ENTRY

# KEY

# VALUE

# ALIAS(value)

# ANCHOR(value)

# TAG(value)

# SCALAR(value, plain, style)

# Read comments in the Scanner code for more details.

__all__ = ['Scanner', 'ScannerError']

from .error import MarkedYAMLError

from .tokens import *

class ScannerError(MarkedYAMLError):
    """Error raised by the Scanner when the input cannot be tokenized."""

class SimpleKey:
    """Record describing one candidate simple key.

    See the simple keys treatment in Scanner for how these records are used.
    """

    def __init__(self, token_number, required, index, line, column, mark):
        # Number of the candidate's token, and whether a ':' must follow it.
        self.token_number, self.required = token_number, required
        # Where in the input the candidate starts.
        self.index, self.line, self.column = index, line, column
        # Mark used when reporting errors about this candidate.
        self.mark = mark

class Scanner:

def __init__(self):
    """Set up the scanner state and queue the STREAM-START token.

    Scanner is assumed to share a common descendant with Reader. Reader
    does the work of checking for a BOM, converting the input to Unicode
    and adding a NUL to the end. The Reader methods relied upon are:

        self.peek(i=0)     -- peek the next i-th character
        self.prefix(l=1)   -- peek the next l characters
        self.forward(l=1)  -- read the next l characters and advance
    """
    # True once the end of the stream has been reached.
    self.done = False

    # Count of unclosed '{' and '['; zero means block context.
    self.flow_level = 0

    # Tokens that have been processed but not yet emitted.
    self.tokens = []

    # STREAM-START is queued immediately.
    self.fetch_stream_start()

    # How many tokens `get_token` has handed out so far.
    self.tokens_taken = 0

    # Current indentation level, and the stack of enclosing levels.
    self.indent = -1
    self.indents = []

    # --- Simple keys treatment ---
    #
    # A simple key is a key that is not introduced by the '?' indicator:
    #   ---
    #   block simple key: value
    #   ? not a simple key:
    #   : { flow simple key: value }
    # The KEY token is emitted before every key, so on meeting a potential
    # simple key the scanner tries to locate the matching ':' indicator.
    # Simple keys should be limited to a single line and 1024 characters.

    # May a simple key start at the current position? It may start
    #   - at the beginning of the line, not counting indentation spaces
    #     (block context),
    #   - after '{', '[', ',' (flow context),
    #   - after '?', ':', '-' (block context).
    # In the block context the flag also tells whether a block collection
    # may start at the current position.
    self.allow_simple_key = True

    # Possible simple keys, as a dictionary keyed by `flow_level` (at most
    # one candidate per level). Each value is a SimpleKey record:
    #   (token_number, required, index, line, column, mark)
    # A simple key may start with ALIAS, ANCHOR, TAG, SCALAR(flow), '[',
    # or '{' tokens.
    self.possible_simple_keys = {}

# Public methods.

def check_token(self, *choices):
    """Tell whether the next token is an instance of one of `choices`.

    With no `choices`, report whether any token is available at all.
    Returns False when the token queue is empty.
    """
    while self.need_more_tokens():
        self.fetch_more_tokens()
    if not self.tokens:
        return False
    if not choices:
        return True
    upcoming = self.tokens[0]
    return any(isinstance(upcoming, choice) for choice in choices)

def peek_token(self):
    """Return the next token without removing it from the queue.

    Returns None when no token is available.
    """
    while self.need_more_tokens():
        self.fetch_more_tokens()
    return self.tokens[0] if self.tokens else None

def get_token(self):
    """Remove and return the next token, counting it as taken.

    Returns None when no token is available.
    """
    while self.need_more_tokens():
        self.fetch_more_tokens()
    if not self.tokens:
        return None
    self.tokens_taken += 1
    return self.tokens.pop(0)

# Private methods.

def need_more_tokens(self):
    """Tell whether more tokens must be scanned before one can be emitted.

    Always returns a bool. (Previously the final negative path fell off
    the end of the function and returned None.)
    """
    if self.done:
        return False
    if not self.tokens:
        return True
    # The current token may be a potential simple key, so we need to look
    # further: a KEY token might still have to be inserted before it.
    self.stale_possible_simple_keys()
    return self.next_possible_simple_key() == self.tokens_taken

def fetch_more_tokens(self):
    """Scan forward to the next token and queue it.

    Returns whatever the selected `fetch_*` method returns.

    Raises:
        ScannerError: if the upcoming character cannot start any token.
    """
    # Eat whitespace and comments until the next token is reached.
    self.scan_to_next_token()

    # Drop possible simple keys that have become obsolete.
    self.stale_possible_simple_keys()

    # Compare the current indentation with the column; this may queue
    # tokens and decrease the current indentation level.
    self.unwind_indent(self.column)

    ch = self.peek()

    # TODO: support for BOM within a stream, i.e. on '\uFEFF' issue a
    # BOMToken through a `fetch_bom` method.

    # Choose the fetcher for the upcoming character. The stream end,
    # directive and document markers are tested first; the order of the
    # remaining indicator tests is NOT significant.
    if ch == '\0':
        fetch = self.fetch_stream_end
    elif ch == '%' and self.check_directive():
        fetch = self.fetch_directive
    elif ch == '-' and self.check_document_start():
        fetch = self.fetch_document_start
    elif ch == '.' and self.check_document_end():
        fetch = self.fetch_document_end
    elif ch == '[':
        fetch = self.fetch_flow_sequence_start
    elif ch == '{':
        fetch = self.fetch_flow_mapping_start
    elif ch == ']':
        fetch = self.fetch_flow_sequence_end
    elif ch == '}':
        fetch = self.fetch_flow_mapping_end
    elif ch == ',':
        fetch = self.fetch_flow_entry
    elif ch == '-' and self.check_block_entry():
        fetch = self.fetch_block_entry
    elif ch == '?' and self.check_key():
        fetch = self.fetch_key
    elif ch == ':' and self.check_value():
        fetch = self.fetch_value
    elif ch == '*':
        fetch = self.fetch_alias
    elif ch == '&':
        fetch = self.fetch_anchor
    elif ch == '!':
        fetch = self.fetch_tag
    elif ch == '|' and not self.flow_level:
        # Literal scalars exist only in the block context.
        fetch = self.fetch_literal
    elif ch == '>' and not self.flow_level:
        # Folded scalars exist only in the block context.
        fetch = self.fetch_folded
    elif ch == '\'':
        fetch = self.fetch_single
    elif ch == '\"':
        fetch = self.fetch_double
    elif self.check_plain():
        # Nothing else matched, so it must be a plain scalar.
        fetch = self.fetch_plain
    else:
        # Not even a plain scalar: report it with a helpful message.
        raise ScannerError("while scanning for the next token", None,
                "found character %r that cannot start any token" % ch,
                self.get_mark())
    return fetch()

# Simple keys treatment.

def next_possible_simple_key(self):
    """Return the token number of the nearest possible simple key.

    Returns None when there are no possible simple keys.
    """
    # Looping over the whole dictionary is not strictly needed: the entry
    # stored under the smallest flow level could be used directly instead.
    candidates = self.possible_simple_keys.values()
    return min((key.token_number for key in candidates), default=None)

def stale_possible_simple_keys(self):
    """Discard entries that can no longer be simple keys.

    According to the YAML specification, simple keys
      - should be limited to a single line,
      - should be no longer than 1024 characters.
    Disabling this procedure would allow simple keys of any length and
    height (which may cause problems if indentation is broken).

    Raises:
        ScannerError: if a stale candidate was a required key.
    """
    # Iterate over a snapshot because entries are deleted along the way.
    for level, key in list(self.possible_simple_keys.items()):
        still_valid = (key.line == self.line
                       and self.index - key.index <= 1024)
        if still_valid:
            continue
        if key.required:
            raise ScannerError("while scanning a simple key", key.mark,
                    "could not find expected ':'", self.get_mark())
        del self.possible_simple_keys[level]

def save_possible_simple_key(self):
    """Remember the current position as a possible simple key start.

    Called for ALIAS, ANCHOR, TAG, SCALAR(flow), '[' and '{', since the
    next token may start a simple key. Nothing is recorded when simple
    keys are not allowed at this position.
    """
    if not self.allow_simple_key:
        return

    # A simple key is required here when, in block context, the column
    # coincides with the current indentation.
    required = not self.flow_level and self.indent == self.column

    # Replace any candidate already stored for this flow level, saving
    # the number and position of the token about to be produced.
    self.remove_possible_simple_key()
    token_number = self.tokens_taken + len(self.tokens)
    self.possible_simple_keys[self.flow_level] = SimpleKey(
        token_number, required,
        self.index, self.line, self.column, self.get_mark())

def remove_possible_simple_key(self):
    """Forget the possible simple key saved at the current flow level.

    Raises:
        ScannerError: if the saved candidate was a required key.
    """
    level = self.flow_level
    if level not in self.possible_simple_keys:
        return
    key = self.possible_simple_keys[level]
    if key.required:
        raise ScannerError("while scanning a simple key", key.mark,
                "could not find expected ':'", self.get_mark())
    del self.possible_simple_keys[level]

# Indentation functions.

def unwind_indent(self, column):
    """Pop indentation levels deeper than `column`, queuing BLOCK-END tokens.

    Does nothing in the flow context.
    """
    # In the flow context indentation is ignored, which makes the scanner
    # less restrictive than the specification requires. Per the spec,
    # tokens in flow context should respect indentation (an error when
    # `self.indent >= column`), but enforcing that would prohibit
    # intuitively correct constructions such as
    #   key : {
    #   }
    # so no such check ("invalid indentation or unclosed '[' or '{'") is
    # performed here.
    if not self.flow_level:
        # Block context: one BLOCK-END per indentation level left behind.
        while self.indent > column:
            here = self.get_mark()
            self.indent = self.indents.pop()
            self.tokens.append(BlockEndToken(here, here))

def add_indent(self, column):
    """Raise the indentation level to `column` if it is deeper.

    Returns True when a new level was pushed, False otherwise.
    """
    if self.indent >= column:
        return False
    # Remember the level being left so that it can be restored later.
    self.indents.append(self.indent)
    self.indent = column
    return True

# Fetchers.

def fetch_stream_start(self):
    """Queue the STREAM-START token.

    STREAM-START is always the first token and STREAM-END the last one.
    """
    # The token is zero-width: both marks are the current position.
    here = self.get_mark()
    token = StreamStartToken(here, here, encoding=self.encoding)
    self.tokens.append(token)

def fetch_stream_end(self):
    """Unwind all indentation, queue STREAM-END and mark the scanner done."""
    # Set the current indentation to -1, unwinding every level.
    self.unwind_indent(-1)

    # Reset simple keys.
    self.remove_possible_simple_key()
    self.possible_simple_keys = {}
    self.allow_simple_key = False

    # STREAM-END is zero-width: both marks are the current position.
    here = self.get_mark()
    self.tokens.append(StreamEndToken(here, here))

    # The stream is finished.
    self.done = True

def fetch_directive(self):
    """Unwind all indentation, then scan and queue a DIRECTIVE token."""
    # Set the current indentation to -1, unwinding every level.
    self.unwind_indent(-1)

    # Reset simple keys.
    self.remove_possible_simple_key()
    self.allow_simple_key = False

    directive = self.scan_directive()
    self.tokens.append(directive)

def fetch_document_start(self):
    """Queue a DOCUMENT-START token for the '---' indicator."""
    token_class = DocumentStartToken
    self.fetch_document_indicator(token_class)

def fetch_document_end(self):
    """Queue a DOCUMENT-END token for the '...' indicator."""
    token_class = DocumentEndToken
    self.fetch_document_indicator(token_class)

def fetch_document_indicator(self, TokenClass):
    """Queue a DOCUMENT-START or DOCUMENT-END token of type `TokenClass`.

    The indicator is three characters long.
    """
    # Set the current indentation to -1, unwinding every level.
    self.unwind_indent(-1)

    # Reset simple keys. Note that there cannot be a block collection
    # after '---'.
    self.remove_possible_simple_key()
    self.allow_simple_key = False

    begin = self.get_mark()
    self.forward(3)
    finish = self.get_mark()
    self.tokens.append(TokenClass(begin, finish))

def fetch_flow_sequence_start(self):
    """Queue a FLOW-SEQUENCE-START token for the '[' indicator."""
    token_class = FlowSequenceStartToken
    self.fetch_flow_collection_start(token_class)

def fetch_flow_mapping_start(self):
    """Queue a FLOW-MAPPING-START token for the '{' indicator."""
    token_class = FlowMappingStartToken
    self.fetch_flow_collection_start(token_class)

def fetch_flow_collection_start(self, TokenClass):
    """Enter a flow collection and queue its start token of type `TokenClass`."""
    # '[' and '{' may themselves start a simple key.
    self.save_possible_simple_key()

    # One more unclosed bracket.
    self.flow_level += 1

    # Simple keys are allowed after '[' and '{'.
    self.allow_simple_key = True

    # The indicator is a single character.
    begin = self.get_mark()
    self.forward()
    finish = self.get_mark()
    self.tokens.append(TokenClass(begin, finish))

def fetch_flow_sequence_end(self):
    """Queue a FLOW-SEQUENCE-END token for the ']' indicator."""
    token_class = FlowSequenceEndToken
    self.fetch_flow_collection_end(token_class)

def fetch_flow_mapping_end(self):
    """Queue a FLOW-MAPPING-END token for the '}' indicator."""
    token_class = FlowMappingEndToken
    self.fetch_flow_collection_end(token_class)

def fetch_flow_collection_end(self, TokenClass):
    """Leave a flow collection and queue its end token of type `TokenClass`."""
    # Reset the possible simple key on the level being closed.
    self.remove_possible_simple_key()

    # One bracket fewer is open.
    self.flow_level -= 1

    # No simple keys after ']' or '}'.
    self.allow_simple_key = False

    # The indicator is a single character.
    begin = self.get_mark()
    self.forward()
    finish = self.get_mark()
    self.tokens.append(TokenClass(begin, finish))

def fetch_flow_entry(self):
    """Queue a FLOW-ENTRY token for the ',' indicator."""
    # Simple keys are allowed after ','.
    self.allow_simple_key = True

    # Reset the possible simple key on the current level.
    self.remove_possible_simple_key()

    # The indicator is a single character.
    begin = self.get_mark()
    self.forward()
    finish = self.get_mark()
    self.tokens.append(FlowEntryToken(begin, finish))

def fetch_block_entry(self):
    """Queue a BLOCK-ENTRY token, preceded by BLOCK-SEQUENCE-START if needed.

    A block entry inside the flow context is an error too, but that case
    is left for the parser to detect.

    Raises:
        ScannerError: in block context, if a new entry may not start here.
    """
    in_block_context = not self.flow_level
    if in_block_context:
        # Are we allowed to start a new entry?
        if not self.allow_simple_key:
            raise ScannerError(None, None,
                    "sequence entries are not allowed here",
                    self.get_mark())

        # A deeper column opens a new block sequence.
        if self.add_indent(self.column):
            here = self.get_mark()
            self.tokens.append(BlockSequenceStartToken(here, here))

    # Simple keys are allowed after '-'.
    self.allow_simple_key = True

    # Reset the possible simple key on the current level.
    self.remove_possible_simple_key()

    # The indicator is a single character.
    begin = self.get_mark()
    self.forward()
    finish = self.get_mark()
    self.tokens.append(BlockEntryToken(begin, finish))

def fetch_key(self):
    """Queue a KEY token, preceded by BLOCK-MAPPING-START if needed.

    Raises:
        ScannerError: in block context, if a key may not start here.
    """
    in_block_context = not self.flow_level
    if in_block_context:
        # Are we allowed to start a key (not necessarily a simple one)?
        if not self.allow_simple_key:
            raise ScannerError(None, None,
                    "mapping keys are not allowed here",
                    self.get_mark())

        # A deeper column opens a new block mapping.
        if self.add_indent(self.column):
            here = self.get_mark()
            self.tokens.append(BlockMappingStartToken(here, here))

    # Simple keys are allowed after '?' in the block context only.
    self.allow_simple_key = in_block_context

    # Reset the possible simple key on the current level.
    self.remove_possible_simple_key()

    # The indicator is a single character.
    begin = self.get_mark()
    self.forward()
    finish = self.get_mark()
    self.tokens.append(KeyToken(begin, finish))

def fetch_value(self):
    """Queue a VALUE token, first emitting the KEY it completes if any.

    When a possible simple key is pending at the current flow level, a KEY
    token (and, in block context, possibly BLOCK-MAPPING-START) is inserted
    at the position where that key started.

    Raises:
        ScannerError: in block context, when no simple key is pending and
            a value may not start here.
    """
    level = self.flow_level
    if level in self.possible_simple_keys:
        # The ':' confirms the pending candidate as a simple key.
        key = self.possible_simple_keys.pop(level)
        self.tokens.insert(key.token_number - self.tokens_taken,
                KeyToken(key.mark, key.mark))

        # If this key starts a new block mapping, BLOCK-MAPPING-START
        # goes in front of the KEY token just inserted.
        if not self.flow_level and self.add_indent(key.column):
            self.tokens.insert(key.token_number - self.tokens_taken,
                    BlockMappingStartToken(key.mark, key.mark))

        # There cannot be two simple keys one after another.
        self.allow_simple_key = False
    else:
        # It must be a part of a complex key.
        in_block_context = not self.flow_level
        if in_block_context:
            # A complex value may start if and only if a simple key may.
            # (Possibly redundant: the parser would catch it anyway.)
            if not self.allow_simple_key:
                raise ScannerError(None, None,
                        "mapping values are not allowed here",
                        self.get_mark())

            # If this value starts a new block mapping, add
            # BLOCK-MAPPING-START. It will be detected as an error later
            # by the parser.
            if self.add_indent(self.column):
                here = self.get_mark()
                self.tokens.append(BlockMappingStartToken(here, here))

        # Simple keys are allowed after ':' in the block context only.
        self.allow_simple_key = in_block_context

        # Reset the possible simple key on the current level.
        self.remove_possible_simple_key()

    # The indicator is a single character.
    begin = self.get_mark()
    self.forward()
    finish = self.get_mark()
    self.tokens.append(ValueToken(begin, finish))

def fetch_alias(self):
    """Scan and queue an ALIAS token."""
    # ALIAS could be a simple key.
    self.save_possible_simple_key()

    # No simple keys after ALIAS.
    self.allow_simple_key = False

    alias = self.scan_anchor(AliasToken)
    self.tokens.append(alias)

def fetch_anchor(self):
    """Scan and queue an ANCHOR token."""
    # ANCHOR could start a simple key.
    self.save_possible_simple_key()

    # No simple keys after ANCHOR.
    self.allow_simple_key = False

    anchor = self.scan_anchor(AnchorToken)
    self.tokens.append(anchor)

def fetch_tag(self):
    """Scan and queue a TAG token."""
    # TAG could start a simple key.
    self.save_possible_simple_key()

    # No simple keys after TAG.
    self.allow_simple_key = False

    tag = self.scan_tag()
    self.tokens.append(tag)

def fetch_literal(self):
    """Queue a SCALAR token for a literal ('|') block scalar."""
    literal_style = '|'
    self.fetch_block_scalar(style=literal_style)

def fetch_folded(self):
    """Queue a SCALAR token for a folded ('>') block scalar."""
    folded_style = '>'
    self.fetch_block_scalar(style=folded_style)

def fetch_block_scalar(self, style):
    """Scan a block scalar of the given `style` and queue its SCALAR token."""
    # A simple key may follow a block scalar.
    self.allow_simple_key = True

    # Reset the possible simple key on the current level.
    self.remove_possible_simple_key()

    scalar = self.scan_block_scalar(style)
    self.tokens.append(scalar)

def fetch_single(self):
    """Queue a SCALAR token for a single-quoted flow scalar."""
    single_quote = '\''
    self.fetch_flow_scalar(style=single_quote)

def fetch_double(self):
    """Queue a SCALAR token for a double-quoted flow scalar."""
    double_quote = '"'
    self.fetch_flow_scalar(style=double_quote)

def fetch_flow_scalar(self, style):
    """Scan a quoted scalar of the given `style` and queue its SCALAR token."""
    # A flow scalar could be a simple key.
    self.save_possible_simple_key()

    # No simple keys after flow scalars.
    self.allow_simple_key = False

    scalar = self.scan_flow_scalar(style)
    self.tokens.append(scalar)

def fetch_plain(self):
    """Scan a plain scalar and queue its SCALAR token."""
    # A plain scalar could be a simple key.
    self.save_possible_simple_key()

    # No simple keys after plain scalars. Note however that `scan_plain`
    # changes this flag again if the scan finishes at the beginning of a
    # line.
    self.allow_simple_key = False

    scalar = self.scan_plain()  # May change `allow_simple_key`.
    self.tokens.append(scalar)

# Checkers.

def check_directive(self):
    """Tell whether a '%' at the current position starts a directive.

    DIRECTIVE: ^ '%' ...
    The '%' indicator itself has already been checked by the caller; a
    directive additionally has to start in the first column.

    Always returns a bool. (Previously the negative case fell off the end
    of the function and returned None.)
    """
    return self.column == 0

def check_document_start(self):
    """Tell whether a document start marker begins at the current position.

    DOCUMENT-START: ^ '---' (' '|'\\n')
    The marker must sit in the first column and be followed by a blank,
    a line break or the end of the stream.

    Always returns a bool. (Previously the negative cases fell off the end
    of the function and returned None.)
    """
    return (self.column == 0
            and self.prefix(3) == '---'
            and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029')

def check_document_end(self):
    """Tell whether a document end marker begins at the current position.

    DOCUMENT-END: ^ '...' (' '|'\\n')
    The marker must sit in the first column and be followed by a blank,
    a line break or the end of the stream.

    Always returns a bool. (Previously the negative cases fell off the end
    of the function and returned None.)
    """
    return (self.column == 0
            and self.prefix(3) == '...'
            and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029')

def check_block_entry(self):
    """Tell whether a '-' at the current position is a block entry indicator.

    BLOCK-ENTRY: '-' (' '|'\\n')
    """
    blanks_and_breaks = '\0 \t\r\n\x85\u2028\u2029'
    following = self.peek(1)
    return following in blanks_and_breaks

def check_key(self):
    """Tell whether a '?' at the current position is a key indicator.

    KEY(flow context):  '?'
    KEY(block context): '?' (' '|'\\n')
    """
    if not self.flow_level:
        return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029'
    return True

def check_value(self):
    """Tell whether a ':' at the current position is a value indicator.

    VALUE(flow context):  ':'
    VALUE(block context): ':' (' '|'\\n')
    """
    if not self.flow_level:
        return self.peek(1) in '\0 \t\r\n\x85\u2028\u2029'
    return True

def check_plain(self):

# A plain scalar may start with any non-space character except:

# '-', '?', ':', ',', '[', ']', '{', '}',

# '#', '&', '*', '!', '|', '>', '\'', '\"',

# '%', '@', '`'.

# It may also start with

# '-', '?', ':'

# if it is followed by a non-space character.

# Note that we limit the last rule to the block context (except the

# '-' character) because we want the flow context to be space

# independent.

ch = self.peek()

return ch not in '\0 \t\r\n\x85\u2028\u2029-?:,[]{}#&*!|>\'\"%@`' \

or (self.peek(1) not in '\0 \t\r\n\x85\u2028\u2029'

and (ch == '-' or (not self.flow_level and ch in '?:')))

# Scanners.

def scan_to_next_token(self):
    """Skip spaces, line breaks and comments up to the next token.

    When a line break is found in the block context, the flag
    `allow_simple_key` is switched on.

    The byte order mark is stripped if it is the first character in the
    stream. BOM inside the stream is not yet supported as the
    specification requires; any such mark is considered a part of the
    document.
    """
    # TODO: We need to make tab handling rules more sane. A good rule is
    #   Tabs cannot precede tokens
    #   BLOCK-SEQUENCE-START, BLOCK-MAPPING-START, BLOCK-END,
    #   KEY(block), VALUE(block), BLOCK-ENTRY
    # So the checking code is
    #   if <TAB>:
    #       self.allow_simple_keys = False
    # We also need to add the check for `allow_simple_keys == True` to
    # `unwind_indent` before issuing BLOCK-END.
    # Scanners for block, flow, and plain scalars need to be modified.

    if self.index == 0 and self.peek() == '\uFEFF':
        self.forward()

    while True:
        while self.peek() == ' ':
            self.forward()
        if self.peek() == '#':
            # A comment runs to the end of the line.
            while self.peek() not in '\0\r\n\x85\u2028\u2029':
                self.forward()
        if not self.scan_line_break():
            # No line break consumed: the next token starts here.
            return
        if not self.flow_level:
            self.allow_simple_key = True

def scan_directive(self):
    """Scan a directive line and return its DirectiveToken.

    See the specification for details. The values of the 'YAML' and 'TAG'
    directives are parsed; for any other directive the value is None and
    the remainder of the line is skipped.
    """
    start_mark = self.get_mark()
    self.forward()
    name = self.scan_directive_name(start_mark)

    value_scanners = {
        'YAML': self.scan_yaml_directive_value,
        'TAG': self.scan_tag_directive_value,
    }
    scan_value = value_scanners.get(name)
    if scan_value is not None:
        value = scan_value(start_mark)
        end_mark = self.get_mark()
    else:
        # Unknown directive: the token ends right after the name and the
        # rest of the line is not interpreted.
        value = None
        end_mark = self.get_mark()
        while self.peek() not in '\0\r\n\x85\u2028\u2029':
            self.forward()

    self.scan_directive_ignored_line(start_mark)
    return DirectiveToken(name, value, start_mark, end_mark)

def scan_directive_name(self, start_mark):
    """Scan and return the name of a directive.

    See the specification for details. The name is a non-empty run of
    ASCII letters, digits, '-' and '_'.

    Raises:
        ScannerError: if the name is empty, or is not followed by a space,
            a line break or the end of the stream.
    """
    def is_name_char(c):
        return ('0' <= c <= '9' or 'A' <= c <= 'Z' or 'a' <= c <= 'z'
                or c in '-_')

    length = 0
    ch = self.peek(length)
    while is_name_char(ch):
        length += 1
        ch = self.peek(length)
    if length == 0:
        raise ScannerError("while scanning a directive", start_mark,
                "expected alphabetic or numeric character, but found %r"
                % ch, self.get_mark())

    name = self.prefix(length)
    self.forward(length)

    ch = self.peek()
    if ch not in '\0 \r\n\x85\u2028\u2029':
        raise ScannerError("while scanning a directive", start_mark,
                "expected alphabetic or numeric character, but found %r"
                % ch, self.get_mark())
    return name

def scan_yaml_directive_value(self, start_mark):
    """Scan the value of a YAML directive and return `(major, minor)`.

    See the specification for details.

    Raises:
        ScannerError: if the two numbers are not separated by '.', or the
            minor number is not followed by a space, a line break or the
            end of the stream.
    """
    while self.peek() == ' ':
        self.forward()

    major = self.scan_yaml_directive_number(start_mark)

    separator = self.peek()
    if separator != '.':
        raise ScannerError("while scanning a directive", start_mark,
                "expected a digit or '.', but found %r" % self.peek(),
                self.get_mark())
    self.forward()

    minor = self.scan_yaml_directive_number(start_mark)

    following = self.peek()
    if following not in '\0 \r\n\x85\u2028\u2029':
        raise ScannerError("while scanning a directive", start_mark,
                "expected a digit or ' ', but found %r" % self.peek(),
                self.get_mark())
    return (major, minor)

def scan_yaml_directive_number(self, start_mark):
    """Scan a run of decimal digits and return it as an int.

    See the specification for details.

    Raises:
        ScannerError: if the current character is not a digit.
    """
    first = self.peek()
    if first < '0' or first > '9':
        raise ScannerError("while scanning a directive", start_mark,
                "expected a digit, but found %r" % first, self.get_mark())

    # The first character is known to be a digit; count the ones after it.
    length = 1
    while '0' <= self.peek(length) <= '9':
        length += 1

    number = int(self.prefix(length))
    self.forward(length)
    return number

def scan_tag_directive_value(self, start_mark):
    """Scan the value of a TAG directive and return `(handle, prefix)`.

    See the specification for details.
    """
    def skip_spaces():
        while self.peek() == ' ':
            self.forward()

    skip_spaces()
    handle = self.scan_tag_directive_handle(start_mark)
    skip_spaces()
    prefix = self.scan_tag_directive_prefix(start_mark)
    return (handle, prefix)

def scan_tag_directive_handle(self, start_mark):
    """Scan and return the handle of a TAG directive.

    See the specification for details.

    Raises:
        ScannerError: if the handle is not followed by a space.
    """
    handle = self.scan_tag_handle('directive', start_mark)
    following = self.peek()
    if following == ' ':
        return handle
    raise ScannerError("while scanning a directive", start_mark,
            "expected ' ', but found %r" % following, self.get_mark())

def scan_tag_directive_prefix(self, start_mark):
    """Scan and return the prefix of a TAG directive.

    See the specification for details.

    Raises:
        ScannerError: if the prefix is not followed by a space, a line
            break or the end of the stream.
    """
    prefix = self.scan_tag_uri('directive', start_mark)
    following = self.peek()
    if following in '\0 \r\n\x85\u2028\u2029':
        return prefix
    raise ScannerError("while scanning a directive", start_mark,
            "expected ' ', but found %r" % following, self.get_mark())

def scan_directive_ignored_line(self, start_mark):
    """Consume the rest of a directive line: spaces, a comment, the break.

    See the specification for details.

    Raises:
        ScannerError: if anything other than a comment or a line break
            follows the spaces.
    """
    line_end = '\0\r\n\x85\u2028\u2029'
    while self.peek() == ' ':
        self.forward()
    if self.peek() == '#':
        while self.peek() not in line_end:
            self.forward()
    ch = self.peek()
    if ch not in line_end:
        raise ScannerError("while scanning a directive", start_mark,
                "expected a comment or a line break, but found %r"
                % ch, self.get_mark())
    self.scan_line_break()

def scan_anchor(self, TokenClass):
    """Scan an alias or an anchor and return it as a `TokenClass` token.

    The specification does not restrict characters for anchors and
    aliases. This may lead to problems, for instance, the document:
      [ *alias, value ]
    can be interpreted in two ways, as
      [ "value" ]
    and
      [ *alias , "value" ]
    Therefore aliases are restricted to ASCII letters, digits, '-' and '_'.

    Raises:
        ScannerError: if the name is empty or is followed by an unexpected
            character.
    """
    def is_name_char(c):
        return ('0' <= c <= '9' or 'A' <= c <= 'Z' or 'a' <= c <= 'z'
                or c in '-_')

    start_mark = self.get_mark()
    # The indicator only selects the wording of error messages.
    name = 'alias' if self.peek() == '*' else 'anchor'
    self.forward()

    length = 0
    ch = self.peek(length)
    while is_name_char(ch):
        length += 1
        ch = self.peek(length)
    if length == 0:
        raise ScannerError("while scanning an %s" % name, start_mark,
                "expected alphabetic or numeric character, but found %r"
                % ch, self.get_mark())

    value = self.prefix(length)
    self.forward(length)

    ch = self.peek()
    if ch not in '\0 \t\r\n\x85\u2028\u2029?:,]}%@`':
        raise ScannerError("while scanning an %s" % name, start_mark,
                "expected alphabetic or numeric character, but found %r"
                % ch, self.get_mark())

    end_mark = self.get_mark()
    return TokenClass(value, start_mark, end_mark)

def scan_tag(self):
    """Scan a tag and return a TagToken whose value is `(handle, suffix)`.

    See the specification for details. Three forms are recognized:
      - '!<' uri '>'      -> handle None, suffix is the scanned URI
      - a lone '!'        -> handle None, suffix '!'
      - anything else     -> a handle followed by a URI suffix

    Raises:
        ScannerError: if a '!<' form is not closed by '>', or the tag is
            not followed by a space, a line break or the end of stream.
    """
    start_mark = self.get_mark()
    ch = self.peek(1)
    if ch == '<':
        handle = None
        self.forward(2)
        suffix = self.scan_tag_uri('tag', start_mark)
        if self.peek() != '>':
            raise ScannerError("while parsing a tag", start_mark,
                    "expected '>', but found %r" % self.peek(),
                    self.get_mark())
        self.forward()
    elif ch in '\0 \t\r\n\x85\u2028\u2029':
        handle = None
        suffix = '!'
        self.forward()
    else:
        # Look ahead for a second '!' before the end of the tag: if one
        # is present, the tag starts with an explicit handle.
        offset = 1
        while ch not in '\0 \r\n\x85\u2028\u2029' and ch != '!':
            offset += 1
            ch = self.peek(offset)
        if ch == '!':
            handle = self.scan_tag_handle('tag', start_mark)
        else:
            handle = '!'
            self.forward()
        suffix = self.scan_tag_uri('tag', start_mark)

    ch = self.peek()
    if ch not in '\0 \r\n\x85\u2028\u2029':
        raise ScannerError("while scanning a tag", start_mark,
                "expected ' ', but found %r" % ch, self.get_mark())

    end_mark = self.get_mark()
    return TagToken((handle, suffix), start_mark, end_mark)

def scan_block_scalar(self, style):
    """Scan a block scalar and return its ScalarToken.

    See the specification for details. `style` is '>' for a folded
    scalar; any other value is scanned without folding.
    """
    folded = (style == '>')
    chunks = []
    start_mark = self.get_mark()

    # Scan the header.
    self.forward()
    chomping, increment = self.scan_block_scalar_indicators(start_mark)
    self.scan_block_scalar_ignored_line(start_mark)

    # Determine the indentation level and go to the first non-empty line.
    min_indent = max(self.indent + 1, 1)
    if increment is None:
        breaks, max_indent, end_mark = self.scan_block_scalar_indentation()
        indent = max(min_indent, max_indent)
    else:
        indent = min_indent + increment - 1
        breaks, end_mark = self.scan_block_scalar_breaks(indent)
    line_break = ''

    # Scan the inner part of the block scalar.
    while self.column == indent and self.peek() != '\0':
        chunks.extend(breaks)
        leading_non_space = self.peek() not in ' \t'

        # Take the whole line up to, but excluding, its line break.
        length = 0
        while self.peek(length) not in '\0\r\n\x85\u2028\u2029':
            length += 1
        chunks.append(self.prefix(length))
        self.forward(length)

        line_break = self.scan_line_break()
        breaks, end_mark = self.scan_block_scalar_breaks(indent)
        if not (self.column == indent and self.peek() != '\0'):
            break

        # Unfortunately, folding rules are ambiguous.
        #
        # This is the folding according to the specification:
        if folded and line_break == '\n' \
                and leading_non_space and self.peek() not in ' \t':
            if not breaks:
                chunks.append(' ')
        else:
            chunks.append(line_break)

        # Clark Evans's interpretation (also in the spec examples) would
        # instead be:
        #
        #   if folded and line_break == '\n':
        #       if not breaks:
        #           if self.peek() not in ' \t':
        #               chunks.append(' ')
        #           else:
        #               chunks.append(line_break)
        #   else:
        #       chunks.append(line_break)

    # Chomp the tail: the final line break is kept unless chomping is
    # False, and the trailing breaks only when chomping is True.
    if chomping is not False:
        chunks.append(line_break)
    if chomping is True:
        chunks.extend(breaks)

    # We are done.
    return ScalarToken(''.join(chunks), False, start_mark, end_mark,
            style)

def scan_block_scalar_indicators(self, start_mark):
    """Scan the header indicators of a block scalar.

    See the specification for details. The chomping indicator ('+' or
    '-') and the indentation indicator (a digit from 1 to 9) may appear
    in either order and are both optional.

    Returns:
        `(chomping, increment)`: chomping is True for '+', False for '-'
        and None when absent; increment is the digit as an int, or None
        when absent.

    Raises:
        ScannerError: if the indentation indicator is 0, or the indicators
            are followed by an unexpected character.
    """
    digits = '0123456789'
    chomping = None
    increment = None
    ch = self.peek()
    if ch in '+-':
        # Chomping first, optionally followed by the indentation digit.
        chomping = (ch == '+')
        self.forward()
        ch = self.peek()
        if ch in digits:
            increment = int(ch)
            if increment == 0:
                raise ScannerError("while scanning a block scalar", start_mark,
                        "expected indentation indicator in the range 1-9, but found 0",
                        self.get_mark())
            self.forward()
    elif ch in digits:
        # Indentation digit first, optionally followed by chomping.
        increment = int(ch)
        if increment == 0:
            raise ScannerError("while scanning a block scalar", start_mark,
                    "expected indentation indicator in the range 1-9, but found 0",
                    self.get_mark())
        self.forward()
        ch = self.peek()
        if ch in '+-':
            chomping = (ch == '+')
            self.forward()
    ch = self.peek()
    if ch not in '\0 \r\n\x85\u2028\u2029':
        raise ScannerError("while scanning a block scalar", start_mark,
                "expected chomping or indentation indicators, but found %r"
                % ch, self.get_mark())
    return chomping, increment

def scan_block_scalar_ignored_line(self, start_mark):
    """Consume the rest of a block scalar header line.

    See the specification for details. Spaces, an optional comment and
    the line break are skipped.

    Raises:
        ScannerError: if anything other than a comment or a line break
            follows the spaces.
    """
    line_end = '\0\r\n\x85\u2028\u2029'
    while self.peek() == ' ':
        self.forward()
    if self.peek() == '#':
        while self.peek() not in line_end:
            self.forward()
    ch = self.peek()
    if ch not in line_end:
        raise ScannerError("while scanning a block scalar", start_mark,
                "expected a comment or a line break, but found %r" % ch,
                self.get_mark())
    self.scan_line_break()

def scan_block_scalar_indentation(self):
    """Skip leading spaces and line breaks, measuring the deepest column.

    See the specification for details.

    Returns:
        `(chunks, max_indent, end_mark)`: the line breaks consumed, the
        largest column reached while skipping spaces, and the mark taken
        after the last line break (or at the start if there was none).
    """
    breaks = []
    deepest = 0
    end_mark = self.get_mark()
    while self.peek() in ' \r\n\x85\u2028\u2029':
        if self.peek() == ' ':
            self.forward()
            deepest = max(deepest, self.column)
        else:
            breaks.append(self.scan_line_break())
            end_mark = self.get_mark()
    return breaks, deepest, end_mark

def scan_block_scalar_breaks(self, indent):
    """Collect consecutive line breaks, skipping indentation up to `indent`.

    See the specification for details.

    Returns:
        `(chunks, end_mark)`: the line breaks consumed and the mark taken
        after the last of them (or at the start if there was none).
    """
    breaks = []
    end_mark = self.get_mark()
    while True:
        # Skip indentation spaces, but never beyond the `indent` column.
        while self.column < indent and self.peek() == ' ':
            self.forward()
        if self.peek() not in '\r\n\x85\u2028\u2029':
            return breaks, end_mark
        breaks.append(self.scan_line_break())
        end_mark = self.get_mark()

def scan_flow_scalar(self, style):
    """Scan a quoted scalar and return its ScalarToken.

    See the specification for details. `style` is '"' for a double-quoted
    scalar; any other value is scanned as single-quoted.

    Note that indentation rules are loosened for quoted scalars. Quoted
    scalars don't need to adhere to indentation because " and ' clearly
    mark the beginning and the end of them. Therefore we are less
    restrictive than the specification requires. We only need to check
    that document separators are not included in scalars.
    """
    double = (style == '"')
    start_mark = self.get_mark()

    # The opening quote also tells which character closes the scalar.
    quote = self.peek()
    self.forward()

    pieces = []
    pieces += self.scan_flow_scalar_non_spaces(double, start_mark)
    while self.peek() != quote:
        pieces += self.scan_flow_scalar_spaces(double, start_mark)
        pieces += self.scan_flow_scalar_non_spaces(double, start_mark)

    # Step over the closing quote.
    self.forward()
    end_mark = self.get_mark()
    return ScalarToken(''.join(pieces), False, start_mark, end_mark,
            style)

# Single-character escapes of double-quoted scalars, mapped to the text
# they stand for.
ESCAPE_REPLACEMENTS = {
    '0':    '\0',
    'a':    '\x07',
    'b':    '\x08',
    't':    '\x09',
    '\t':   '\x09',
    'n':    '\x0A',
    'v':    '\x0B',
    'f':    '\x0C',
    'r':    '\x0D',
    'e':    '\x1B',
    ' ':    '\x20',
    '\"':   '\"',
    # Escaped slash: defined by YAML 1.2 for JSON compatibility.
    '/':    '/',
    '\\':   '\\',
    'N':    '\x85',
    '_':    '\xA0',
    'L':    '\u2028',
    'P':    '\u2029',
}

# Numeric escapes, mapped to the number of hexadecimal digits that follow
# the escape letter.
ESCAPE_CODES = {
    'x':    2,
    'u':    4,
    'U':    8,
}

def scan_flow_scalar_non_spaces(self, double, start_mark):
    """Scan the non-whitespace part of a quoted scalar.

    Consumes characters up to (not including) the next space, tab, line
    break, NUL or terminating quote and returns the decoded pieces as a
    list of strings.

    ``double`` selects double-quoted rules (backslash escapes) instead of
    single-quoted rules (``''`` stands for one quote). ``start_mark`` is
    only used for error reporting.

    Raises ``ScannerError`` for an unknown escape character, for a numeric
    escape with too few hexadecimal digits, and for a numeric escape whose
    value is not a valid Unicode code point.
    """
    # See the specification for details.
    chunks = []
    while True:
        # Copy the longest run of ordinary characters in one go.
        length = 0
        while self.peek(length) not in '\'\"\\\0 \t\r\n\x85\u2028\u2029':
            length += 1
        if length:
            chunks.append(self.prefix(length))
            self.forward(length)
        ch = self.peek()
        if not double and ch == '\'' and self.peek(1) == '\'':
            # A doubled quote inside a single-quoted scalar is a literal quote.
            chunks.append('\'')
            self.forward(2)
        elif (double and ch == '\'') or (not double and ch in '\"\\'):
            # The "other" quote and, in single-quoted style, the backslash
            # carry no special meaning.
            chunks.append(ch)
            self.forward()
        elif double and ch == '\\':
            self.forward()
            ch = self.peek()
            if ch in self.ESCAPE_REPLACEMENTS:
                chunks.append(self.ESCAPE_REPLACEMENTS[ch])
                self.forward()
            elif ch in self.ESCAPE_CODES:
                length = self.ESCAPE_CODES[ch]
                self.forward()
                for k in range(length):
                    if self.peek(k) not in '0123456789ABCDEFabcdef':
                        raise ScannerError("while scanning a double-quoted scalar", start_mark,
                                "expected escape sequence of %d hexdecimal numbers, but found %r" %
                                    (length, self.peek(k)), self.get_mark())
                code = int(self.prefix(length), 16)
                # chr() rejects values above U+10FFFF with ValueError or
                # OverflowError; report them as a scanner error instead so
                # callers get a mark and a YAML error type.
                if code > 0x10FFFF:
                    raise ScannerError("while scanning a double-quoted scalar", start_mark,
                            "expected escape sequence of a valid Unicode code point, but found 0x%X"
                                % code, self.get_mark())
                chunks.append(chr(code))
                self.forward(length)
            elif ch in '\r\n\x85\u2028\u2029':
                # Escaped line break: the break itself is dropped, only the
                # following breaks are kept.
                self.scan_line_break()
                chunks.extend(self.scan_flow_scalar_breaks(double, start_mark))
            else:
                raise ScannerError("while scanning a double-quoted scalar", start_mark,
                        "found unknown escape character %r" % ch, self.get_mark())
        else:
            return chunks

def scan_flow_scalar_spaces(self, double, start_mark):
    """Scan the whitespace between two non-space runs of a quoted scalar.

    Consumes a run of spaces and tabs. If no line break follows, that run
    is returned as a single-element list. If a line break follows, the
    surrounding whitespace is dropped and the result is built from the
    breaks: a first break other than ``'\\n'`` is kept verbatim, a lone
    ``'\\n'`` is folded into one space, and any further breaks reported by
    ``scan_flow_scalar_breaks`` are kept.

    Raises ``ScannerError`` when the stream ends (NUL) after the whitespace.
    """
    # See the specification for details.
    width = 0
    while self.peek(width) in ' \t':
        width += 1
    blanks = self.prefix(width)
    self.forward(width)
    following = self.peek()
    if following == '\0':
        raise ScannerError("while scanning a quoted scalar", start_mark,
                "found unexpected end of stream", self.get_mark())
    if following not in '\r\n\x85\u2028\u2029':
        return [blanks]
    first_break = self.scan_line_break()
    more_breaks = self.scan_flow_scalar_breaks(double, start_mark)
    if first_break != '\n':
        return [first_break, *more_breaks]
    if not more_breaks:
        # A single plain line break folds into one space.
        return [' ']
    return list(more_breaks)

def scan_flow_scalar_breaks(self, double, start_mark):
    """Consume consecutive blank lines inside a quoted scalar.

    At the start of every line the next three characters are checked for a
    document separator (``---`` or ``...`` followed by whitespace, a line
    break or NUL); finding one raises ``ScannerError``. Otherwise leading
    spaces and tabs are skipped and, if a line break follows, it is
    consumed and collected. Returns the list of collected breaks.
    """
    # See the specification for details.
    breaks = []
    while True:
        # Instead of checking indentation, we check for document
        # separators.
        if self.prefix(3) in ('---', '...') \
                and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029':
            raise ScannerError("while scanning a quoted scalar", start_mark,
                    "found unexpected document separator", self.get_mark())
        while self.peek() in ' \t':
            self.forward()
        if self.peek() not in '\r\n\x85\u2028\u2029':
            return breaks
        breaks.append(self.scan_line_break())

def scan_plain(self):
    """Scan a plain (unquoted) scalar and return it as a ``ScalarToken``.

    The scalar is assembled from alternating runs of non-space characters
    and the whitespace between them (as folded by ``scan_plain_spaces``).
    Scanning stops at a ``#``, at an empty run, when ``scan_plain_spaces``
    yields nothing, or, in block context, when the column drops below
    ``self.indent + 1``.

    Raises ``ScannerError`` in flow context when a ``:`` is followed by a
    character other than whitespace, a line break, NUL or one of ``,[]{}``.
    """
    # See the specification for details.
    # We add an additional restriction for the flow context:
    #   plain scalars in the flow context cannot contain ',', ':' and '?'.
    # We also keep track of the `allow_simple_key` flag here.
    # Indentation rules are relaxed for the flow context.
    terminators = '\0 \t\r\n\x85\u2028\u2029'
    in_flow = self.flow_level
    start_mark = self.get_mark()
    end_mark = start_mark
    # Zero indentation is allowed for scalars; document separators at the
    # beginning of a line are checked by scan_plain_spaces instead.
    min_column = self.indent + 1
    pieces = []
    pending_spaces = []
    while self.peek() != '#':
        # Measure the next run of characters that belong to the scalar.
        size = 0
        while True:
            ch = self.peek(size)
            if ch in terminators:
                break
            if in_flow:
                if ch in ',:?[]{}':
                    break
            elif ch == ':' and self.peek(size + 1) in terminators:
                break
            size += 1
        # It's not clear what we should do with ':' in the flow context.
        if in_flow and ch == ':' \
                and self.peek(size + 1) not in '\0 \t\r\n\x85\u2028\u2029,[]{}':
            self.forward(size)
            raise ScannerError("while scanning a plain scalar", start_mark,
                "found unexpected ':'", self.get_mark(),
                "Please check http://pyyaml.org/wiki/YAMLColonInFlowContext for details.")
        if not size:
            break
        self.allow_simple_key = False
        # Whitespace is only committed once more content follows it.
        pieces += pending_spaces
        pieces.append(self.prefix(size))
        self.forward(size)
        end_mark = self.get_mark()
        pending_spaces = self.scan_plain_spaces(min_column, start_mark)
        if not pending_spaces or self.peek() == '#':
            break
        if not in_flow and self.column < min_column:
            break
    return ScalarToken(''.join(pieces), True, start_mark, end_mark)

def scan_plain_spaces(self, indent, start_mark):
    """Scan the whitespace that follows a run of plain-scalar characters.

    Consumes a run of spaces. Without a following line break the run is
    returned as a one-element list (or an empty list if there were no
    spaces). With a line break, ``self.allow_simple_key`` is set to True,
    further spaces and breaks are consumed, and the folded result is
    returned: a first break other than ``'\\n'`` is kept verbatim, a lone
    ``'\\n'`` becomes one space, and additional breaks are kept.

    Returns ``None`` when a document separator (``---`` or ``...`` followed
    by whitespace, a line break or NUL) starts one of the scanned lines.
    ``indent`` and ``start_mark`` are accepted but not used here.
    """
    # See the specification for details.
    # The specification is really confusing about tabs in plain scalars.
    # We just forbid them completely. Do not use tabs in YAML!

    def at_document_separator():
        return self.prefix(3) in ('---', '...') \
            and self.peek(3) in '\0 \t\r\n\x85\u2028\u2029'

    width = 0
    while self.peek(width) in ' ':
        width += 1
    blanks = self.prefix(width)
    self.forward(width)
    if self.peek() not in '\r\n\x85\u2028\u2029':
        return [blanks] if blanks else []

    first_break = self.scan_line_break()
    self.allow_simple_key = True
    if at_document_separator():
        return None
    more_breaks = []
    while True:
        ch = self.peek()
        if ch == ' ':
            self.forward()
        elif ch in '\r\n\x85\u2028\u2029':
            more_breaks.append(self.scan_line_break())
            if at_document_separator():
                return None
        else:
            break
    if first_break != '\n':
        return [first_break] + more_breaks
    # A single plain line break folds into one space.
    return more_breaks if more_breaks else [' ']

def scan_tag_handle(self, name, start_mark):
    """Scan a tag handle (``!``, ``!!`` or ``!word!``) and return its text.

    ``name`` is only used to build error messages and ``start_mark`` to
    locate them. Raises ``ScannerError`` when the current character is not
    ``!`` or when a handle that does not end at a space is not closed by a
    second ``!``.
    """
    # See the specification for details.
    # For some strange reasons, the specification does not allow '_' in
    # tag handles. I have allowed it anyway.
    word_chars = ('0123456789'
                  'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                  'abcdefghijklmnopqrstuvwxyz'
                  '-_')
    first = self.peek()
    if first != '!':
        raise ScannerError("while scanning a %s" % name, start_mark,
                "expected '!', but found %r" % first, self.get_mark())
    size = 1
    ch = self.peek(size)
    if ch != ' ':
        while ch in word_chars:
            size += 1
            ch = self.peek(size)
        if ch != '!':
            self.forward(size)
            raise ScannerError("while scanning a %s" % name, start_mark,
                    "expected '!', but found %r" % ch, self.get_mark())
        # Include the closing '!' in the handle.
        size += 1
    handle = self.prefix(size)
    self.forward(size)
    return handle

def scan_tag_uri(self, name, start_mark):
    """Scan the URI part of a tag and return it as a string.

    Consumes ASCII letters, digits and the punctuation allowed below;
    ``%`` sequences are decoded through ``scan_uri_escapes``. ``name`` and
    ``start_mark`` are used for error reporting. Raises ``ScannerError``
    when nothing at all could be consumed.
    """
    # See the specification for details.
    # Note: we do not check if URI is well-formed.
    uri_chars = ('0123456789'
                 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
                 'abcdefghijklmnopqrstuvwxyz'
                 '-;/?:@&=+$,_.!~*\'()[]%')
    pieces = []
    run = 0
    while True:
        ch = self.peek(run)
        if ch not in uri_chars:
            break
        if ch != '%':
            run += 1
            continue
        # Flush the literal run seen so far, then decode the escapes.
        pieces.append(self.prefix(run))
        self.forward(run)
        run = 0
        pieces.append(self.scan_uri_escapes(name, start_mark))
    if run:
        pieces.append(self.prefix(run))
        self.forward(run)
    if not pieces:
        raise ScannerError("while parsing a %s" % name, start_mark,
                "expected URI, but found %r" % ch, self.get_mark())
    return ''.join(pieces)

def scan_uri_escapes(self, name, start_mark):
    """Decode a run of ``%XX`` escapes into text.

    Consecutive ``%`` followed by two hexadecimal digits are collected as
    bytes and the whole run is decoded as UTF-8. ``name`` and ``start_mark``
    are used for error reporting. Raises ``ScannerError`` when a ``%`` is
    not followed by two hexadecimal digits or when the collected bytes are
    not valid UTF-8.
    """
    # See the specification for details.
    hex_digits = '0123456789ABCDEFabcdef'
    raw = bytearray()
    mark = self.get_mark()
    while self.peek() == '%':
        self.forward()
        for offset in (0, 1):
            digit = self.peek(offset)
            if digit not in hex_digits:
                raise ScannerError("while scanning a %s" % name, start_mark,
                        "expected URI escape sequence of 2 hexdecimal numbers, but found %r"
                        % digit, self.get_mark())
        raw.append(int(self.prefix(2), 16))
        self.forward(2)
    try:
        return bytes(raw).decode('utf-8')
    except UnicodeDecodeError as exc:
        raise ScannerError("while scanning a %s" % name, start_mark, str(exc), mark)

def scan_line_break(self):
    """Consume one line break, if present, and return its normalized form.

    Transforms:
      '\\r\\n'   : '\\n'
      '\\r'     : '\\n'
      '\\n'     : '\\n'
      '\\x85'   : '\\n'
      '\\u2028' : '\\u2028'
      '\\u2029' : '\\u2029'
      default  : ''   (nothing is consumed)
    """
    ch = self.peek()
    if ch in '\r\n\x85':
        # A CR LF pair counts as a single break.
        self.forward(2 if self.prefix(2) == '\r\n' else 1)
        return '\n'
    if ch in '\u2028\u2029':
        self.forward()
        return ch
    return ''

#try:

# import psyco

# psyco.bind(Scanner)

#except ImportError:

# pass

Coverage for /usr/lib/python3/dist-packages/yaml/scanner.py : 11%

727 statements 81 run 646 missing 33 excluded