# Source listing: yapps2 package, file runtime.py

# Yapps 2 Runtime, part of Yapps 2 - yet another python parser system
# Copyright 1999-2003 by Amit J. Patel <amitp@cs.stanford.edu>
# Enhancements copyright 2003-2004 by Matthias Urlichs <smurf@debian.org>
#
# This version of the Yapps 2 Runtime can be distributed under the
# terms of the MIT open source license, either found in the LICENSE file
# included with the Yapps distribution
# <http://theory.stanford.edu/~amitp/yapps/> or at
# <http://www.opensource.org/licenses/mit-license.php>
#

"""Run time libraries needed to run parsers generated by Yapps.

This module defines parse-time exception classes, a scanner class, a
base class for parsers produced by Yapps, and a context class that
keeps track of the parse stack.

"""

import sys, re

# Chunk size used by Scanner.grab_input: it reads this many characters
# from the input file at a time, refills the buffer whenever fewer than
# this many unread characters remain, and discards already-consumed text
# from the front of the buffer in chunks of this size.
MIN_WINDOW=4096
# File lookup window

class SyntaxError(Exception):
    """When we run into an unexpected token, this is the exception to use.

    Within this module the class shadows the builtin SyntaxError.

    Attributes:
      pos : position the error refers to (print_error unpacks it as a
            (filename, line, column) tuple), or None if unknown
      msg : human-readable description of the problem
      context : parse context supplied by the raiser (print_error walks
            it through its .parent links), or None
    """

    def __init__(self, pos=None, msg="Bad Token", context=None):
        Exception.__init__(self)
        self.pos = pos
        self.msg = msg
        self.context = context

    def __str__(self):
        # Without a (truthy) position there is nothing useful to add.
        if not self.pos:
            return 'SyntaxError'
        return 'SyntaxError@%s(%s)' % (repr(self.pos), self.msg)

class NoMoreTokens(Exception):
    """Another exception object, for when we run out of tokens."""

class Token(object):
    """Yapps token.

    This is a container for a scanned token.

    Attributes:
      type : name of the terminal that matched
      value : the matched text
      pos : position of the token as a (filename, line, column) tuple,
            or None if unknown
    """

    def __init__(self, type,value, pos=None):
        """Initialize a token."""
        self.type = type
        self.value = value
        self.pos = pos

    def __repr__(self):
        """Return '<type: value @ file:line.col>', omitting unknown parts."""
        parts = ['<%s: %s' % (self.type, repr(self.value))]
        if self.pos:
            filename, line, col = self.pos[0], self.pos[1], self.pos[2]
            parts.append(" @ ")
            if filename:
                parts.append("%s:" % filename)
            if line:
                parts.append("%d" % line)
            # Column 0 is a valid column, so only None suppresses it.
            if col is not None:
                parts.append(".%d" % col)
        parts.append(">")
        return "".join(parts)

# Counter used to generate unique placeholder filenames ("<f.N>") for
# scanners that are created without an explicit filename.
in_name=0
class Scanner(object):
    """Yapps scanner.

    The Yapps scanner can work in context sensitive or context
    insensitive modes.  The token(restrict) method is used to scan the
    next token.  It takes a restrict set that limits the set of tokens
    it is allowed to return.  In context sensitive mode, this restrict
    set guides the scanner.  In context insensitive mode, there is no
    restriction (the set is always the full set of tokens).

    peek() and scan() provide one token of lookahead on top of token().
    """

    def __init__(self, patterns, ignore, input="",
                 file=None, filename=None, stacked=False):
        """Initialize the scanner.

        Parameters:
          patterns : [(terminal, uncompiled regex), ...] or None
          ignore : {terminal:None, ...}
          input : string
          file : optional file-like object; grab_input() reads more
                 text from it on demand
          filename : name reported in positions; when empty, a unique
                 "<f.N>" placeholder is generated
          stacked : true for a scanner created by stack_input(); such a
                 scanner raises StopIteration from token() at end of
                 input

        If patterns is None, we assume that the subclass has
        defined self.patterns : [(terminal, compiled regex), ...].
        Note that the patterns parameter expects uncompiled regexes,
        whereas the self.patterns field expects compiled regexes.

        The 'ignore' value is either None or a callable, which is called
        with the scanner and the to-be-ignored match object; this can
        be used for include file or comment handling.
        """
        global in_name
        if not filename:
            filename = "<f.%d>" % in_name
            in_name += 1

        self.input = input
        self.ignore = ignore
        self.file = file
        self.filename = filename
        self.pos = 0
        self.del_pos = 0 # skipped
        self.line = 1
        self.del_line = 0 # skipped
        self.col = 0
        self.tokens = []
        self.stack = None
        self.stacked = stacked

        self.last_read_token = None
        self.last_token = None
        self.last_types = None

        if patterns is not None:
            # Compile the regex strings into regex objects
            self.patterns = [(terminal, re.compile(regex))
                             for terminal, regex in patterns]

    def stack_input(self, input="", file=None, filename=None):
        """Temporarily parse from a second file.

        A new scanner of the same class is pushed on top of the
        innermost active one; token() reads from it until it is
        exhausted.  When no filename is given, nested levels pass an
        integer recursion depth down the stack, which the innermost
        level turns into a "<str_N>" name.
        """
        # Already reading from somewhere else: Go on top of that, please.
        if self.stack:
            # autogenerate a recursion-level-identifying filename
            if not filename:
                filename = 1
            else:
                try:
                    filename += 1
                except TypeError:
                    # A real (string) filename: pass it on unchanged.
                    pass
            # now pass off to the include file
            self.stack.stack_input(input, file, filename)
            return

        # An integer here is a recursion depth, not a name.
        try:
            filename += 0
        except TypeError:
            pass
        else:
            filename = "<str_%d>" % filename

        # Note that the pattern+ignore are added by the generated
        # scanner code
        self.stack = self.__class__(input, file, filename, stacked=True)

    def get_pos(self):
        """Return a file/line/char tuple."""
        if self.stack:
            return self.stack.get_pos()

        return (self.filename, self.line+self.del_line, self.col)

    def print_line_with_pointer(self, pos, length=0, out=sys.stderr):
        """Print the line of input that includes position 'pos',
        along with a second line with a single caret (^) below it.

        Parameters:
          pos : (filename, line, column) tuple
          length : length of the token starting at pos; the caret is
                   placed at column + length - 1
          out : object with a write() method receiving the output

        If pos names a different file, the request is forwarded to the
        stacked scanner (if any); otherwise a "not in input buffer"
        note is written.
        """
        fname, line, p = pos
        if fname != self.filename:
            if self.stack:
                return self.stack.print_line_with_pointer(pos, length=length, out=out)
            out.write("(%s: not in input buffer)\n" % fname)
            return

        text = self.input
        p += length-1 # starts at pos 1

        origline = line
        # Lines already dropped from the front of the buffer are gone.
        line -= self.del_line
        if line <= 0:
            out.write("(%s:%d not in input buffer)\n" % (fname, origline))
            return

        # Walk forward newline by newline to isolate the requested line.
        spos = 0
        while True:
            line -= 1
            try:
                cr = text.index("\n", spos)
            except ValueError:
                if line:
                    # The requested line lies beyond the buffered text.
                    text = ""
                else:
                    # Last line of the buffer, without a trailing newline.
                    text = text[spos:]
                break
            if line == 0:
                text = text[spos:cr]
                break
            spos = cr+1

        # Now try printing part of the line
        start = max(p-80, 0)
        text = text[start:p+80]
        p = p - start

        # Strip to the left
        i = text[:p].rfind('\n')
        j = text[:p].rfind('\r')
        if i < 0 or (0 <= j < i): i = j
        if 0 <= i < p:
            p = p - i - 1
            text = text[i+1:]

        # Strip to the right
        i = text.find('\n', p)
        j = text.find('\r', p)
        if i < 0 or (0 <= j < i): i = j
        if i >= 0:
            text = text[:i]

        # Now shorten the text
        while len(text) > 70 and p > 60:
            # Cut off 10 chars, replacing them with a 3-char ellipsis
            text = "..." + text[10:]
            p = p - 7

        # Now print the string, along with an indicator
        out.write('> ' + ' ' + text + '\n')
        out.write('> ' + ' ' + ' '*p + '^' + '\n')

    def grab_input(self):
        """Get more input if possible.

        Does nothing when there is no file or when at least MIN_WINDOW
        unread characters are still buffered.  Once more than
        2*MIN_WINDOW characters have been consumed, the first
        MIN_WINDOW of them are dropped and del_pos/del_line record how
        much was discarded.
        """
        if not self.file: return
        if len(self.input) - self.pos >= MIN_WINDOW: return

        data = self.file.read(MIN_WINDOW)
        if not data:
            # End of file: stop reading from it.
            self.file = None
            if data is None:
                # Keep the concatenation below from failing on None.
                data = ""

        # Drop bytes from the start, if necessary.
        if self.pos > 2*MIN_WINDOW:
            self.del_pos += MIN_WINDOW
            self.del_line += self.input[:MIN_WINDOW].count("\n")
            self.pos -= MIN_WINDOW
            self.input = self.input[MIN_WINDOW:] + data
        else:
            self.input = self.input + data

    def getchar(self):
        """Return the next character.

        Raises IndexError when no character is left in the buffer.
        """
        self.grab_input()

        c = self.input[self.pos]
        self.pos += 1
        return c

    def token(self, restrict, context=None):
        """Scan for another token.

        Parameters:
          restrict : collection of terminal names that may be returned;
                     an empty collection means no restriction
          context : passed on to SyntaxError for error reporting

        Returns the next Token that is not in self.ignore.  Raises
        SyntaxError when no pattern matches, and StopIteration when a
        stacked scanner reaches the end of its input.
        """
        while True:
            # A stacked (included) input is read until it is exhausted.
            if self.stack:
                try:
                    return self.stack.token(restrict, context)
                except StopIteration:
                    self.stack = None

            # Keep looking for a token, ignoring any in self.ignore
            self.grab_input()

            # special handling for end-of-file
            if self.stacked and self.pos == len(self.input):
                raise StopIteration

            # Search the patterns for the longest match, with earlier
            # tokens in the list having preference
            best_match = -1
            best_pat = '(error)'
            best_m = None
            for p, regexp in self.patterns:
                # First check to see if we're ignoring this token
                if restrict and p not in restrict and p not in self.ignore:
                    continue
                m = regexp.match(self.input, self.pos)
                if m and m.end()-m.start() > best_match:
                    # We got a match that's better than the previous one
                    best_pat = p
                    best_match = m.end()-m.start()
                    best_m = m

            # If we didn't find anything, raise an error
            if best_pat == '(error)' and best_match < 0:
                msg = 'Bad Token'
                if restrict:
                    msg = 'Trying to find one of '+', '.join(restrict)
                raise SyntaxError(self.get_pos(), msg, context=context)

            ignore = best_pat in self.ignore
            value = self.input[self.pos:self.pos+best_match]
            if not ignore:
                # Build the token before advancing so that it records
                # the position where it starts.
                tok = Token(type=best_pat, value=value, pos=self.get_pos())

            self.pos += best_match

            # Update line/column bookkeeping.  Note that after a
            # newline the column counts from 1, whereas it starts at 0
            # on the first line.
            npos = value.rfind("\n")
            if npos > -1:
                self.col = best_match-npos
                self.line += value.count("\n")
            else:
                self.col += best_match

            # If we found something that isn't to be ignored, return it
            if not ignore:
                # Remember only the last ten tokens.
                if len(self.tokens) >= 10:
                    del self.tokens[0]
                self.tokens.append(tok)
                self.last_read_token = tok
                return tok

            # Ignored terminal: run its callback, if any, and go on.
            ignore = self.ignore[best_pat]
            if ignore:
                ignore(self, best_m)

    def peek(self, *types, **kw):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow.

        The lookahead token is cached until scan() consumes it.
        Raises NotImplementedError when a token is already cached and
        'types' contains a type outside the cached restriction set.
        """
        context = kw.get("context",None)
        if self.last_token is None:
            self.last_types = types
            self.last_token = self.token(types,context)
        elif self.last_types:
            for t in types:
                if t not in self.last_types:
                    raise NotImplementedError("Unimplemented: restriction set changed")
        return self.last_token.type

    def scan(self, type, **kw):
        """Returns the matched text, and moves to the next token.

        Uses the token cached by peek() if there is one.  Raises
        SyntaxError when the token is not of the requested type, and
        NotImplementedError when 'type' lies outside the restriction
        set of the cached lookahead token.
        """
        context = kw.get("context",None)

        if self.last_token is None:
            tok = self.token([type],context)
        else:
            if self.last_types and type not in self.last_types:
                raise NotImplementedError("Unimplemented: restriction set changed")

            tok = self.last_token
            self.last_token = None
        if tok.type != type:
            if not self.last_types: self.last_types=[]
            raise SyntaxError(tok.pos, 'Trying to find '+type+': '+ ', '.join(self.last_types)+", got "+tok.type, context=context)
        return tok.value

class Parser(object):
    """Base class for Yapps-generated parsers.

    The parser holds a scanner in self._scanner and forwards its
    token requests to it.
    """

    def __init__(self, scanner):
        self._scanner = scanner

    def _stack(self, input="",file=None,filename=None):
        """Temporarily read from someplace else"""
        self._scanner.stack_input(input,file,filename)
        self._tok = None

    def _peek(self, *types, **kw):
        """Returns the token type for lookahead; if there are any args
        then the list of args is the set of token types to allow"""
        return self._scanner.peek(*types, **kw)

    def _scan(self, type, **kw):
        """Returns the matched text, and moves to the next token"""
        return self._scanner.scan(type, **kw)

class Context(object):
    """Class to represent the parser's call stack.

    Every rule creates a Context that links to its parent rule.  The
    contexts can be used for debugging.

    """

    def __init__(self, parent, scanner, rule, args=()):
        """Create a new context.

        Args:
        parent: Context object or None
        scanner: Scanner object
        rule: string (name of the rule)
        args: tuple listing parameters to the rule

        """
        self.parent = parent
        self.scanner = scanner
        self.rule = rule
        self.args = args
        # Record the token most recently read by the innermost stacked
        # scanner, i.e. the one that is actually supplying tokens.
        innermost = scanner
        while innermost.stack:
            innermost = innermost.stack
        self.token = innermost.last_read_token

    def __str__(self):
        """Return the chain of rule names, outermost first, joined by ' > '."""
        if self.parent:
            return str(self.parent) + ' > ' + self.rule
        return '' + self.rule
      
def print_error(err, scanner, max_ctx=None):
    """Print error messages, the parser stack, and the input text -- for human-readable error messages.

    Parameters:
      err : the error to report; its pos, msg and context attributes
            are used
      scanner : scanner whose get_pos() and print_line_with_pointer()
            are used to locate and display the input
      max_ctx : maximum number of parser contexts to print; None or 0
            prints the whole chain

    The header and the "while parsing" lines are written to sys.stderr.
    """
    # NOTE: this function assumes 80 columns :-(
    # Figure out the line number
    pos = err.pos
    if not pos:
        # The error carries no position: use the scanner's current one.
        pos = scanner.get_pos()

    file_name, line_number, column_number = pos
    sys.stderr.write('%s:%d:%d: %s\n' % (file_name, line_number, column_number, err.msg))

    scanner.print_line_with_pointer(pos)

    context = err.context
    token = None
    while context:
        sys.stderr.write('while parsing %s%s:\n' % (context.rule, tuple(context.args)))
        # A context without its own token reuses the token of the
        # previously printed (inner) context.
        if context.token:
            token = context.token
        if token:
            scanner.print_line_with_pointer(token.pos, length=len(token.value))
        context = context.parent
        if max_ctx:
            max_ctx = max_ctx-1
            if not max_ctx:
                break

def wrap_error_reporter(parser, rule, *args,**kw):
    """Run one rule of a parser, reporting parse errors on stderr.

    Parameters:
      parser : parser object; the rule is looked up as an attribute on
               it and its _scanner is used for error reporting
      rule : name of the rule method to call
      args, kw : passed through to the rule method

    Returns the rule's result.  If the rule raises SyntaxError or
    NoMoreTokens, the error is reported on sys.stderr and None is
    returned instead.
    """
    try:
        return getattr(parser, rule)(*args,**kw)
    except SyntaxError as e:
        print_error(e, parser._scanner)
    except NoMoreTokens:
        sys.stderr.write('Could not complete parsing; stopped around here:\n')
        sys.stderr.write('%s\n' % (parser._scanner,))

# Generated by Doxygen 1.6.0