ScoDoc/config/softs/pyExcelerator-0.6.3a.patched/pyExcelerator/ExcelFormulaLexer.py

159 lines
5.4 KiB
Python
Executable File
Raw Blame History

#!/usr/bin/env python
# -*- coding: windows-1251 -*-
# Copyright (C) 2005 Roman V. Kiseliov
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
#
# 3. All advertising materials mentioning features or use of this
# software must display the following acknowledgment:
# "This product includes software developed by
# Roman V. Kiseliov <roman@kiseliov.ru>."
#
# 4. Redistributions of any form whatsoever must retain the following
# acknowledgment:
# "This product includes software developed by
# Roman V. Kiseliov <roman@kiseliov.ru>."
#
# THIS SOFTWARE IS PROVIDED BY Roman V. Kiseliov ``AS IS'' AND ANY
# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL Roman V. Kiseliov OR
# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
# OF THE POSSIBILITY OF SUCH DAMAGE.
__rev_id__ = """$Id: ExcelFormulaLexer.py,v 1.4 2005/08/14 06:40:23 rvk Exp $"""
import sys
from antlr import EOF, CommonToken as Tok, TokenStream, TokenStreamException
import struct
import ExcelFormulaParser
from re import compile as recompile, match, LOCALE, UNICODE, IGNORECASE
int_const_pattern = recompile(r"\d+")
flt_const_pattern = recompile(r"\d*\.\d+(?:[Ee][+-]?\d+)?")
str_const_pattern = recompile(r'["][^"]*["]')
#range2d_pattern = recompile(r"\$?[A-I]?[A-Z]\$?\d+:\$?[A-I]?[A-Z]\$?\d+")
ref2d_pattern = recompile(r"\$?[A-I]?[A-Z]\$?\d+")
true_pattern = recompile(r"TRUE", IGNORECASE)
false_pattern = recompile(r"FALSE", IGNORECASE)
name_pattern = recompile(r"[\.\w]+", LOCALE)
pattern_type_tuples = (
(flt_const_pattern, ExcelFormulaParser.NUM_CONST),
(int_const_pattern, ExcelFormulaParser.INT_CONST),
(str_const_pattern, ExcelFormulaParser.STR_CONST),
# (range2d_pattern , ExcelFormulaParser.RANGE2D),
(ref2d_pattern , ExcelFormulaParser.REF2D),
(true_pattern , ExcelFormulaParser.TRUE_CONST),
(false_pattern , ExcelFormulaParser.FALSE_CONST),
(name_pattern , ExcelFormulaParser.NAME)
)
type_text_tuples = (
(ExcelFormulaParser.NE, '<>'),
(ExcelFormulaParser.LE, '<='),
(ExcelFormulaParser.GE, '>='),
(ExcelFormulaParser.EQ, '='),
(ExcelFormulaParser.LT, '<'),
(ExcelFormulaParser.GT, '>'),
(ExcelFormulaParser.ADD, '+'),
(ExcelFormulaParser.SUB, '-'),
(ExcelFormulaParser.MUL, '*'),
(ExcelFormulaParser.DIV, '/'),
(ExcelFormulaParser.COLON, ':'),
(ExcelFormulaParser.SEMICOLON, ';'),
(ExcelFormulaParser.COMMA, ','),
(ExcelFormulaParser.LP, '('),
(ExcelFormulaParser.RP, ')'),
(ExcelFormulaParser.CONCAT, '&'),
(ExcelFormulaParser.PERCENT, '%'),
(ExcelFormulaParser.POWER, '^')
)
class Lexer(TokenStream):
def __init__(self, text):
self._text = text[:]
self._pos = 0
self._line = 0
def isEOF(self):
return len(self._text) <= self._pos
def curr_ch(self):
return self._text[self._pos]
def next_ch(self, n = 1):
self._pos += n
def is_whitespace(self):
return self.curr_ch() in " \t\n\r\f\v"
def match_pattern(self, pattern, toktype):
m = pattern.match(self._text[self._pos:])
if m:
start_pos = self._pos + m.start(0)
end_pos = self._pos + m.end(0)
tt = self._text[start_pos:end_pos]
self._pos = end_pos
return Tok(type = toktype, text = tt, col = start_pos + 1)
else:
return None
def nextToken(self):
# skip whitespace
while not self.isEOF() and self.is_whitespace():
self.next_ch()
if self.isEOF():
return Tok(type = EOF)
# first, try to match token with more chars
for ptt in pattern_type_tuples:
t = self.match_pattern(*ptt);
if t:
return t
# second, we want find short tokens
for ty, te in type_text_tuples:
if self.curr_ch() == te:
self.next_ch()
return Tok(type = ty, text = te, col = self._pos)
# at this point, smth strange is happened
raise TokenStreamException("Unknown char %s at %u col." % (self.curr_ch(), self._pos))
if __name__ == '__main__' :
import locale
locale.setlocale(locale.LC_ALL, 'russian')
try:
for t in Lexer('1+2+3+67.8678 + " @##$$$ klhkh kljhklhkl " + .58e-678*A1:B4 - 1lkjljlkjl3535<33><35><EFBFBD><EFBFBD><EFBFBD><EFBFBD>'):
print t
except TokenStreamException, e:
print "error:", e