Commit 6ffd58b7 authored by Antoine Millet's avatar Antoine Millet
Browse files

Added TQL parser

parent 239784c1
Loading
Loading
Loading
Loading
+0 −0

Empty file added.

+0 −0

Empty file added.

+148 −0
Original line number Diff line number Diff line
""" TQL ast classes.
"""

import re
from fnmatch import fnmatch


class TqlAst(object):

    """ Root node of a TQL abstract syntax tree.

    The ``expression`` attribute holds the object given to the constructor
    (``None`` when nothing is given).
    """

    def __init__(self, expression=None):
        # Stored as given; no validation or copy is performed.
        self.expression = expression


class Filter(object):

    """ A filter comparing the value of a tag against a reference value.

    :param name: name of the tag the filter applies to
    :param filter: comparison operator, one of ``=``, ``>``, ``<``, ``>=``,
        ``<=``, ``:`` (glob matching) or ``~`` (regular expression)
    :param value: reference value; stored as a number when it can be
        interpreted as one (``is_number`` is then true)
    :param neg: when true, the result of :meth:`match` is inverted
    """

    def __init__(self, name, filter=None, value=None, neg=False):
        self.name = name
        self.filter = filter
        self.value, self.is_number = self._to_number(value)
        self.neg = neg

    @staticmethod
    def _to_number(value):
        """ Return a ``(converted, is_number)`` pair for *value*.

        Integers are tried first, then floats. Anything which is not a number
        (including ``None``) is returned untouched with ``is_number`` false.
        """
        if isinstance(value, float):
            # Never truncate a real float. Integral floats are normalized to
            # int so that their string form (used by the glob and regex
            # filters) carries no trailing ".0".
            if value.is_integer():
                return int(value), True
            return value, True
        try:
            return int(value), True
        except (TypeError, ValueError):
            pass
        try:
            return float(value), True
        except (TypeError, ValueError):
            return value, False

    def _compare(self, value):
        """ Apply the filter operator to *value*, without negation.

        An unknown operator never matches.
        """
        operator = self.filter
        if operator == '=':
            return bool(value == self.value)
        if operator == '>':
            return bool(value > self.value)
        if operator == '<':
            return bool(value < self.value)
        if operator == '>=':
            return bool(value >= self.value)
        if operator == '<=':
            return bool(value <= self.value)
        if operator == ':':
            return fnmatch(str(value), str(self.value))
        if operator == '~':
            return re.match(str(self.value), str(value)) is not None
        return False

    def match(self, value):
        """ Return True if the tag *value* satisfies the filter.

        The result is inverted when the filter was created with ``neg``.
        """
        if self.is_number:
            # If provided filter value is a number, we must try to cast the tag
            # value as a number to do a value comparison:
            value, _ = self._to_number(value)

        try:
            matched = self._compare(value)
        except TypeError:
            # Operands which cannot be ordered against each other (e.g. text
            # against a number) are simply not a match.
            matched = False

        if self.neg:
            matched = not matched

        return matched


class FilterPresence(object):

    """ A filter on the presence of a tag in an object.

    A leading ``-`` in the given name is stripped and recorded in the
    ``invert`` attribute.
    """

    def __init__(self, name):
        inverted = name.startswith('-')
        self.invert = inverted
        # Only the first dash is a marker; the rest of the name is kept as is.
        self.name = name[1:] if inverted else name


class BinaryOperator(object):
    """ Common base for operators working on two sub-expressions.

    Both operands are stored unchanged as ``left_expression`` and
    ``right_expression``.
    """

    def __init__(self, left_expression, right_expression):
        operands = (left_expression, right_expression)
        self.left_expression, self.right_expression = operands


class UnionOperator(BinaryOperator):
    """ Binary operator node for the union of two expressions
    (eg: expr|expr).
    """

    # Symbol identifying this operator.
    SYMBOL = '|'


class IntersectionOperator(BinaryOperator):
    """ Binary operator node for the intersection of two expressions
    (eg: expr&expr).
    """

    # Symbol identifying this operator.
    SYMBOL = '&'


class ShowOperator(object):

    """ A show operator applied to an expression (eg: $tag).

    A leading ``-`` in the pattern is stripped and recorded in the ``invert``
    attribute.
    """

    def __init__(self, expression, pattern):
        self.expression = expression
        inverted = pattern.startswith('-')
        self.invert = inverted
        # Only the first dash is a marker; the rest of the pattern is kept.
        self.pattern = pattern[1:] if inverted else pattern


class SortOperator(object):

    """ A sorting operator applied to an expression (eg: %tag).

    A leading ``-`` in the tag name is stripped and recorded in the ``invert``
    attribute.
    """

    def __init__(self, expression, name):
        self.expression = expression
        inverted = name.startswith('-')
        self.invert = inverted
        # Only the first dash is a marker; the rest of the name is kept.
        self.name = name[1:] if inverted else name


class LimitOperator(object):

    """ A limitation operator applied to an expression (eg: ^1:100).

    ``start`` and ``stop`` are stored as given and both default to ``None``.
    """

    def __init__(self, expression, start=None, stop=None):
        self.expression = expression
        self.start, self.stop = start, stop
+12 −0
Original line number Diff line number Diff line
""" Exception raised while the parsing process.
"""

class ParsingError(Exception):

    """ Exception describing a failure of the parsing process.

    The message is passed to :class:`Exception`; the optional ``line`` and
    ``column`` are kept as attributes and default to ``None``.
    """

    def __init__(self, msg, line=None, column=None):
        Exception.__init__(self, msg)
        self.line, self.column = line, column
+237 −0
Original line number Diff line number Diff line
""" The TQL lexer and parser.
"""

import re

import ply.lex as lex
import ply.yacc as yacc

from ast import (TqlAst, Filter, FilterPresence, UnionOperator,
                 IntersectionOperator, ShowOperator, SortOperator,
                 LimitOperator)
from errors import ParsingError


# Multipliers of the unit suffixes accepted on numeric words, keyed by the
# lowercase suffix. Single letter suffixes are powers of 10, suffixes ending
# with "i" are powers of 2.
UNITS = {'k': 10 ** 3,
         'm': 10 ** 6,
         'g': 10 ** 9,
         't': 10 ** 12,
         'p': 10 ** 15,
         'e': 10 ** 18,
         'z': 10 ** 21,
         'y': 10 ** 24,
         'ki': 2 ** 10,
         'mi': 2 ** 20,
         'gi': 2 ** 30,
         'ti': 2 ** 40,
         'pi': 2 ** 50,
         'ei': 2 ** 60,
         'zi': 2 ** 70,
         'yi': 2 ** 80}


# Matches a complete word made of a number ("num" group), an optional space
# and a unit suffix ("unit" group, case insensitive).
# Longest suffixes come first in the alternation so that "ki" is never read as
# "k", and the pattern is anchored at the end so that words which merely start
# like a number with a unit (eg: "10km") are not matched.
RE_UNIT = re.compile(r'(?P<num>[-+]?[0-9]+(\.[0-9]+)?)[ ]?'
                     r'(?P<unit>%s)$'
                     % '|'.join(sorted(UNITS, key=lambda u: (-len(u), u))),
                     re.IGNORECASE)

class TqlLexer(object):

    """ Lexer for the TQL format.

    Wraps a PLY lexer built from the token rules declared on this class.
    Attributes which are not defined here are looked up on the wrapped lexer.
    """

    def __init__(self, **kwargs):
        # Text given to the last input() call, needed to compute columns.
        self._current_input = ''
        # Extra keyword arguments are forwarded to lex.lex().
        self._lexer = lex.lex(module=self, **kwargs)

    #
    # Tokens
    #

    tokens = ('UNION', 'INTERSECTION', 'CIRCUMFLEX', 'DOLLAR', 'PERCENT',
              'EQUAL', 'COLON', 'TILDE', 'GT', 'GTE', 'LT', 'LTE', 'NOT',
              'WORD', 'TEXT', 'LEFT_PAR', 'RIGHT_PAR', 'EOL')

    # Raw strings keep the backslashes of the patterns without relying on
    # invalid string escape sequences.
    t_UNION = r'\|'
    t_INTERSECTION = r'&'
    t_CIRCUMFLEX = r'\^'
    t_DOLLAR = r'\$'
    t_PERCENT = r'%'
    t_EQUAL = r'='
    t_COLON = r':'
    t_TILDE = r'~'
    t_GT = r'>'
    t_GTE = r'>='
    t_LT = r'<'
    t_LTE = r'<='
    t_NOT = r'!'
    # NOTE(review): this class excludes the space and the operator characters
    # only; tabs, newlines and quotes are accepted inside a word -- confirm
    # this is intended.
    t_WORD = r'[^ |&^$%=:~>>=<<=!()]+'
    t_LEFT_PAR = r'\('
    t_RIGHT_PAR = r'\)'
    t_ignore = ' \t'

    # The docstring of the two rules below is the regular expression of the
    # token, it must not be used for documentation.

    # Quoted text: the surrounding quotes are removed and the escaped quotes
    # (of the same kind as the surrounding ones) are unescaped.
    def t_TEXT(self, token):
        r'(["]([\\]["]|[^"]|)*["]|[\']([\\][\']|[^\'])*[\'])'
        quote = token.value[0]
        token.value = token.value[1:-1].replace('\\' + quote, quote)
        return token

    # Newlines only update the line counter, no token is produced.
    def t_EOL(self, token):
        r'[\n]+'
        token.lexer.lineno += len(token.value)

    def t_error(self, token):
        """ Raise a ParsingError locating the character which is not part of
        any token.
        """
        # Report a column relative to the current line rather than the
        # absolute position in the input.
        raise ParsingError('Illegal character %r' % token.value[0],
                           line=self._lexer.lineno,
                           column=self.column(token.lexpos))

    #
    # Public methods
    #

    def column(self, lexpos):
        """ Find the column according to the lexpos.

        The column is 0-based on every line: it is the offset of *lexpos*
        from the character following the previous newline, or from the
        beginning of the input on the first line.
        """
        # Adapted from the python-ply documentation
        # see: http://www.dabeaz.com/ply/ply.html section 4.6
        line_start = self._current_input.rfind('\n', 0, lexpos) + 1
        return lexpos - line_start

    #
    # Bindings to the internal _lexer object
    #

    def input(self, input):
        """ Feed the wrapped lexer with a new input text.
        """
        self._current_input = input
        return self._lexer.input(input)

    def __getattr__(self, name):
        # Only called when the regular lookup failed. The wrapped lexer itself
        # must never be delegated, otherwise a lookup done before it is set
        # would recurse forever.
        if name == '_lexer':
            raise AttributeError("'%s' object has no attribute '%s'"
                                 % (type(self).__name__, name))
        # getattr() raises AttributeError by itself for unknown names;
        # attributes whose value is None are returned like any other.
        return getattr(self._lexer, name)


class TqlParser(object):

    """ Parser for the TQL format.

    :param input: the TQL text to parse
    :param input_name: optional keyword argument, stored as ``_input_name``
        (defaults to ``'<unknown>'``)
    :param lexer: optional keyword argument, the lexer to use (a new
        :class:`TqlLexer` is created when it is not provided)

    Any other keyword argument is forwarded to ``yacc.yacc``. Attributes which
    are not defined here are looked up on the wrapped PLY parser.
    """

    tokens = TqlLexer.tokens

    precedence = (('left', 'UNION', 'INTERSECTION'),
                  ('right', 'CIRCUMFLEX', 'DOLLAR', 'PERCENT'))

    def __init__(self, input, **kwargs):
        self._input = input
        self._input_name = kwargs.pop('input_name', '<unknown>')
        # Build the default lexer only when the caller did not provide one.
        lexer = kwargs.pop('lexer', None)
        self._lexer = TqlLexer() if lexer is None else lexer
        self._parser = yacc.yacc(module=self, **kwargs)

    #
    # Rules
    #
    # The docstring of each p_* method is the grammar rule read by PLY, it
    # must not be used for documentation.
    #

    start = 'input'

    # An empty input gives an AST without expression.
    def p_input(self, p):
        """input : expression
                 |"""
        if len(p) == 1:
            p[0] = TqlAst()
        else:
            p[0] = TqlAst(p[1])

    def p_value(self, p):
        """value : expanded_word
                 | TEXT"""
        p[0] = p[1]

    # A word made of a number followed by a unit (eg: 10k, 1.5Gi) is replaced
    # by the number multiplied by the value of the unit.
    def p_expanded_word(self, p):
        """ expanded_word : WORD"""
        match = RE_UNIT.match(p[1])
        # The whole word must be a number with a unit, not only its beginning.
        if match and match.end() == len(p[1]):
            try:
                num = int(match.group('num'))
            except ValueError:
                num = float(match.group('num'))
            word = num * UNITS[match.group('unit').lower()]
        else:
            word = p[1]
        p[0] = word

    # The second form (with NOT) builds a negated filter.
    def p_expression_filter(self, p):
        """expression : WORD filter value
                      | WORD NOT filter value"""
        if len(p) == 4:
            p[0] = Filter(p[1], p[2], p[3])
        elif len(p) == 5:
            p[0] = Filter(p[1], p[3], p[4], neg=True)

    def p_expression_filter_presence(self, p):
        """expression : WORD"""
        p[0] = FilterPresence(p[1])

    def p_expression_par(self, p):
        """expression : LEFT_PAR expression RIGHT_PAR"""
        p[0] = p[2]

    def p_expression_binary(self, p):
        """expression : expression UNION expression
                      | expression INTERSECTION expression"""
        if p[2] == '|':
            p[0] = UnionOperator(p[1], p[3])
        else:
            p[0] = IntersectionOperator(p[1], p[3])

    def p_expression_show(self, p):
        """expression : expression DOLLAR WORD"""
        p[0] = ShowOperator(p[1], p[3])

    def p_expression_sort(self, p):
        """expression : expression PERCENT WORD"""
        p[0] = SortOperator(p[1], p[3])

    # The three forms are "^start", "^:stop" and "^start:stop"; the length of
    # the production tells which one was matched.
    def p_expression_limit(self, p):
        """expression : expression CIRCUMFLEX WORD
                      | expression CIRCUMFLEX COLON WORD
                      | expression CIRCUMFLEX WORD COLON WORD"""
        try:
            if len(p) == 4:
                p[0] = LimitOperator(p[1], start=int(p[3]))
            elif len(p) == 5:
                p[0] = LimitOperator(p[1], stop=int(p[4]))
            elif len(p) == 6:
                p[0] = LimitOperator(p[1], start=int(p[3]), stop=int(p[5]))
        except ValueError:
            # The error is located on the "^" symbol.
            column = self._lexer.column(p.lexpos(2))
            raise ParsingError('Syntax error near of "^": limit argument '
                               'must be an integer', column=column)

    def p_filter(self, p):
        """filter : EQUAL
                  | COLON
                  | TILDE
                  | GT
                  | GTE
                  | LT
                  | LTE"""
        p[0] = p[1]

    def p_error(self, token):
        """ Raise a ParsingError for the token the parser cannot handle.
        """
        if token is None:
            # PLY gives None instead of a token when the input ends in the
            # middle of a rule, there is no position to report.
            raise ParsingError('Syntax error: unexpected end of input')
        column = self._lexer.column(token.lexpos)
        raise ParsingError('Syntax error near of "%s"' % token.value,
                           column=column)

    #
    # Bindings to the internal _parser object
    #

    def parse(self):
        """ Parse the input given to the constructor and return the value
        produced by the wrapped parser.
        """
        return self._parser.parse(self._input, self._lexer, tracking=True)

    def __getattr__(self, name):
        # Only called when the regular lookup failed. The wrapped parser
        # itself must never be delegated, otherwise a lookup done before it is
        # set would recurse forever.
        if name == '_parser':
            raise AttributeError("'%s' object has no attribute '%s'"
                                 % (type(self).__name__, name))
        # getattr() raises AttributeError by itself for unknown names;
        # attributes whose value is None are returned like any other.
        return getattr(self._parser, name)