# --- word_letters module: token and AST definitions (imported below as word_letters) ---

from dataclasses import dataclass
from typing import List
from abc import ABC, abstractmethod
import enum


class TokenType(enum.Enum):
    ALL = 1
    COMMA = 2
    SEMICOLON = 3
    STRING = 4
    SELECT = 5


@dataclass
class Token:
    tokenType: TokenType
    lexeme: str

    def __str__(self):
        return str(self.tokenType) + ' ' + self.lexeme

    def __repr__(self):
        return self.__str__()


@dataclass
class Place:
    name: str

    def __str__(self):
        return self.name

    def __repr__(self):
        return self.__str__()


class PlaceExpression(ABC):
    pass


class AllColumns(PlaceExpression):
    def __str__(self):
        return '*'

    def __repr__(self):
        return self.__str__()


@dataclass
class Places(PlaceExpression):
    columns: List[Place]

    def __str__(self):
        return ', '.join([str(p) for p in self.columns])

    def __repr__(self):
        return self.__str__()


@dataclass
class Expression:
    from_columns: PlaceExpression

    def __str__(self):
        return 'SELECT {\n ' + str(self.from_columns) + '\n}'

    def __repr__(self):
        return self.__str__()


# --- scanner/parser script: uses the word_letters definitions above ---

from typing import List

import word_letters
import re


class Scanner:
    def scan_token(self, token):
        if token == '*':
            return word_letters.Token(word_letters.TokenType.ALL, '*')
        elif token == ',':
            return word_letters.Token(word_letters.TokenType.COMMA, ',')
        elif token == ';':
            return word_letters.Token(word_letters.TokenType.SEMICOLON, ';')
        elif token == 'SELECT':
            return word_letters.Token(word_letters.TokenType.SELECT, 'SELECT')
        return word_letters.Token(word_letters.TokenType.STRING, token)

    def scan_tokens(self, tokens) -> List[word_letters.Token]:
        string = re.sub(r'(?<=[,;])(?=[^\s])', r' ', tokens)
        string = re.sub(r'(?<=[^\s])(?=[,;])', r' ', string)
        string = string.split(' ')
        return [self.scan_token(t) for t in string]


class Parser:
    def places(self, tokens: List[word_letters.Token]):
        columns = []
        for i, token in enumerate(tokens):
            if (i % 2 == 0 and token.tokenType != word_letters.TokenType.STRING) or (i % 2 == 1 and token.tokenType != word_letters.TokenType.COMMA):
                raise Exception()
            if i % 2 == 0:
                columns.append(word_letters.Place(token.lexeme))
        return word_letters.Places(columns=columns)

    def place_expression(self, tokens: List[word_letters.Token]):
        if len(tokens) % 2 == 0:
            raise Exception()
        if tokens[0].tokenType == word_letters.TokenType.ALL:
            return word_letters.AllColumns()
        return self.places(tokens)

    def expression(self, tokens: List[word_letters.Token]):
        return word_letters.Expression(self.place_expression(tokens))

    def parse(self, tokens: List[word_letters.Token]):
        if tokens[0].tokenType == word_letters.TokenType.SELECT:
            tokens = tokens[1:]
            return self.expression(tokens)
        else:
            raise Exception()


scanner = Scanner()
parser = Parser()
tokens = scanner.scan_tokens('SELECT first_name, second_name')
print(parser.parse(tokens))
F-strings are powerful and much easier to read. Compare 'text ' + str(number) + ' text' with f'text {number} text'.
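As an illustration, Token.__str__ from the reviewed code could be rewritten with an f-string. This is a minimal, self-contained sketch of the idea, not an exact diff:

from dataclasses import dataclass
import enum


class TokenType(enum.Enum):
    STRING = 4


@dataclass
class Token:
    tokenType: TokenType
    lexeme: str

    def __str__(self):
        # f-string instead of str(...) + ' ' + ... concatenation
        return f'{self.tokenType} {self.lexeme}'


print(Token(TokenType.STRING, 'first_name'))  # e.g. TokenType.STRING first_name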
This code is not really needed, or may at least be simplified: str.join accepts a generator expression directly, so the list comprehension brackets can be dropped.

Suggested change:

return ', '.join(str(p) for p in self.columns)
Copy-paste leads to errors very, very often, because developers forget to change a value on one of the copy-pasted lines. You should avoid it as much as possible. Usually a good solution is to extract the things that differ into separate variables. Here, a mapping could have been used.

Suggested change:

token_to_type = {
    '*': word_letters.TokenType.ALL,
    ',': word_letters.TokenType.COMMA,
    ...
}
token_type = token_to_type.get(token, word_letters.TokenType.STRING)
return word_letters.Token(token_type, token)
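Filling in the entries elided above from the branches of the original scan_token, the whole method could look roughly like this (a sketch; hoisting the mapping to module level so it is built only once is an extra choice, not part of the original suggestion):

import word_letters

# one place that maps fixed lexemes to their token types;
# anything not listed here falls back to STRING
TOKEN_TO_TYPE = {
    '*': word_letters.TokenType.ALL,
    ',': word_letters.TokenType.COMMA,
    ';': word_letters.TokenType.SEMICOLON,
    'SELECT': word_letters.TokenType.SELECT,
}


class Scanner:
    def scan_token(self, token: str) -> word_letters.Token:
        token_type = TOKEN_TO_TYPE.get(token, word_letters.TokenType.STRING)
        return word_letters.Token(token_type, token)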
Type hints help humans and linters (like mypy) understand what to expect "in" and "out" of a function. Not only do they serve as documentation for others (and for you after some time, when the code has been wiped from your "brain cache"), they also allow automated tools to find type errors.

Missing return type hint.
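For example, the scanner and parser methods could be annotated like this (a sketch showing signatures only, with the bodies elided; the parameter and return types are inferred from how the methods are called in the reviewed code):

from typing import List

import word_letters


class Scanner:
    def scan_token(self, token: str) -> word_letters.Token: ...

    def scan_tokens(self, tokens: str) -> List[word_letters.Token]: ...


class Parser:
    def places(self, tokens: List[word_letters.Token]) -> word_letters.Places: ...

    def place_expression(self, tokens: List[word_letters.Token]) -> word_letters.PlaceExpression: ...

    def expression(self, tokens: List[word_letters.Token]) -> word_letters.Expression: ...

    def parse(self, tokens: List[word_letters.Token]) -> word_letters.Expression: ...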
Seems like things could be organized in a better way. If you want to require even/odd tokens to be of types STRING/COMMA, you could just write exactly that.

Suggested change:

string_tokens = tokens[::2]
if not all(token.tokenType == word_letters.TokenType.STRING for token in string_tokens):
    raise ValueError('Wrong token type')

comma_tokens = tokens[1::2]
if not all(token.tokenType == word_letters.TokenType.COMMA for token in comma_tokens):
    raise ValueError('Wrong token type')

columns = [word_letters.Place(token.lexeme) for token in string_tokens]
return word_letters.Places(columns=columns)
Using len and range in Python's for loop smells. Idiomatic Python iteration looks like for element in collection. If you need the element's index as well, use for i, element in enumerate(collection).
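A quick generic illustration of the difference (not code taken from the review):

columns = ['first_name', 'second_name']

# index-based iteration: works, but smells
for i in range(len(columns)):
    print(i, columns[i])

# idiomatic: iterate over the elements, and use enumerate when the index is needed
for i, column in enumerate(columns):
    print(i, column)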
Exceptions should be easy to catch. If your code raises only Exception or ValueError, then it's very hard to catch specific errors, because all thrown exception classes are the same. Create application-specific exceptions, so that every logical error has its own exception class:

class VerySpecificException(Exception):
    pass
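Applied to this parser, which currently raises bare Exception(), that could look roughly like the sketch below. It is deliberately self-contained (plain strings instead of word_letters tokens), and the exception names are only illustrative:

class SqlParseError(Exception):
    """Base class for all errors raised by the query parser."""


class MissingSelectError(SqlParseError):
    """The query does not start with SELECT."""


class UnexpectedTokenError(SqlParseError):
    """A token of an unexpected type was encountered."""


def parse(tokens):
    # simplified stand-in for Parser.parse: raise a specific exception
    # instead of a bare Exception()
    if not tokens or tokens[0] != 'SELECT':
        raise MissingSelectError('query must start with SELECT')
    # ... the rest of the parsing would go here ...


try:
    parse(['first_name', ',', 'second_name'])
except MissingSelectError as error:
    # callers can now catch exactly the failure they care about
    print(f'invalid query: {error}')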