from dataclasses import dataclass
from typing import List
from abc import ABC, abstractmethod
import enum


class TokenType(enum.Enum):
    """Kinds of lexical tokens produced by the scanner."""

    ALL = 1        # the '*' wildcard
    COMMA = 2
    SEMICOLON = 3
    STRING = 4     # any bare word (e.g. a column name)
    SELECT = 5     # the SELECT keyword

@dataclass
class Token:
    """A single lexical token: its type plus the exact source text (lexeme)."""

    tokenType: TokenType
    lexeme: str

    def __str__(self):
        # f-string instead of manual str() concatenation.
        return f'{self.tokenType} {self.lexeme}'

    def __repr__(self):
        return self.__str__()


@dataclass
class Place:
    """A single named column appearing in a SELECT clause."""

    name: str

    def __str__(self):
        return self.name

    def __repr__(self):
        return str(self)

class PlaceExpression(ABC):
    """Abstract base for the column part of a SELECT: either the '*' wildcard or an explicit column list."""
    pass

class AllColumns(PlaceExpression):
    """The '*' wildcard: select every column."""

    def __str__(self):
        return '*'

    def __repr__(self):
        return str(self)

@dataclass
class Places(PlaceExpression):
    """An explicit, ordered list of selected columns."""

    columns: List[Place]

    def __str__(self):
        # join accepts a generator directly; no intermediate list needed.
        return ', '.join(str(p) for p in self.columns)

    def __repr__(self):
        return self.__str__()

@dataclass
class Expression:
    """The parsed query: a SELECT over a place expression."""

    from_columns: PlaceExpression

    def __str__(self):
        # f-string instead of concatenation; braces are doubled because
        # literal '{' and '}' appear in the output.
        return f'SELECT {{\n  {self.from_columns}\n}}'

    def __repr__(self):
        return self.__str__()





from typing import List
import word_letters
import re

class Scanner:
    """Splits a raw query string into word_letters.Token objects."""

    def scan_token(self, token: str) -> 'word_letters.Token':
        """Classify a single whitespace-delimited lexeme.

        Fixed lexemes map directly to their token type; anything else
        is treated as a STRING (e.g. a column name).
        """
        # Lookup table instead of a copy-pasted if/elif chain.
        token_to_type = {
            '*': word_letters.TokenType.ALL,
            ',': word_letters.TokenType.COMMA,
            ';': word_letters.TokenType.SEMICOLON,
            'SELECT': word_letters.TokenType.SELECT,
        }
        token_type = token_to_type.get(token, word_letters.TokenType.STRING)
        return word_letters.Token(token_type, token)

    def scan_tokens(self, tokens: str) -> List['word_letters.Token']:
        """Tokenize a whole query string.

        Inserts a space after/before any ',' or ';' glued to a word so the
        punctuation splits into its own lexeme, then classifies each piece.
        """
        # Space after a comma/semicolon that touches the next word...
        string = re.sub(r'(?<=[,;])(?=[^\s])', r' ', tokens)
        # ...and before one that touches the previous word.
        string = re.sub(r'(?<=[^\s])(?=[,;])', r' ', string)
        return [self.scan_token(t) for t in string.split(' ')]

class Parser:
    """Builds a word_letters.Expression tree from a scanned token list."""

    def places(self, tokens: List['word_letters.Token']) -> 'word_letters.Places':
        """Parse 'name, name, ...': STRING tokens at even positions, COMMA at odd.

        Raises ValueError on any token of the wrong type.
        """
        columns = []
        for i, token in enumerate(tokens):
            # Single expected-type computation replaces the duplicated
            # even/odd condition.
            expected = (word_letters.TokenType.STRING if i % 2 == 0
                        else word_letters.TokenType.COMMA)
            if token.tokenType != expected:
                # ValueError (still an Exception subclass) with a message,
                # instead of a bare Exception().
                raise ValueError(f'unexpected token {token} at position {i}')
            if i % 2 == 0:
                columns.append(word_letters.Place(token.lexeme))
        return word_letters.Places(columns=columns)

    def place_expression(self, tokens: List['word_letters.Token']) -> 'word_letters.PlaceExpression':
        """Parse either the '*' wildcard or a comma-separated column list."""
        # A valid list has odd length: n columns plus (n - 1) commas.
        if len(tokens) % 2 == 0:
            raise ValueError('malformed column list')

        if tokens[0].tokenType == word_letters.TokenType.ALL:
            return word_letters.AllColumns()

        return self.places(tokens)

    def expression(self, tokens: List['word_letters.Token']) -> 'word_letters.Expression':
        """Wrap the parsed place expression in an Expression node."""
        return word_letters.Expression(self.place_expression(tokens))

    def parse(self, tokens: List['word_letters.Token']) -> 'word_letters.Expression':
        """Parse a full query; the first token must be SELECT."""
        # Guard clause instead of if/else nesting.
        if tokens[0].tokenType != word_letters.TokenType.SELECT:
            raise ValueError('query must start with SELECT')
        return self.expression(tokens[1:])
        


# Demo: tokenize and parse a simple query, then print its tree form.
scanner = Scanner()
parser = Parser()

query = 'SELECT first_name, second_name'
tokens = scanner.scan_tokens(query)
print(parser.parse(tokens))

 Public
Share a link to this review

4.55% issue ratio

R61 Not using f-strings

F-strings are powerful and very easy to read. Compare: 'text ' + str(number) + ' text' vs f'text {number} text'

L12 Redundant code / overengineering

This code is not really needed or may be simplified

Suggested change:
return ', '.join(str(p) for p in self.columns)
R6 Copy-paste

Copy-paste leads to errors very, very often because developers forget to change a value on one of the copy-pasted lines. You should avoid it as much as possible. Usually a good solution is to extract the things that differ into separate variables.

Could've used mapping

Suggested change:
token_to_type = {
    '*': word_letters.TokenType.ALL,
    ',': word_letters.TokenType.COMMA,
    ...
}
token_type = token_to_type.get(token, word_letters.TokenType.STRING)
return word_letters.Token(token_type, token)
R1 Missing type hints

Type hints help humans and linters (like mypy) to understand what to expect "in" and "out" for a function. Not only does it serve as documentation for others (and for you after some time, when the code is wiped from your "brain cache"), but it also allows using automated tools to find type errors.

Missing return type hint.

L9 Bad design

Seems like things could be organized in a better way.

If you want to require even/odd tokens be of types string/comma, you could just write exactly that.

Suggested change:
string_tokens = tokens[::2]
if not all(token.tokenType == word_letters.TokenType.STRING for token in string_tokens):
    raise ValueError('Wrong token type')

comma_tokens = tokens[1::2]
if not all(token.tokenType == word_letters.TokenType.COMMA for token in comma_tokens):
    raise ValueError('Wrong token type')

columns = [word_letters.Place(token.lexeme) for token in string_tokens]
return word_letters.Places(columns=columns)
Nice!
You did a great job avoiding this case. Many developers don't.
R4 Range-based iteration

Using len and range in python's for loop smells. Idiomatic python iteration looks like for element in collection. If you need element's index as well, use for i, element in enumerate(collection).

L39 Using generic exception

Exceptions should be easy to catch. If your code throws only Exception or ValueError, then it's very hard to catch specific errors, because all thrown exception classes are the same. Create application-specific exceptions, so that every logical error has its own exception class: class VerySpecificException(Exception): pass


Create new review request