#include "lexal.h" #include "convert.h" Lexeme::Lexeme(Type t, const std::wstring &cont, int line, int pos) { type = t; content = cont; this->line = line; this->pos = pos; } std::wstring Lexeme::getPosStr() const { return Lexal::posToStr(line, pos); } Lexal::Lexal(UtfStreamReader &rdr): reader(rdr) { line = 1; pos = 0; } static bool isIdentStart(wchar_t ch) { return ((L'a' <= ch) && (L'z' >= ch)) || ((L'A' <= ch) && (L'Z' >= ch)) || (L'_' == ch); } static bool isIdentCont(wchar_t ch) { return ((L'a' <= ch) && (L'z' >= ch)) || ((L'A' <= ch) && (L'Z' >= ch)) || (L'_' == ch) || (L'.' == ch) || ((L'0' <= ch) && (L'9' >= ch)); } static bool isWhiteSpace(wchar_t ch) { return (L' ' == ch) || (L'\t' == ch) || (L'\n' == ch) || (L'\r' == ch); } static bool isDigit(wchar_t ch) { return (L'0' <= ch) && (L'9' >= ch); } static bool isSymbol(wchar_t ch) { return (L'{' == ch) || (L'}' == ch) || (L',' == ch) || (L'=' == ch) || (L';' == ch); } static bool isQuote(wchar_t ch) { return (L'\'' == ch) || (L'"' == ch); } std::wstring Lexal::posToStr(int line, int pos) { return L"(" + toString(line) + L":" + toString(pos) + L")"; } Lexeme Lexal::getNext() { skipSpaces(); if (reader.isEof()) return Lexeme(Lexeme::Eof, L"", line, pos); int startLine = line; int startPos = pos; wchar_t ch = reader.getNextChar(); pos++; if (isIdentStart(ch)) return readIdent(startLine, startPos, ch); else if (isDigit(ch)) return readNumber(startLine, startPos, ch); else if (isQuote(ch)) return readString(startLine, startPos, ch); else if (isSymbol(ch)) return Lexeme(Lexeme::Symbol, toString(ch), startLine, startPos); throw Exception(L"Invalid character at "+ posToStr(startLine, startPos)); } Lexeme Lexal::readString(int startLine, int startPos, wchar_t quote) { std::wstring str; bool closed = false; while (! reader.isEof()) { wchar_t ch = reader.getNextChar(); pos++; if ('\n' == ch) { line++; pos = 0; } else if ('\\' == ch) { if (! reader.isEof()) { wchar_t nextCh = reader.getNextChar(); if (isWhiteSpace(nextCh)) throw Exception(L"Invalid escape sequence at " + posToStr(line, pos)); pos++; switch (nextCh) { case L'\t': str += L'\t'; break; case L'\n': str += L'\n'; break; case L'\r': str += L'\r'; break; default: str += nextCh; } } } else if (quote == ch) { closed = true; break; } else str += ch; } if (! closed) throw Exception(L"String at " + posToStr(startLine, startPos) + L" doesn't closed"); return Lexeme(Lexeme::String, str, startLine, startPos); } Lexeme Lexal::readNumber(int startLine, int startPos, wchar_t first) { std::wstring number; number += first; Lexeme::Type type = Lexeme::Integer; while (! reader.isEof()) { wchar_t ch = reader.getNextChar(); pos++; if (isDigit(ch)) number += ch; else if (L'.' == ch) { if (Lexeme::Integer == type) { type = Lexeme::Float; number += ch; } else throw Exception(L"To many dots in number at " + posToStr(line, pos)); } else if ((! isSymbol(ch)) && (! isWhiteSpace(ch))) throw Exception(L"invalid number at " + posToStr(line, pos)); else { pos--; reader.ungetChar(ch); break; } } if (L'.' == number[number.length() - 1]) throw Exception(L"Missing digit after dot at " + posToStr(line, pos)); return Lexeme(type, number, startLine, startPos); } Lexeme Lexal::readIdent(int startLine, int startPos, wchar_t first) { std::wstring ident; ident += first; while (! reader.isEof()) { wchar_t ch = reader.getNextChar(); if (! isIdentCont(ch)) { reader.ungetChar(ch); break; } ident += ch; pos++; } return Lexeme(Lexeme::Ident, ident, startLine, startPos); } void Lexal::skipToLineEnd() { while (! reader.isEof()) { wchar_t ch = reader.getNextChar(); pos++; if ('\n' == ch) { pos = 0; line++; return; } } } void Lexal::skipMultilineComment(int startLine, int startPos) { while (! reader.isEof()) { wchar_t ch = reader.getNextChar(); pos++; if ('\n' == ch) { pos = 0; line++; } else if (('*' == ch) && (! reader.isEof())) { wchar_t nextCh = reader.getNextChar(); if ('/' != nextCh) reader.ungetChar(nextCh); } } throw Exception(L"Remark started at " + posToStr(startLine, startPos) + L" is not closed"); } void Lexal::skipSpaces() { while (! reader.isEof()) { wchar_t ch = reader.getNextChar(); pos++; if (! isWhiteSpace(ch)) { if ('#' == ch) skipToLineEnd(); else { bool finish = false; if (('/' == ch) && (! reader.isEof())) { wchar_t nextCh = reader.getNextChar(); pos++; if ('/' == nextCh) skipToLineEnd(); else if ('*' == nextCh) skipMultilineComment(line, pos); else { pos--; reader.ungetChar(nextCh); finish = true; } } else finish = true; if (finish) { pos--; reader.ungetChar(ch); return; } } } else if ('\n' == ch) { pos = 0; line++; } } }