einstein/lexal.cpp

249 lines
6.2 KiB
C++

#include "lexal.h"
#include "convert.h"
// Builds a lexeme from its kind, its text and the source position
// (line and column) it was read at.
Lexeme::Lexeme(Type t, const std::wstring &cont, int line, int pos)
    : type(t), content(cont), line(line), pos(pos)
{
}
// Returns this lexeme's stored line and position rendered by
// Lexal::posToStr.
std::wstring Lexeme::getPosStr() const
{
    std::wstring where = Lexal::posToStr(line, pos);
    return where;
}
// Binds the lexer to the given character reader. Line counting starts at 1
// and the position within the line starts at 0.
Lexal::Lexal(UtfStreamReader &rdr)
    : reader(rdr), line(1), pos(0)
{
}
// True when ch may begin an identifier: an ASCII letter or an underscore.
static bool isIdentStart(wchar_t ch) {
    if (ch == L'_')
        return true;
    const bool lowerCase = (ch >= L'a') && (ch <= L'z');
    const bool upperCase = (ch >= L'A') && (ch <= L'Z');
    return lowerCase || upperCase;
}
// True when ch may appear after the first character of an identifier:
// an ASCII letter, an ASCII digit, an underscore or a dot.
static bool isIdentCont(wchar_t ch) {
    if ((ch == L'_') || (ch == L'.'))
        return true;
    if ((ch >= L'a') && (ch <= L'z'))
        return true;
    if ((ch >= L'A') && (ch <= L'Z'))
        return true;
    return (ch >= L'0') && (ch <= L'9');
}
// True for the four characters the lexer treats as blank: space, tab,
// line feed and carriage return.
static bool isWhiteSpace(wchar_t ch) {
    switch (ch) {
        case L' ':
        case L'\t':
        case L'\n':
        case L'\r':
            return true;
        default:
            return false;
    }
}
// True when ch is one of the ASCII decimal digits '0'..'9'.
static bool isDigit(wchar_t ch) {
    const bool outsideRange = (ch < L'0') || (ch > L'9');
    return !outsideRange;
}
// True for the single-character punctuation tokens of the language:
// braces, comma, equals sign and semicolon.
static bool isSymbol(wchar_t ch) {
    switch (ch) {
        case L'{':
        case L'}':
        case L',':
        case L'=':
        case L';':
            return true;
        default:
            return false;
    }
}
// True when ch opens (and therefore also closes) a string literal:
// a single or a double quote.
static bool isQuote(wchar_t ch) {
    if (ch == L'"')
        return true;
    return ch == L'\'';
}
// Formats a source position as "(line:pos)" for use in messages.
std::wstring Lexal::posToStr(int line, int pos)
{
    std::wstring text(L"(");
    text += toString(line);
    text += L":";
    text += toString(pos);
    text += L")";
    return text;
}
// Returns the next lexeme from the reader.
//
// Blanks and comments are skipped first. At end of input an Eof lexeme with
// empty content is returned. Otherwise the first character selects the
// reader for identifiers, numbers or strings, or yields a one-character
// Symbol lexeme. Any other character raises Exception. The lexeme position
// is the one recorded before that first character was consumed.
Lexeme Lexal::getNext()
{
    skipSpaces();
    if (reader.isEof())
        return Lexeme(Lexeme::Eof, L"", line, pos);

    // Remember where the lexeme begins before consuming anything.
    const int lexLine = line;
    const int lexPos = pos;
    wchar_t first = reader.getNextChar();
    ++pos;

    if (isIdentStart(first))
        return readIdent(lexLine, lexPos, first);
    if (isDigit(first))
        return readNumber(lexLine, lexPos, first);
    if (isQuote(first))
        return readString(lexLine, lexPos, first);
    if (! isSymbol(first))
        throw Exception(L"Invalid character at "+ posToStr(lexLine, lexPos));
    return Lexeme(Lexeme::Symbol, toString(first), lexLine, lexPos);
}
// Reads the remainder of a string literal whose opening quote has already
// been consumed by the caller.
//
// startLine, startPos - position recorded for the resulting lexeme and used
//                       in the "not closed" error.
// quote               - the opening quote character; the same character
//                       terminates the string.
//
// A raw line feed inside the literal advances the line counter but is not
// stored in the result. A backslash introduces an escape: the letters
// t, n and r produce tab, line feed and carriage return; any other
// non-blank character stands for itself (so a quote or a backslash can be
// embedded). A backslash followed by a blank character raises Exception,
// as does reaching end of input before the closing quote.
//
// Returns a String lexeme holding the decoded text.
Lexeme Lexal::readString(int startLine, int startPos, wchar_t quote)
{
    std::wstring str;
    bool closed = false;
    while (! reader.isEof()) {
        wchar_t ch = reader.getNextChar();
        pos++;
        if (L'\n' == ch) {
            // Only the position bookkeeping changes; the line break itself
            // is not appended to the string.
            line++;
            pos = 0;
        } else if (L'\\' == ch) {
            if (! reader.isEof()) {
                wchar_t nextCh = reader.getNextChar();
                if (isWhiteSpace(nextCh))
                    throw Exception(L"Invalid escape sequence at " +
                            posToStr(line, pos));
                pos++;
                // Blank characters were rejected above, so the escape is
                // selected by the letter that follows the backslash.
                switch (nextCh) {
                    case L't': str += L'\t'; break;
                    case L'n': str += L'\n'; break;
                    case L'r': str += L'\r'; break;
                    default:
                        str += nextCh;
                }
            }
        } else if (quote == ch) {
            closed = true;
            break;
        } else
            str += ch;
    }
    if (! closed)
        throw Exception(L"String at " + posToStr(startLine, startPos)
                + L" doesn't closed");
    return Lexeme(Lexeme::String, str, startLine, startPos);
}
// Reads the remainder of a numeric literal whose first digit has already
// been consumed by the caller.
//
// startLine, startPos - position recorded for the resulting lexeme.
// first               - the digit that was already read.
//
// Digits are accumulated; a single dot turns the literal into a Float.
// The number ends at end of input or at a symbol or blank character, which
// is pushed back to the reader. A second dot, any other terminating
// character, or a literal ending in a dot raises Exception.
//
// Returns an Integer or Float lexeme holding the literal's text.
Lexeme Lexal::readNumber(int startLine, int startPos, wchar_t first)
{
    std::wstring digits(1, first);
    bool hasDot = false;

    while (! reader.isEof()) {
        wchar_t c = reader.getNextChar();
        ++pos;

        if (isDigit(c)) {
            digits += c;
            continue;
        }
        if (c == L'.') {
            if (hasDot)
                throw Exception(L"To many dots in number at " +
                        posToStr(line, pos));
            hasDot = true;
            digits += c;
            continue;
        }
        if (isSymbol(c) || isWhiteSpace(c)) {
            // The terminator belongs to the next lexeme: give it back.
            --pos;
            reader.ungetChar(c);
            break;
        }
        throw Exception(L"invalid number at " + posToStr(line, pos));
    }

    if (digits[digits.size() - 1] == L'.')
        throw Exception(L"Missing digit after dot at " + posToStr(line, pos));

    const Lexeme::Type kind = hasDot ? Lexeme::Float : Lexeme::Integer;
    return Lexeme(kind, digits, startLine, startPos);
}
// Reads the remainder of an identifier whose first character has already
// been consumed by the caller.
//
// startLine, startPos - position recorded for the resulting lexeme.
// first               - the identifier's first character.
//
// Characters are taken while isIdentCont accepts them; the first rejected
// character is pushed back to the reader. Returns an Ident lexeme.
Lexeme Lexal::readIdent(int startLine, int startPos, wchar_t first)
{
    std::wstring name(1, first);
    while (! reader.isEof()) {
        wchar_t next = reader.getNextChar();
        if (isIdentCont(next)) {
            name += next;
            ++pos;
        } else {
            reader.ungetChar(next);
            break;
        }
    }
    return Lexeme(Lexeme::Ident, name, startLine, startPos);
}
// Discards characters up to and including the next line feed, or up to end
// of input if there is none. After a line feed the line counter is advanced
// and the position is reset to 0.
void Lexal::skipToLineEnd()
{
    for (;;) {
        if (reader.isEof())
            return;
        wchar_t c = reader.getNextChar();
        ++pos;
        if (c == L'\n') {
            ++line;
            pos = 0;
            return;
        }
    }
}
// Discards the body of a block comment whose opening "/*" has already been
// consumed by the caller, stopping right after the closing "*/".
//
// startLine, startPos - position reported in the error message when the
//                       comment is never closed.
//
// Line feeds inside the comment advance the line counter and reset the
// position. Raises Exception if end of input is reached before "*/".
void Lexal::skipMultilineComment(int startLine, int startPos)
{
    while (! reader.isEof()) {
        wchar_t ch = reader.getNextChar();
        pos++;
        if (L'\n' == ch) {
            pos = 0;
            line++;
        } else if ((L'*' == ch) && (! reader.isEof())) {
            wchar_t nextCh = reader.getNextChar();
            if (L'/' == nextCh) {
                // Closing "*/" found: account for the slash and stop.
                pos++;
                return;
            }
            // Not a terminator; give the character back so that a run such
            // as "**/" is still recognised on the next iteration.
            reader.ungetChar(nextCh);
        }
    }
    throw Exception(L"Remark started at " + posToStr(startLine, startPos)
            + L" is not closed");
}
// Skips everything that separates lexemes: blank characters, line comments
// introduced by '#' or "//", and block comments introduced by "/*".
//
// Stops at end of input or at the first character that starts none of
// these; that character is pushed back to the reader so the caller sees it
// next. Line and position counters are kept up to date along the way.
void Lexal::skipSpaces()
{
    while (! reader.isEof()) {
        wchar_t c = reader.getNextChar();
        ++pos;

        if (isWhiteSpace(c)) {
            if (c == L'\n') {
                ++line;
                pos = 0;
            }
            continue;
        }

        if (c == L'#') {
            skipToLineEnd();
            continue;
        }

        if ((c == L'/') && (! reader.isEof())) {
            wchar_t following = reader.getNextChar();
            ++pos;
            if (following == L'/') {
                skipToLineEnd();
                continue;
            }
            if (following == L'*') {
                skipMultilineComment(line, pos);
                continue;
            }
            // A lone slash is not a comment: return the look-ahead first,
            // then fall through to return the slash itself.
            --pos;
            reader.ungetChar(following);
        }

        // First character of a real lexeme: hand it back and stop.
        --pos;
        reader.ungetChar(c);
        return;
    }
}