lib/lexer.js (468 lines of code) (raw):
'use strict';
const {
isLetter, isDecimalDigit
} = require('./helper');
const {
Tag
} = require('./tag');
const Keyword = require('./keyword');
const {
Lexer: BaseLexer,
Token
} = require('@jacksontian/skyline');
const {
StringLiteral,
NumberLiteral,
Annotation,
Comment,
TemplateElement,
WordToken,
OperatorToken
} = require('./tokens');
class Lexer extends BaseLexer {
constructor(source, filename) {
super(source, filename);
this.reserve(new Keyword('import', Tag.IMPORT));
this.reserve(new Keyword('extends', Tag.EXTENDS));
this.reserve(new Keyword('super', Tag.SUPER));
this.reserve(new Keyword('const', Tag.CONST));
this.reserve(new Keyword('rpc', Tag.RPC));
this.reserve(new Keyword('static', Tag.STATIC));
// data types
this.reserve(new Keyword('class', Tag.TYPE));
this.reserve(new Keyword('void', Tag.TYPE));
this.reserve(new Keyword('string', Tag.TYPE));
this.reserve(new Keyword('number', Tag.TYPE));
this.reserve(new Keyword('integer', Tag.TYPE));
this.reserve(new Keyword('int8', Tag.TYPE));
this.reserve(new Keyword('int16', Tag.TYPE));
this.reserve(new Keyword('int32', Tag.TYPE));
this.reserve(new Keyword('int64', Tag.TYPE));
this.reserve(new Keyword('long', Tag.TYPE));
this.reserve(new Keyword('uint8', Tag.TYPE));
this.reserve(new Keyword('uint16', Tag.TYPE));
this.reserve(new Keyword('uint32', Tag.TYPE));
this.reserve(new Keyword('uint64', Tag.TYPE));
this.reserve(new Keyword('ulong', Tag.TYPE));
this.reserve(new Keyword('float', Tag.TYPE));
this.reserve(new Keyword('double', Tag.TYPE));
this.reserve(new Keyword('boolean', Tag.TYPE));
this.reserve(new Keyword('bytes', Tag.TYPE));
this.reserve(new Keyword('any', Tag.TYPE));
this.reserve(new Keyword('map', Tag.TYPE));
this.reserve(new Keyword('entry', Tag.TYPE));
this.reserve(new Keyword('object', Tag.TYPE));
this.reserve(new Keyword('writable', Tag.TYPE));
this.reserve(new Keyword('readable', Tag.TYPE));
this.reserve(new Keyword('asyncIterator', Tag.TYPE));
this.reserve(new Keyword('iterator', Tag.TYPE));
// boolean
this.reserve(new Keyword('true', Tag.BOOL));
this.reserve(new Keyword('false', Tag.BOOL));
// null
this.reserve(new Keyword('null', Tag.NULL));
this.reserve(new Keyword('if', Tag.IF));
this.reserve(new Keyword('else', Tag.ELSE));
this.reserve(new Keyword('return', Tag.RETURN));
this.reserve(new Keyword('yield', Tag.YIELD));
this.reserve(new Keyword('throw', Tag.THROW));
this.reserve(new Keyword('while', Tag.WHILE));
this.reserve(new Keyword('for', Tag.FOR));
this.reserve(new Keyword('break', Tag.BREAK));
this.reserve(new Keyword('var', Tag.VAR));
// module
this.reserve(new Keyword('new', Tag.NEW));
// try/catch/finally
this.reserve(new Keyword('try', Tag.TRY));
this.reserve(new Keyword('catch', Tag.CATCH));
this.reserve(new Keyword('finally', Tag.FINALLY));
// add $type to assign instance
this.reserve(new Keyword('$type', Tag.TYPE));
// the state for template string
this.inTemplate = false;
}
/**
 * Prints the current position (`filename:line:column`), the offending source
 * line and a caret under the current column to stderr, then throws.
 *
 * @param {string} message - text of the thrown error
 * @throws {SyntaxError} always
 */
error(message) {
  console.error(`${this.filename}:${this.line}:${this.column}`);
  const sourceLine = this.source.split('\n')[this.line - 1];
  // Print an empty line rather than the text "undefined" when the current
  // line index lies outside the source.
  console.error(`${sourceLine === undefined ? '' : sourceLine}`);
  // String.prototype.repeat throws a RangeError for a negative count, which
  // would hide the SyntaxError below when column is 0; clamp the padding.
  const padding = ' '.repeat(Math.max(0, this.column - 1));
  console.error(`${padding}^`);
  throw new SyntaxError(message);
}
loc() {
return {
line: this.line,
column: this.column
};
}
parseString() {
var quote = this.peek;
let str = '';
this.getch();
let start = this.loc();
var end;
for (; ;) {
if (this.peek === quote) {
end = this.loc();
this.getch();
break;
}
var c = this.peek;
if (this.peek === '\\') {
this.getch();
switch (this.peek) { // 解析转义字符
case '0':
c = '\0';
break;
case 'b':
c = '\b';
break;
case 't':
c = '\t';
break;
case 'n':
c = '\n';
break;
case 'v':
c = '\v';
break;
case 'f':
c = '\f';
break;
case 'r':
c = '\r';
break;
case '\'':
c = '\'';
break;
case '"':
c = '"';
break;
case '\\':
c = '\\';
break;
default:
this.error(`Invalid char: \\0x${this.peek}/'\\0x${this.peek.charCodeAt(0)}'`);
}
str += c;
this.getch();
} else if (this.peek) {
str += this.peek;
this.getch();
} else {
this.error('Unexpect end of file');
}
}
return new StringLiteral(str, {
start, end
});
}
parseTemplateString() {
this.getch();
var tpl = '';
let start = this.loc();
while (this.peek) {
if (this.peek === '$') {
if (this.readch(1) === '{') {
let end = this.loc();
// consume the '${'
this.getch();
this.getch();
return new TemplateElement(tpl, false, {
start, end
});
}
}
if (this.peek === '`') {
let end = this.loc();
this.inTemplate = false;
this.getch();
return new TemplateElement(tpl, true, {
start, end
});
}
tpl += this.peek;
this.getch();
}
this.error('Unexpect end of file');
}
decimalLit() {
let v = '';
if(isDecimalDigit(this.peek)){
do {
v += this.peek;
this.getch();
} while (isDecimalDigit(this.peek));
}
return v;
}
optionalFraction() {
let v = '';
if (this.peek === '.' && isDecimalDigit(this.readch(1))) {
v += this.peek;
this.getch();
v += this.decimalLit();
}
return v;
}
parseNumber() {
let start = this.loc();
let v = '';
let type = 'integer';
if (this.peek === '-') { //optionalSign
v += '-';
this.getch();
}
v += this.decimalLit();
let fraction = this.optionalFraction();
if (fraction) {
type = 'float';
}
v += fraction;
//optionalType
if (this.peek === 'f') {
this.getch();
type = 'float';
return new NumberLiteral(parseFloat(v), type, {
start,
end: this.loc()
});
} else if (this.peek === 'd') {
this.getch();
type = 'double';
return new NumberLiteral(parseFloat(v), type, {
start,
end: this.loc()
});
} else if (this.peek === 'L') {
this.getch();
type = 'long';
return new NumberLiteral(parseInt(v), type, {
start,
end: this.loc()
});
}
if (type === 'integer') {
return new NumberLiteral(parseInt(v), type, {
start,
end: this.loc()
});
}
return new NumberLiteral(parseFloat(v), type, {
start,
end: this.loc()
});
}
scan() {
this.skipWhitespaces();
let start = this.loc();
if (this.peek === '/') {
if (this.readch(1) === '/') {
// consume the //
this.getch();
this.getch();
// comments
let str = '//';
while (this.peek !== '\n' && this.peek) {
str += this.peek;
this.getch();
}
return new Comment(str, {
start: start,
end: this.loc()
});
}
if (this.readch(1) === '*') {
if(this.readch(2) === '*') {
// consume the /**
this.getch();
this.getch();
this.getch();
let str = '/**';
do {
str += this.peek;
this.getch();
} while (!(this.peek === '*' && this.readch(1) === '/')); // ends with '*/'
// consume */
str += '*/';
this.getch();
this.getch();
return new Annotation(str, {
start: start,
end: this.loc()
});
}
//only /*
this.error(`Only '//' or '/**' allowed`);
}
}
if (this.inTemplate) {
if (this.peek === '}') {
let str = '';
this.getch();
let start = this.loc();
while (this.peek) {
if (this.peek === '$') {
if (this.readch(1) === '{') {
let end = this.loc();
// consume '${'
this.getch();
this.getch();
return new TemplateElement(str, false, {
start, end
});
}
}
if (this.peek === '`') {
let end = this.loc();
this.inTemplate = false;
this.getch();
return new TemplateElement(str, true, {
start, end
});
}
str += this.peek;
this.getch();
}
this.error('Unexpect end of file');
}
}
if (this.peek === '+' && this.readch(1) === '+') {
// consume '++'
this.getch();
this.getch();
return new OperatorToken(Tag.INCREMENT, '++', {
start,
end: this.loc()
});
}
if (this.peek === '-' && this.readch(1) === '-') {
// consume '--'
this.getch();
this.getch();
return new OperatorToken(Tag.DECREMENT, '--', {
start,
end: this.loc()
});
}
if (this.peek === '=' && this.readch(1) === '=') {
// consume '=='
this.getch();
this.getch();
return new OperatorToken(Tag.EQ, '==', {
start,
end: this.loc()
});
}
if(this.peek === '!' && this.readch(1) === '=') {
// consume '!='
this.getch();
this.getch();
return new OperatorToken(Tag.NEQ, '!=', {
start,
end: this.loc()
});
}
if(this.peek === '>') {
this.getch();
if(this.peek === '=') {
// consume '>='
this.getch();
return new OperatorToken(Tag.GTE, '>=', {
start,
end: this.loc()
});
}
return new OperatorToken(Tag.GT, '>', {
start,
end: this.loc()
});
}
if(this.peek === '<') {
this.getch();
if(this.peek === '=') {
// consume '>='
this.getch();
return new OperatorToken(Tag.LTE, '<=', {
start,
end: this.loc()
});
}
return new OperatorToken(Tag.LT, '<', {
start,
end: this.loc()
});
}
switch (this.peek) {
case '\'':
case '"':
return this.parseString();
case '`': {
this.inTemplate = true;
return this.parseTemplateString();
}
}
// number = optionalSign decimalLit optionalFraction optionalType
// optionalFraction = .decimalLit | ε
// decimalLit = decimalDigit { decimalDigit }
// optionalType = "L" | "f" | "d" | ε
// decimalDigit = "0" … "9"
// optionalSign = "-" | ε
if (isDecimalDigit(this.peek) ||
(this.peek === '-' && isDecimalDigit(this.readch(1)))) {
return this.parseNumber();
}
if (isLetter(this.peek) || this.peek === '_' ||
this.peek === '$') {
let str = '';
do {
if(this.peek === '-' && !isLetter(this.readch(1))) {
break;
}
str += this.peek;
this.getch();
} while (isLetter(this.peek) ||
isDecimalDigit(this.peek) ||
this.peek === '_' ||
this.peek === '-');
// reserve words
if (this.words.has(str)) {
var keyword = this.words.get(str);
return new WordToken(keyword.tag, keyword.lexeme, {
start: start,
end: this.loc()
});
}
return new WordToken(Tag.ID, str, {
start: start,
end: this.loc()
});
}
if (this.peek === '@') {
const vid = this.parseVID();
if(this.peek === '('){
return new WordToken(Tag.NOTE, vid, {
start,
end: this.loc()
});
}
return new WordToken(Tag.VID, vid, {
start,
end: this.loc()
});
}
if (this.peek === '&') {
this.getch();
if (this.peek === '&') {
this.getch();
return new OperatorToken(Tag.AND, '&&', {
start,
end: this.loc()
});
}
this.error(`Unexpect ${this.peek} after '&', expect '&'`);
}
if (this.peek === '|') {
this.getch();
if (this.peek === '|') {
this.getch();
return new OperatorToken(Tag.OR, '||', {
start,
end: this.loc()
});
}
this.error(`Unexpect ${this.peek} after '|', expect '|'`);
}
var tok = new Token(this.peek, {
start,
end: this.loc()
});
this.peek = ' ';
return tok;
}
parseVID() {
let str = '@';
this.getch();
if (!isLetter(this.peek)) {
this.error(`Unexpect ${this.peek} after @`);
}
do {
str += this.peek;
this.getch();
} while (isLetter(this.peek) ||
isDecimalDigit(this.peek) ||
this.peek === '_');
return str;
}
}
// The Lexer class is this module's sole export.
module.exports = Lexer;