1 Star 0 Fork 0

盲大人/slang

加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
lexer.c 12.32 KB
一键复制 编辑 原始数据 按行查看 历史
盲大人 提交于 2015-11-06 11:32 . 修复lexer中的bug
12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801
#include "lexer.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <memory.h>
#include "lexer.h"
/*****************************
digit 0-9
letter a-z|A-Z|_
letterOrdigit digit|letter
identify letter letterOrdigit*
number 0 | [1-9] digit*
operator +|-|*|/|%|=
| =>|(|)|{|} | >| <|>=|<=|==|!=
| ,
key var | print
|if|else |do |while |for
|def | return |break |continue
| true |false
comment_line //. newline
comment_block / *.* /
float number.digit+
string '.'| "."
**************************/
/*
 * Allocate a new, zero-filled Lexer.
 *
 * Returns the freshly allocated lexer, or a null pointer when calloc fails.
 * The lexer still has to be set up with initLexer() or initLexerByFile().
 */
Lexer *createLexer()
{
    Lexer *fresh = calloc(1, sizeof *fresh);
    return fresh;
}
/*
 * Point an existing lexer at an in-memory source buffer.
 *
 * lexer  - lexer to initialise; must not be null (asserted).
 * buffer - source text; only the pointer is stored, the data is not copied.
 * size   - number of bytes readable from buffer.
 *
 * The read position is rewound to 0 and the location is reset through
 * resetLocation(). Returns the same lexer pointer.
 */
Lexer *initLexer(Lexer *lexer, char *buffer, int size)
{
    assert(lexer);

    lexer->pos = 0;
    lexer->size = size;
    lexer->buffer = buffer;
    resetLocation(&lexer->location);

    return lexer;
}
/*
 * Load the whole content of an open stream into memory and initialise the
 * lexer with it.
 *
 * lexer - lexer to initialise; must not be null (asserted).
 * fl    - open, seekable stream; must not be null (asserted). The stream is
 *         not closed here.
 *
 * Returns the lexer on success. Returns 0 after printing a message when the
 * stream cannot be measured or rewound, is larger than 256 MiB, memory cannot
 * be allocated, or fewer bytes than expected could be read.
 *
 * On success the allocated buffer is handed to the lexer via initLexer();
 * nothing in this function releases it afterwards.
 *
 * NOTE(review): if the stream was opened in text mode on a platform that
 * translates line endings, fread may deliver fewer bytes than ftell reported,
 * which is treated as a read error here - confirm callers open with "rb".
 */
Lexer *initLexerByFile(Lexer *lexer,FILE *fl)
{
    assert(lexer);
    assert(fl);

    /* Measure the stream; ftell reports -1 on failure. */
    if (fseek(fl, 0, SEEK_END) != 0) {
        printf("文件读取错误.\n");
        return 0;
    }
    long int size = ftell(fl);
    if (size < 0) {
        printf("文件读取错误.\n");
        return 0;
    }
    if (size > 1024 * 1024 * 256) {
        printf("文件太长,超过256m");
        return 0;
    }
    if (fseek(fl, 0, SEEK_SET) != 0) {
        printf("文件读取错误.\n");
        return 0;
    }

    /* malloc(0) may legally return NULL, so always request at least one byte. */
    char *buffer = (char*)malloc(size > 0 ? (size_t)size : 1);
    if (!buffer) {
        printf("内存分配失败.\n");
        return 0;
    }

    if (fread(buffer, 1, (size_t)size, fl) != (size_t)size) {
        free(buffer); /* do not leak the buffer on a short read */
        printf("文件读取错误.\n");
        return 0;
    }

    return initLexer(lexer, buffer, (int)size);
}
/*
 * Build a token of type TKOPT_END positioned at the lexer's current location.
 * No input is consumed. Returns whatever initToken() returns for the new token.
 */
Token *endToken(Lexer *self)
{
    Token *fresh = createToken();

    return initToken(fresh, TKOPT_END, &self->location, 0);
}
/*
 * Read the next token without keyword classification (readToken() is the
 * variant that additionally runs checkKeyToken()).
 *
 * When self->hasComment is 0, comment tokens are freed and skipped; otherwise
 * they are returned like any other token.
 *
 * The previous implementation skipped a comment by calling readToken(), which
 * applied checkKeyToken() to the following token even for direct callers of
 * this function (and twice for callers of readToken()). Skipping is now a
 * plain loop over readLexerToken().
 *
 * Returns the token. If readLexerToken() yields a null pointer the error is
 * reported through printLexerError().
 */
Token *readNKToken(Lexer *self)
{
    for (;;) {
        Token *token = readLexerToken(self);

        if (!token) {
            printLexerError(self, "无法读取出记号", 0);
            return 0;
        }
        if (self->hasComment == 0 && token->type == TK_COMMENT) {
            /* Comments are not wanted: drop this one and read on. */
            freeToken(token);
            continue;
        }
        return token;
    }
}
/*
 * Read the next token via readNKToken() and, when one was produced, pass it
 * through checkKeyToken() before returning it.
 *
 * Returns the token, or a null pointer if readNKToken() returned none.
 */
Token *readToken(Lexer *self)
{
    Token *next = readNKToken(self);

    if (!next) {
        return next;
    }
    checkKeyToken(next);
    return next;
}
/*
 * Read one raw token from the input.
 *
 * Characters accepted by isSkip() and isNewLine() are consumed and ignored.
 * The first remaining character selects the reader:
 *   letter        -> readIdentify()
 *   digit         -> readNumber()
 *   single quote  -> readStringSingle()
 *   double quote  -> readString()
 *   anything else -> readOptToken()
 * When the input is exhausted a TK_END token is returned.
 *
 * Blank characters are skipped in a loop; the earlier version made one
 * recursive call per skipped character, so a long run of blanks or empty
 * lines could exhaust the stack.
 */
Token *readLexerToken(Lexer *self)
{
    assert(self);

    do {
        if (!readChar(self)) {
            /* End of input. */
            Token *token = createToken();
            return initToken(token, TK_END, &(self->location), 0);
        }
    } while (isSkip(self->ch) || isNewLine(self->ch));

    if (isLetter(self->ch)) {
        return readIdentify(self);
    }
    if (isDigit(self->ch)) {
        return readNumber(self);
    }
    if (self->ch == '\'') {
        return readStringSingle(self);
    }
    if (self->ch == '"') {
        return readString(self);
    }
    return readOptToken(self);
}
/*
 * Fetch the next input character into self->ch and advance the position.
 *
 * Returns 0 (leaving the lexer untouched) when the position has reached
 * self->size, otherwise 1.
 *
 * For a newline the current column is saved in self->column before
 * enterLocation() is called; ungetChar() uses that saved value when it steps
 * back over the newline. Any other character goes through moveLocation().
 */
int readChar(Lexer *self)
{
    if (self->pos >= self->size) {
        return 0;
    }

    self->ch = self->buffer[self->pos];
    self->pos++;

    if (!isNewLine(self->ch)) {
        moveLocation(&self->location);
        return 1;
    }

    self->column = self->location.column;
    enterLocation(&self->location);
    return 1;
}
/*
 * Step back one character in the input.
 *
 * The position is decremented unconditionally. A column of 0 is taken to mean
 * the character being returned was a newline (presumably enterLocation()
 * resets the column to 0 - confirm), in which case the line is decremented and
 * the column saved by readChar() is restored. Otherwise only the column is
 * decremented. self->ch is left unchanged.
 */
void ungetChar(Lexer *self)
{
    self->pos--;

    if (self->location.column != 0) {
        self->location.column--;
        return;
    }

    self->location.line--;
    self->location.column = self->column;
}
/*
 * Read an identifier whose first character has already been consumed.
 *
 * Further characters are taken while isLetterOrDigit() accepts them; the first
 * rejected character is pushed back with ungetChar(). The token is of type
 * TK_IDENTIFY, its column is moved back by one so it refers to the first
 * character, and its text is a newly malloc'ed, NUL-terminated copy of the
 * identifier.
 */
Token *readIdentify(Lexer *self)
{
    /* The first character sits just before the current read position. */
    char *first = self->buffer + self->pos - 1;
    int length = 1;

    Token *token = createToken();
    initToken(token, TK_IDENTIFY, &self->location, 0);
    token->location.column--;

    while (readChar(self)) {
        if (!isLetterOrDigit(self->ch)) {
            ungetChar(self);
            break;
        }
        length++;
    }

    char *text = malloc((size_t)length + 1);
    memcpy(text, first, length);
    text[length] = '\0';
    token->text = text;
    return token;
}
/*
 * Read a number whose first digit has already been consumed.
 *
 * Digits are accumulated. A letter inside the number is reported through
 * printLexerError(). On a '.', token->text is pointed at the first digit
 * inside the lexer buffer and the token is handed to readFloat() together
 * with the number of digits that followed the first one. Any other character
 * is pushed back and ends the number.
 *
 * For an integer the result is a TK_NUMBER token whose column refers to the
 * first digit and whose text is a newly malloc'ed, NUL-terminated copy.
 */
Token *readNumber(Lexer *self)
{
    /* The first digit sits just before the current read position. */
    char *first = self->buffer + self->pos - 1;
    int extra = 0; /* digits seen after the first one */

    Token *token = createToken();
    initToken(token, TK_NUMBER, &self->location, 0);
    token->location.column--;

    while (readChar(self)) {
        if (isDigit(self->ch)) {
            extra++;
            continue;
        }
        if (isLetter(self->ch)) {
            printLexerError(self, "不合法的数值.数值中含有非法字符", 0);
            continue;
        }

        /* readFloat() picks the start of the literal up from token->text. */
        token->text = first;
        if (self->ch == '.') {
            return readFloat(self, token, extra);
        }
        ungetChar(self);
        break;
    }

    int length = extra + 1;
    char *text = malloc((size_t)length + 1);
    memcpy(text, first, length);
    text[length] = '\0';
    token->text = text;
    return token;
}
/*
 * Finish reading a floating point literal after its '.' has been consumed.
 *
 * token - token started by readNumber(); token->text must point at the first
 *         digit of the literal inside the lexer buffer.
 * len   - number of integer-part digits after the first one.
 *
 * Fraction digits are accumulated. A letter or a second '.' is reported through
 * printLexerError(); any other character is pushed back and ends the literal.
 * The token becomes TK_FLOAT and its text is replaced by a newly malloc'ed,
 * NUL-terminated copy of the whole literal.
 */
Token *readFloat(Lexer *self, Token *token, int len)
{
    char *first = token->text;

    token->type = TK_FLOAT;
    len += 1; /* the '.' that brought us here */

    while (readChar(self)) {
        if (isDigit(self->ch)) {
            len++;
            continue;
        }
        if (isLetter(self->ch)) {
            printLexerError(self, "不合法的浮点数.浮点中含有非法字符", 0);
            continue;
        }
        if (self->ch == '.') {
            printLexerError(self, "不合法的浮点数.浮点中有多个.", 0);
            continue;
        }
        ungetChar(self);
        break;
    }

    len += 1; /* the leading digit, which the caller did not count */

    char *text = malloc((size_t)len + 1);
    memcpy(text, first, len);
    text[len] = '\0';
    token->text = text;
    return token;
}
/*
 * Consume the next character only when it equals `expected`.
 * Returns 1 when it was consumed; otherwise the character (if any) is pushed
 * back and 0 is returned.
 */
static int acceptOptChar(Lexer *self, int expected)
{
    if (!readChar(self)) {
        return 0;
    }
    if (self->ch == expected) {
        return 1;
    }
    ungetChar(self);
    return 0;
}

/*
 * Read an operator or punctuation token whose first character is already in
 * self->ch, always preferring the longest operator that matches.
 *
 * "//" and "/ *" are forwarded to readCommentLine() / readCommentBlock() and
 * their token is returned instead. A character that starts no known operator
 * is reported through printLexerError().
 *
 * Fixes over the previous version:
 *  - a lone '!' now yields TKOPT_NOT (the type used to be written to the token
 *    directly and was then overwritten with TK_NONE);
 *  - the pre-allocated token is released before a comment token is returned
 *    instead of being leaked;
 *  - the unknown-character message is formatted with snprintf from an unsigned
 *    byte, so bytes >= 0x80 are not printed sign-extended.
 */
Token *readOptToken(Lexer *self)
{
    TokenType type = TK_NONE;
    Token *token = createToken();

    /* Capture the location before any look-ahead character is consumed. */
    initToken(token, type, &(self->location), 0);

    switch (self->ch) {
    case '+':
        type = TKOPT_ADD;
        if (readChar(self)) {
            if (self->ch == '+') {
                type = TKOPT_INC;
            } else if (self->ch == '=') {
                type = TKOPT_ADDASSIGN;
            } else {
                ungetChar(self);
            }
        }
        break;
    case '-':
        type = TKOPT_SUB;
        if (readChar(self)) {
            if (self->ch == '-') {
                type = TKOPT_DEC;
            } else if (self->ch == '=') {
                type = TKOPT_SUBASSIGN;
            } else {
                ungetChar(self);
            }
        }
        break;
    case '*':
        type = acceptOptChar(self, '=') ? TKOPT_MULASSIGN : TKOPT_MUL;
        break;
    case '/':
        type = TKOPT_DIV;
        if (readChar(self)) {
            if (self->ch == '/') {
                freeToken(token); /* the comment reader creates its own token */
                return readCommentLine(self);
            }
            if (self->ch == '*') {
                freeToken(token);
                return readCommentBlock(self);
            }
            if (self->ch == '=') {
                type = TKOPT_DIVASSIGN;
            } else {
                ungetChar(self);
            }
        }
        break;
    case '%':
        type = acceptOptChar(self, '=') ? TKOPT_MODASSIGN : TKOPT_MOD;
        break;
    case '=':
        type = TKOPT_ASSIGN;
        if (readChar(self)) {
            if (self->ch == '>') {
                type = TKOPT_TO;
            } else if (self->ch == '=') {
                type = TKOPT_EQUAL;
            } else {
                ungetChar(self);
            }
        }
        break;
    case '&':
        type = TKOPT_BITAND;
        if (readChar(self)) {
            if (self->ch == '=') {
                type = TKOPT_BITANDASSIGN;
            } else if (self->ch == '&') {
                type = acceptOptChar(self, '=') ? TKOPT_ANDASSIGN : TKOPT_AND;
            } else {
                ungetChar(self);
            }
        }
        break;
    case '|':
        type = TKOPT_BITOR;
        if (readChar(self)) {
            if (self->ch == '=') {
                type = TKOPT_BITORASSIGN;
            } else if (self->ch == '|') {
                type = acceptOptChar(self, '=') ? TKOPT_ORASSIGN : TKOPT_OR;
            } else {
                ungetChar(self);
            }
        }
        break;
    case '~':
        type = TKOPT_BITNOT;
        break;
    case '^':
        type = acceptOptChar(self, '=') ? TKOPT_BITXORASSIGN : TKOPT_BITXOR;
        break;
    case '>':
        type = TKOPT_MORE;
        if (readChar(self)) {
            if (self->ch == '=') {
                type = TKOPT_MORETHAN;
            } else if (self->ch == '>') {
                type = acceptOptChar(self, '=') ? TKOPT_SHIFTRIGHTASSIGN
                                                : TKOPT_SHIFTRIGHT;
            } else {
                ungetChar(self);
            }
        }
        break;
    case '<':
        type = TKOPT_LESS;
        if (readChar(self)) {
            if (self->ch == '=') {
                type = TKOPT_LESSTHAN;
            } else if (self->ch == '<') {
                type = acceptOptChar(self, '=') ? TKOPT_SHIFTLEFTASSIGN
                                                : TKOPT_SHIFTLEFT;
            } else {
                ungetChar(self);
            }
        }
        break;
    case '!':
        type = acceptOptChar(self, '=') ? TKOPT_NOTEQUAL : TKOPT_NOT;
        break;
    case '(':
        type = TKOPT_LEFTBRACKET;
        break;
    case ')':
        type = TKOPT_RIGHTBRACKET;
        break;
    case '{':
        type = TKOPT_LEFTBLOCK;
        break;
    case '}':
        type = TKOPT_RIGHTBLOCK;
        break;
    case '[':
        type = TKOPT_LEFTINDEX;
        break;
    case ']':
        type = TKOPT_RIGHTINDEX;
        break;
    case ':':
        type = TKOPT_AS;
        break;
    case ',':
        type = TKOPT_COMMA;
        break;
    case ';':
        type = TKOPT_END;
        break;
    case '.':
        type = TKOPT_DOT;
        break;
    default:
    {
        /* Show the offending byte in hex; go through unsigned char so bytes
         * >= 0x80 are not sign-extended. */
        char buff[32];
        snprintf(buff, sizeof buff, "(%x)",
                 (unsigned int)(unsigned char)self->ch);
        token->type = TK_IDENTIFY;
        token->text = buff;
        printLexerError(self, "未知的字符 ", token);
        /* printLexerError() terminates the process; should that ever change,
         * do not let the address of the local buffer escape. */
        token->text = 0;
        break;
    }
    }

    token->type = type;
    return token;
}
/*
 * Read a line comment after its leading "//" has been consumed.
 *
 * Everything up to, but not including, the next newline (or the end of input)
 * becomes the text of a TK_COMMENT token; the newline itself is consumed. The
 * token's column is moved back by two so it refers to the start of "//". The
 * text is a newly malloc'ed, NUL-terminated copy.
 */
Token *readCommentLine(Lexer *self)
{
    char *first = self->buffer + self->pos;
    int length = 0;

    Token *token = createToken();
    initToken(token, TK_COMMENT, &self->location, 0);
    token->location.column -= 2;

    while (readChar(self) && !isNewLine(self->ch)) {
        length++;
    }

    char *text = (char*)malloc((size_t)length + 1);
    memcpy(text, first, length);
    text[length] = '\0';
    token->text = text;
    return token;
}
/*
 * Read a block comment after its opening marker (slash, star) has been
 * consumed.
 *
 * Everything up to, but not including, the closing star-slash becomes the text
 * of a TK_COMMENT token. The token's column is moved back by two so it refers
 * to the start of the opening marker. The text is a newly malloc'ed,
 * NUL-terminated copy.
 *
 * If the input ends before the closing marker, the error is reported through
 * printLexerError().
 *
 * The previous version consumed the character following a '*' without
 * re-examining it, so a comment ending in two stars and a slash was never
 * recognised as closed. The scan now only remembers whether the previous
 * character was a '*'.
 */
Token *readCommentBlock(Lexer *self)
{
    char *start = self->buffer + self->pos;
    int len = 0;       /* characters consumed after the opening marker */
    int afterStar = 0; /* non-zero when the previous character was '*' */

    Token *token = createToken();
    initToken(token, TK_COMMENT, &(self->location), 0);
    token->location.column -= 2;

    while (readChar(self)) {
        if (afterStar && self->ch == '/') {
            /* len covers the body plus the closing '*'; drop that star. */
            int textLen = len - 1;
            char *buffer = (char*)malloc((size_t)textLen + 1);
            memcpy(buffer, start, textLen);
            buffer[textLen] = '\0';
            token->text = buffer;
            return token;
        }
        afterStar = (self->ch == '*');
        len++;
    }

    printLexerError(self, "块注释后面缺少*/结尾", 0);
    return 0;
}
/*
 * Read a double-quoted string after its opening quote has been consumed.
 *
 * Characters are collected up to the closing '"'. A backslash makes the
 * following character part of the string unconditionally, so an escaped quote
 * does not end the literal. The raw text (backslashes included) is copied into
 * a newly malloc'ed, NUL-terminated buffer and then passed to
 * handleStringToken() for post-processing. The token's column is moved back by
 * one so it refers to the opening quote.
 *
 * If the input ends before the closing quote, the error is reported through
 * printLexerError().
 *
 * The previous version tested for a single quote instead of a backslash, so
 * \" ended the string early and a ' inside the string swallowed the character
 * after it.
 */
Token *readString(Lexer *self)
{
    char *start = self->buffer + self->pos;
    int len = 0;

    Token *token = createToken();
    initToken(token, TK_STRING, &(self->location), 0);
    token->location.column -= 1;

    while (readChar(self)) {
        if (self->ch == '"') {
            char *buffer = (char*)malloc((size_t)len + 1);
            memcpy(buffer, start, len);
            buffer[len] = '\0';
            token->text = buffer;
            /* Post-process the raw text of the literal. */
            handleStringToken(token);
            return token;
        }
        if (self->ch == '\\') {
            /* Keep the backslash and take the next character verbatim. */
            len++;
            if (!readChar(self)) {
                break;
            }
        }
        len++;
    }

    printLexerError(self, "字符串后面缺少\"结尾", 0);
    return 0;
}
/*
 * Read a single-quoted string after its opening quote has been consumed.
 *
 * Characters are collected up to the closing '\''. A backslash makes the
 * following character part of the string unconditionally, so \' does not end
 * the literal. The raw text (backslashes included) is copied into a newly
 * malloc'ed, NUL-terminated buffer; unlike readString(), no
 * handleStringToken() post-processing is applied. The token's column is moved
 * back by one so it refers to the opening quote.
 *
 * If the input ends before the closing quote, the error is reported through
 * printLexerError().
 *
 * The previous version repeated the closing-quote test where the escape check
 * belongs, which made the escape branch unreachable.
 */
Token *readStringSingle(Lexer *self)
{
    char *start = self->buffer + self->pos;
    int len = 0;

    Token *token = createToken();
    initToken(token, TK_STRING, &(self->location), 0);
    token->location.column -= 1;

    while (readChar(self)) {
        if (self->ch == '\'') {
            char *buffer = (char*)malloc((size_t)len + 1);
            memcpy(buffer, start, len);
            buffer[len] = '\0';
            token->text = buffer;
            return token;
        }
        if (self->ch == '\\') {
            /* Keep the backslash and take the next character verbatim. */
            len++;
            if (!readChar(self)) {
                break;
            }
        }
        len++;
    }

    printLexerError(self, "字符串后面缺少'结尾", 0);
    return 0;
}
/*
 * Returns non-zero when c is a decimal digit ('0'..'9').
 *
 * Defined without `inline`: in C99/C11 a plain `inline` definition does not
 * by itself provide an external definition, so calls that are not inlined
 * could fail to link.
 */
int isDigit(char c)
{
    return c >= '0' && c <= '9';
}
/*
 * Returns non-zero when c is an octal digit ('0'..'7').
 *
 * Defined without `inline`: in C99/C11 a plain `inline` definition does not
 * by itself provide an external definition, so calls that are not inlined
 * could fail to link.
 */
int isDigitOct(char c)
{
    return c >= '0' && c <= '7';
}
/*
 * Returns non-zero when c is a hexadecimal digit: a decimal digit as accepted
 * by isDigit(), or a letter in 'a'..'f' / 'A'..'F'.
 *
 * Defined without `inline`: in C99/C11 a plain `inline` definition does not
 * by itself provide an external definition, so calls that are not inlined
 * could fail to link.
 */
int isDigitHex(char c)
{
    if (isDigit(c)) {
        return 1;
    }
    return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}
/*
 * Returns non-zero when c may start an identifier: an ASCII letter
 * ('a'..'z', 'A'..'Z') or an underscore.
 *
 * Defined without `inline`: in C99/C11 a plain `inline` definition does not
 * by itself provide an external definition, so calls that are not inlined
 * could fail to link.
 */
int isLetter(char c)
{
    if (c == '_') {
        return 1;
    }
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
/*
 * Returns non-zero when c may continue an identifier, i.e. when either
 * isLetter() or isDigit() accepts it.
 *
 * Defined without `inline`: in C99/C11 a plain `inline` definition does not
 * by itself provide an external definition, so calls that are not inlined
 * could fail to link.
 */
int isLetterOrDigit(char c)
{
    if (isLetter(c)) {
        return 1;
    }
    return isDigit(c);
}
/*
 * Returns 1 for blank characters the lexer ignores: carriage return, tab,
 * vertical tab and space; 0 otherwise.
 *
 * The carriage return covers the "\r\n" line ending used on Windows. A lone
 * '\r' is only skipped here and is not treated as a line break (see
 * isNewLine()).
 *
 * Defined without `inline`: in C99/C11 a plain `inline` definition does not
 * by itself provide an external definition, so calls that are not inlined
 * could fail to link.
 */
int isSkip(char c)
{
    switch (c) {
    case '\r':
    case '\t':
    case '\v':
    case ' ':
        return 1;
    default:
        return 0;
    }
}
/*
 * Returns 1 when c is the line feed character '\n', 0 otherwise.
 *
 * Defined without `inline`: in C99/C11 a plain `inline` definition does not
 * by itself provide an external definition, so calls that are not inlined
 * could fail to link.
 */
int isNewLine(char c)
{
    return c == '\n' ? 1 : 0;
}
/*
 * Report a lexical error and terminate the program.
 *
 * lexer - lexer whose current line number is printed; must not be null.
 * text  - error message; must not be null.
 * token - optional token printed with printToken() after the message; may be
 *         a null pointer.
 *
 * The message goes to standard output. This function does not return: it ends
 * the process with exit status 1.
 */
void printLexerError(Lexer *lexer, char *text, Token *token)
{
    assert(text);
    assert(lexer);

    printf("词法错误[行: %d]: %s\n", lexer->location.line, text);

    if (token != 0) {
        printToken(token);
    }

    exit(1);
}
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化
C
1
https://gitee.com/xkaying/slang.git
[email protected]:xkaying/slang.git
xkaying
slang
slang
master

搜索帮助