/* [Hosting-page notice, not part of the program] Code pull finished; the page will refresh automatically. */
#include "lexer.h"
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <memory.h>
#include "lexer.h"
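/*****************************
Lexical grammar (summary; readOptToken recognizes more operators than are listed here)
digit          0-9
letter         a-z | A-Z | _
letterOrDigit  digit | letter
identify       letter letterOrDigit*
number         0 | [1-9] digit*
operator       + | - | * | / | % | =
               | => | ( | ) | { | } | > | < | >= | <= | == | !=
               | ,
key            var | print
               | if | else | do | while | for
               | def | return | break | continue
               | true | false
comment_line   // .* newline
comment_block  / * .* * /   (written with spaces so that this comment is not closed early)
float          number . digit+
string         ' .* ' | " .* "
**************************/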
Lexer *createLexer()
{
Lexer *lexer;
lexer = (Lexer*)calloc(sizeof(Lexer), 1);
return lexer;
}
/*
 * Attach an input buffer to a lexer and rewind it to the beginning.
 *
 * lexer   lexer to initialise; must not be null (asserted).
 * buffer  input characters; the pointer is stored as-is, not copied.
 * size    number of characters in buffer.
 *
 * Returns lexer.
 */
Lexer *initLexer(Lexer *lexer, char *buffer, int size)
{
    assert(lexer);

    lexer->pos = 0;
    lexer->size = size;
    lexer->buffer = buffer;
    resetLocation(&lexer->location);

    return lexer;
}
/*
 * Load the whole content of an open file into a freshly allocated buffer
 * and initialise the lexer with it.
 *
 * lexer  lexer to initialise; must not be null (asserted).
 * fl     open, seekable stream; must not be null (asserted). The stream is
 *        not closed here.
 *
 * Returns lexer on success. Returns 0 after printing a message when the
 * file size cannot be determined, the file is larger than 256 MiB, memory
 * cannot be allocated, or the content cannot be read completely.
 * On success the buffer is handed to the lexer; this function does not
 * free it. On failure the buffer is released before returning.
 */
Lexer *initLexerByFile(Lexer *lexer,FILE *fl)
{
    assert(lexer);
    assert(fl);

    if(fseek(fl, 0, SEEK_END) != 0)
    {
        printf("文件读取错误.\n");
        return 0;
    }

    long int size = ftell(fl);
    if(size < 0)
    {
        /* ftell reports failure with -1; never use that as a length. */
        printf("文件读取错误.\n");
        return 0;
    }
    if(size > 1024 * 1024 * 256)
    {
        printf("文件太长,超过256m");
        return 0;
    }

    if(fseek(fl, 0, SEEK_SET) != 0)
    {
        printf("文件读取错误.\n");
        return 0;
    }

    /* Ask for at least one byte so an empty file does not rely on malloc(0). */
    size_t length = (size_t)size;
    char *buffer = (char*)malloc(length > 0 ? length : 1);
    if(!buffer)
    {
        printf("内存不足,无法读取文件.\n");
        return 0;
    }

    if(fread(buffer, 1, length, fl) != length)
    {
        printf("文件读取错误.\n");
        free(buffer);
        return 0;
    }

    return initLexer(lexer, buffer, (int)size);
}
/*
 * Build a TKOPT_END token stamped with the lexer's current location.
 * No input is consumed.
 */
Token *endToken(Lexer *self)
{
    Token *result = createToken();

    return initToken(result, TKOPT_END, &self->location, 0);
}
/*
 * Read the next token without running the keyword check.
 *
 * When self->hasComment is 0, comment tokens are freed and skipped; the
 * skipping is done in a loop on readLexerToken so that the token after a
 * comment is returned exactly like any other token (no keyword check) and
 * long runs of comments do not deepen the call stack.
 *
 * Returns the token. When readLexerToken yields no token a lexical error is
 * reported through printLexerError and 0 is returned.
 */
Token *readNKToken(Lexer *self)
{
    for(;;)
    {
        Token *token = readLexerToken(self);
        if(!token)
        {
            printLexerError(self, "无法读取出记号", 0);
            return 0;
        }
        if(self->hasComment != 0 || token->type != TK_COMMENT)
        {
            return token;
        }
        /* Comments are not wanted: drop this one and read on. */
        freeToken(token);
    }
}
/*
 * Read the next token via readNKToken and, when one was obtained, pass it
 * to checkKeyToken before returning it.
 *
 * Returns the token, or 0 when readNKToken returned 0.
 */
Token *readToken(Lexer *self)
{
    Token *result = readNKToken(self);

    if(result == 0)
    {
        return result;
    }
    checkKeyToken(result);
    return result;
}
/*
 * Read one raw token from the input.
 *
 * Skippable characters and newlines are consumed in a loop (not by
 * recursion), so the stack depth does not grow with the length of a blank
 * run. The first significant character selects the reader:
 *   letter        -> readIdentify
 *   digit         -> readNumber
 *   single quote  -> readStringSingle
 *   double quote  -> readString
 *   anything else -> readOptToken
 *
 * Returns a TK_END token when the input is exhausted, otherwise whatever
 * the selected reader returns.
 */
Token *readLexerToken(Lexer *self)
{
    assert(self);

    do
    {
        if(!readChar(self))
        {
            /* End of input. */
            Token *token = createToken();
            return initToken(token, TK_END, &(self->location), 0);
        }
    }
    while(isSkip(self->ch) || isNewLine(self->ch));

    if(isLetter(self->ch))
    {
        return readIdentify(self);
    }
    if(isDigit(self->ch))
    {
        return readNumber(self);
    }
    if(self->ch == '\'')
    {
        return readStringSingle(self);
    }
    if(self->ch == '"')
    {
        return readString(self);
    }
    return readOptToken(self);
}
/*
 * Consume the next input character into self->ch and update the location.
 *
 * For a newline the current column is first saved in self->column (it is
 * read back by ungetChar) and enterLocation is called; for any other
 * character moveLocation is called.
 *
 * Returns 1 when a character was read, 0 when pos has reached size.
 */
int readChar(Lexer *self)
{
    if(self->pos < self->size)
    {
        self->ch = self->buffer[self->pos];
        self->pos++;

        if(!isNewLine(self->ch))
        {
            moveLocation(&self->location);
            return 1;
        }

        self->column = self->location.column;
        enterLocation(&self->location);
        return 1;
    }
    return 0;
}
/*
 * Push the most recently read character back: step pos back by one and
 * undo the location change.
 *
 * A column of 0 is treated as "a newline was just read": the line number is
 * decremented and the column is restored from self->column. Otherwise only
 * the column is decremented.
 */
void ungetChar(Lexer *self)
{
    self->pos--;

    if(self->location.column != 0)
    {
        self->location.column--;
        return;
    }

    self->location.line--;
    self->location.column = self->column;
}
/*
 * Read an identifier whose first character has already been consumed.
 *
 * Letters and digits are consumed greedily; the first other character is
 * pushed back. The token location is the lexer location with the column
 * moved back by one, and the token text is a newly allocated,
 * NUL-terminated copy of the identifier.
 *
 * Returns a TK_IDENTIFY token.
 */
Token *readIdentify(Lexer *self)
{
    Token *token = createToken();
    char *first = self->buffer + self->pos - 1;
    int count = 1;                 /* the character consumed by the caller */

    initToken(token, TK_IDENTIFY, &self->location, 0);
    token->location.column--;

    for(;;)
    {
        if(!readChar(self))
        {
            break;
        }
        if(!isLetterOrDigit(self->ch))
        {
            ungetChar(self);
            break;
        }
        count++;
    }

    char *copy = malloc(sizeof(char) * (count + 1));
    memcpy(copy, first, count);
    copy[count] = '\0';
    token->text = copy;
    return token;
}
/*
 * Read a number whose first digit has already been consumed.
 *
 * Further digits are consumed. A letter inside the number is reported as a
 * lexical error. A '.' hands the token over to readFloat, with token->text
 * pointing at the first digit inside the input buffer and the number of
 * digits read after the first one. Any other character is pushed back and
 * ends the number.
 *
 * Returns a TK_NUMBER token holding a newly allocated, NUL-terminated copy
 * of the digits, or the result of readFloat.
 */
Token *readNumber(Lexer *self)
{
    Token *token = createToken();
    char *first = self->buffer + self->pos - 1;
    int extra = 0;                 /* digits after the first one */

    initToken(token, TK_NUMBER, &self->location, 0);
    token->location.column--;

    while(readChar(self))
    {
        if(isDigit(self->ch))
        {
            extra++;
            continue;
        }
        if(isLetter(self->ch))
        {
            printLexerError(self, "不合法的数值.数值中含有非法字符", 0);
            continue;
        }

        /* readFloat picks the start of the literal up from token->text. */
        token->text = first;
        if(self->ch == '.')
        {
            return readFloat(self, token, extra);
        }
        ungetChar(self);
        break;
    }

    int total = extra + 1;         /* add the digit consumed by the caller */
    char *copy = malloc(sizeof(char) * (total + 1));
    memcpy(copy, first, total);
    copy[total] = '\0';
    token->text = copy;
    return token;
}
/*
 * Finish reading a floating point literal after its '.' has been consumed.
 *
 * self   lexer positioned just after the '.'.
 * token  token started by readNumber; token->text must point at the first
 *        character of the literal inside the input buffer.
 * len    number of digits readNumber counted after the first digit.
 *
 * Digits are consumed; a letter or a second '.' is reported as a lexical
 * error; any other character is pushed back and ends the literal.
 *
 * Returns token with type TK_FLOAT and text replaced by a newly allocated,
 * NUL-terminated copy of the literal.
 */
Token *readFloat(Lexer *self, Token *token, int len)
{
    char *first = token->text;
    int total = len + 1;           /* digits counted so far plus the '.' */

    token->type = TK_FLOAT;

    while(readChar(self))
    {
        if(isDigit(self->ch))
        {
            total++;
            continue;
        }
        if(isLetter(self->ch))
        {
            printLexerError(self, "不合法的浮点数.浮点中含有非法字符", 0);
            continue;
        }
        if(self->ch == '.')
        {
            printLexerError(self, "不合法的浮点数.浮点中有多个.", 0);
            continue;
        }
        ungetChar(self);
        break;
    }

    total++;                       /* the leading digit is not part of len */

    char *copy = malloc(sizeof(char) * (total + 1));
    memcpy(copy, first, total);
    copy[total] = '\0';
    token->text = copy;
    return token;
}
Token *readOptToken(Lexer *self)
{
TokenType type;
type=TK_NONE;
Token *token;
token = createToken();
initToken(token, type, &(self->location), 0);
switch (self->ch)
{
case '+':
{
if(readChar(self))
{
if(self->ch == '+')
{
type = TKOPT_INC;
break;
}
else if(self->ch == '=')
{
type = TKOPT_ADDASSIGN;
break;
}
ungetChar(self);
}
type = TKOPT_ADD;
break;
}
case '-':
{
if(readChar(self))
{
if(self->ch == '-')
{
type = TKOPT_DEC;
break;
}
else if(self->ch == '=')
{
type = TKOPT_SUBASSIGN;
break;
}
ungetChar(self);
}
type = TKOPT_SUB;
break;
}
case '*':
{
if(readChar(self))
{
if(self->ch == '=')
{
type = TKOPT_MULASSIGN;
break;
}
ungetChar(self);
}
type = TKOPT_MUL;
break;
}
case '/':
{
if(readChar(self))
{
if(self->ch == '/')
{
return readCommentLine(self);
}
else if(self->ch == '*')
{
return readCommentBlock(self);
}
else if(self->ch == '=')
{
type = TKOPT_DIVASSIGN;
break;
}
ungetChar(self);
}
type = TKOPT_DIV;
break;
}
case '%':
{
if(readChar(self))
{
if(self->ch == '=')
{
type = TKOPT_MODASSIGN;
break;
}
ungetChar(self);
}
type = TKOPT_MOD;
break;
}
case '=':
{
if(readChar(self))
{
if(self->ch == '>')
{
type = TKOPT_TO;
break;
}
else if(self->ch == '=')
{
type = TKOPT_EQUAL;
break;
}
ungetChar(self);
}
type = TKOPT_ASSIGN;
break;
}
case '&':
{
if(readChar(self))
{
if(self->ch == '=')
{
type = TKOPT_BITANDASSIGN;
break;
}
else if(self->ch == '&')
{
if(readChar(self))
{
if(self->ch == '=')
{
type = TKOPT_ANDASSIGN;
break;
}
ungetChar(self);
}
type = TKOPT_AND;
break;
}
ungetChar(self);
}
type = TKOPT_BITAND;
break;
}
case '|':
{
if(readChar(self))
{
if(self->ch == '=')
{
type = TKOPT_BITORASSIGN;
break;
}
else if(self->ch == '|')
{
if(readChar(self))
{
if(self->ch == '=')
{
type = TKOPT_ORASSIGN;
break;
}
ungetChar(self);
}
type = TKOPT_OR;
break;
}
ungetChar(self);
}
type = TKOPT_BITOR;
break;
}
case '~':
{
type = TKOPT_BITNOT;
break;
}
case '^':
{
if(readChar(self))
{
if(self->ch == '=')
{
type = TKOPT_BITXORASSIGN;
break;
}
ungetChar(self);
}
type = TKOPT_BITXOR;
break;
}
case '>':
{
if(readChar(self))
{
if(self->ch == '=')
{
type = TKOPT_MORETHAN;
break;
}
else if(self->ch == '>')
{
if(readChar(self))
{
if(self->ch == '=')
{
type = TKOPT_SHIFTRIGHTASSIGN;
break;
}
ungetChar(self);
}
type = TKOPT_SHIFTRIGHT;
break;
}
ungetChar(self);
}
type = TKOPT_MORE;
break;
}
case '<':
{
if(readChar(self))
{
if(self->ch == '=')
{
type = TKOPT_LESSTHAN;
break;
}
else if(self->ch == '<')
{
if(readChar(self))
{
if(self->ch == '=')
{
type = TKOPT_SHIFTLEFTASSIGN;
break;
}
ungetChar(self);
}
type = TKOPT_SHIFTLEFT;
break;
}
ungetChar(self);
}
type = TKOPT_LESS;
break;
}
case '!':
{
if(readChar(self))
{
if(self->ch == '=')
{
type = TKOPT_NOTEQUAL;
break;
}
ungetChar(self);
}
token->type = TKOPT_NOT;
break;
}
case '(':
{
type = TKOPT_LEFTBRACKET;
break;
}
case ')':
{
type = TKOPT_RIGHTBRACKET;
break;
}
case '{':
{
type = TKOPT_LEFTBLOCK;
break;
}
case '}':
{
type = TKOPT_RIGHTBLOCK;
break;
}
case '[':
{
type = TKOPT_LEFTINDEX;
break;
}
case ']':
{
type = TKOPT_RIGHTINDEX;
break;
}
case ':':
{
type = TKOPT_AS;
break;
}
case ',':
{
type = TKOPT_COMMA;
break;
}
case ';':
{
type = TKOPT_END;
break;
}
case '.':
{
type = TKOPT_DOT;
break;
}
default:
{
token->type = TK_IDENTIFY;
char buff[32];
token->text = buff;
sprintf(buff, "(%x)", self->ch);
printLexerError(self, "未知的字符 ", token);
}
}
token->type = type;
return token;
}
/*
 * Read a line comment after its "//" introducer has been consumed.
 *
 * Characters are consumed up to and including the next newline, or up to
 * the end of the input. The token text is a newly allocated, NUL-terminated
 * copy of the comment without the newline. The token location is the lexer
 * location with the column moved back by two.
 *
 * Returns a TK_COMMENT token.
 */
Token *readCommentLine(Lexer *self)
{
    Token *token = createToken();
    char *first = self->buffer + self->pos;
    int count = 0;

    initToken(token, TK_COMMENT, &self->location, 0);
    token->location.column -= 2;

    while(readChar(self) && !isNewLine(self->ch))
    {
        count++;
    }

    char *copy = (char*)malloc(sizeof(char) * (count + 1));
    memcpy(copy, first, count);
    copy[count] = '\0';
    token->text = copy;
    return token;
}
/*
 * Read a block comment after its opening two characters have been consumed.
 *
 * The comment ends at the first '*' that is immediately followed by '/'.
 * The character after a '*' is only peeked at, never consumed blindly, so a
 * comment closed by a run of stars followed by '/' is terminated correctly.
 *
 * The token text is a newly allocated, NUL-terminated copy of everything
 * between the opening and the closing pair. The token location is the lexer
 * location with the column moved back by two.
 *
 * Returns a TK_COMMENT token. When the input ends before the terminator a
 * lexical error is reported through printLexerError and 0 is returned.
 */
Token *readCommentBlock(Lexer *self)
{
    Token *token = createToken();
    char *start = self->buffer + self->pos;
    int len = 0;

    initToken(token, TK_COMMENT, &(self->location), 0);
    token->location.column -= 2;

    while(readChar(self))
    {
        if(self->ch == '*'
           && self->pos < self->size
           && self->buffer[self->pos] == '/')
        {
            /* Consume the '/' that closes the comment. */
            readChar(self);

            char *buffer = (char*)malloc(sizeof(char) * (len + 1));
            memcpy(buffer, start, len);
            buffer[len] = '\0';
            token->text = buffer;
            return token;
        }
        len++;
    }

    printLexerError(self, "块注释后面缺少*/结尾", 0);
    return 0;
}
/*
 * Read a double-quoted string after its opening quote has been consumed.
 *
 * A backslash and the character following it are skipped as a pair, so an
 * escaped double quote does not end the string. When the closing quote is
 * found, the raw text between the quotes is copied into a newly allocated,
 * NUL-terminated buffer and passed to handleStringToken for string
 * processing.
 *
 * The token location is the lexer location with the column moved back by
 * one.
 *
 * Returns a TK_STRING token. When the input ends before the closing quote a
 * lexical error is reported through printLexerError and 0 is returned.
 */
Token *readString(Lexer *self)
{
    Token *token = createToken();
    char *start = self->buffer + self->pos;
    int len = 0;

    initToken(token, TK_STRING, &(self->location), 0);
    token->location.column -= 1;

    while(readChar(self))
    {
        if(self->ch == '"')
        {
            char *buffer = (char*)malloc(sizeof(char) * (len + 1));
            memcpy(buffer, start, len);
            buffer[len] = '\0';
            token->text = buffer;
            /* Post-process the copied text as a string. */
            handleStringToken(token);
            return token;
        }
        if(self->ch == '\\')
        {
            /* Skip the escaped character so it cannot end the string. */
            len++;
            if(!readChar(self))
            {
                break;
            }
        }
        len++;
    }

    printLexerError(self, "字符串后面缺少\"结尾", 0);
    return 0;
}
/*
 * Read a single-quoted string after its opening quote has been consumed.
 *
 * A backslash and the character following it are skipped as a pair, so an
 * escaped single quote does not end the string. No escape processing is
 * done here: the token text is a newly allocated, NUL-terminated copy of
 * the raw characters between the quotes, backslashes included.
 *
 * The token location is the lexer location with the column moved back by
 * one.
 *
 * Returns a TK_STRING token. When the input ends before the closing quote a
 * lexical error is reported through printLexerError and 0 is returned.
 */
Token *readStringSingle(Lexer *self)
{
    Token *token = createToken();
    char *start = self->buffer + self->pos;
    int len = 0;

    initToken(token, TK_STRING, &(self->location), 0);
    token->location.column -= 1;

    while(readChar(self))
    {
        if(self->ch == '\'')
        {
            char *buffer = (char*)malloc(sizeof(char) * (len + 1));
            memcpy(buffer, start, len);
            buffer[len] = '\0';
            token->text = buffer;
            return token;
        }
        if(self->ch == '\\')
        {
            /* Skip the escaped character so it cannot end the string. */
            len++;
            if(!readChar(self))
            {
                break;
            }
        }
        len++;
    }

    printLexerError(self, "字符串后面缺少'结尾", 0);
    return 0;
}
/*
 * Return 1 when c is a decimal digit ('0'..'9'), otherwise 0.
 *
 * Defined without `inline`: under C99/C11 a plain `inline` definition does
 * not provide an external definition, which leaves calls that are not
 * inlined unresolved at link time.
 */
int isDigit(char c)
{
    return c >= '0' && c <= '9';
}
/*
 * Return 1 when c is an octal digit ('0'..'7'), otherwise 0.
 *
 * Defined without `inline`: under C99/C11 a plain `inline` definition does
 * not provide an external definition, which leaves calls that are not
 * inlined unresolved at link time.
 */
int isDigitOct(char c)
{
    return c >= '0' && c <= '7';
}
/*
 * Return 1 when c is a hexadecimal digit ('0'..'9', 'a'..'f', 'A'..'F'),
 * otherwise 0.
 *
 * Defined without `inline`: under C99/C11 a plain `inline` definition does
 * not provide an external definition, which leaves calls that are not
 * inlined unresolved at link time.
 */
int isDigitHex(char c)
{
    if(isDigit(c))
    {
        return 1;
    }
    return (c >= 'a' && c <= 'f') || (c >= 'A' && c <= 'F');
}
/*
 * Return 1 when c may start an identifier: an ASCII letter or '_'.
 * Otherwise return 0.
 *
 * Defined without `inline`: under C99/C11 a plain `inline` definition does
 * not provide an external definition, which leaves calls that are not
 * inlined unresolved at link time.
 */
int isLetter(char c)
{
    if(c == '_')
    {
        return 1;
    }
    return (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z');
}
/*
 * Return non-zero when c may continue an identifier, i.e. when isLetter or
 * isDigit accepts it; otherwise return 0.
 *
 * Defined without `inline`: under C99/C11 a plain `inline` definition does
 * not provide an external definition, which leaves calls that are not
 * inlined unresolved at link time.
 */
int isLetterOrDigit(char c)
{
    return isLetter(c) || isDigit(c);
}
/*
 * Return 1 when c is blank space the lexer skips: carriage return, tab,
 * vertical tab or space. Otherwise return 0.
 *
 * '\r' is skipped for Windows line endings; it is not needed on Unix, and
 * input that uses a lone '\r' as its line ending is not handled.
 *
 * Defined without `inline`: under C99/C11 a plain `inline` definition does
 * not provide an external definition, which leaves calls that are not
 * inlined unresolved at link time.
 */
int isSkip(char c)
{
    switch(c)
    {
    case '\r':
    case '\t':
    case '\v':
    case ' ':
        return 1;
    default:
        return 0;
    }
}
/*
 * Return 1 when c is the line feed character '\n', otherwise 0.
 *
 * Defined without `inline`: under C99/C11 a plain `inline` definition does
 * not provide an external definition, which leaves calls that are not
 * inlined unresolved at link time.
 */
int isNewLine(char c)
{
    return c == '\n';
}
/*
 * Report a lexical error and terminate the program.
 *
 * lexer  lexer whose current line number is printed; must not be null.
 * text   error message; must not be null.
 * token  optional token to print after the message; may be 0.
 *
 * The message is written with printf, then the process exits with status 1,
 * so this function does not return.
 */
void printLexerError(Lexer *lexer, char *text, Token *token)
{
    assert(text);
    assert(lexer);

    printf("词法错误[行: %d]: %s\n", lexer->location.line, text);

    if(token != 0)
    {
        printToken(token);
    }

    exit(1);
}
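/*
 * [Hosting-page notice, not part of the program]
 * This location may contain content that is not suitable for display, so the
 * page does not show it. You can review and modify it with the relevant
 * editing functions.
 * If you confirm that the content involves no inappropriate language, pure
 * advertising, violence, vulgar or pornographic material, infringement,
 * piracy, false or worthless content, or content that violates national laws
 * and regulations, you may click submit to appeal and it will be processed as
 * soon as possible.
 */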