编译原理课程设计报告课题名称:编译原理课程设计C-语言词法与语法分析器的实现C-词法与语法分析器的实现1.课程设计目标(1)题目实用性C-语言拥有一个完整语言的基本属性,通过编写C-语言的词法分析和语法分析,对于理解编译原理的相关理论和知识有很大的作用。
通过编写C-语言词法和语法分析程序,能够对编译原理的相关知识:正则表达式、有限自动机、语法分析等有一个比较清晰的了解和掌握。
(2)C-语言的词法说明①语言的关键字:else if int return void while所有的关键字都是保留字,并且必须是小写。
②专用符号:+ - * / < <= > >= == != = ; , ( ) [ ] { } /* */③其他标记是ID和NUM,通过下列正则表达式定义:ID = letter letter*NUM = digit digit*letter = a|..|z|A|..|Zdigit = 0|..|9注:ID表示标识符,NUM表示数字,letter表示一个字母,digit表示一个数字。
小写和大写字母是有区别的。
④空格由空白、换行符和制表符组成。
空格通常被忽略。
⑤注释用通常的c语言符号/ * . . . * /围起来。
注释可以放在任何空白出现的位置(即注释不能放在标记)上,且可以超过一行。
注释不能嵌套。
(3)程序设计目标能够对一个程序正确的进行词法及语法分析。
2.分析与设计(1)设计思想a.词法分析词法分析的实现主要利用有穷自动机理论。
有穷自动机可用作描述在输入串中识别模式的过程,因此也能用作构造扫描程序。
通过有穷自动机理论能够容易的设计出词法分析器。
b.语法分析语法分析采用递归下降分析。
递归下降法是语法分析中最易懂的一种方法。
它的主要原理是,对每个非终结符按其产生式结构构造相应语法分析子程序,其中终结符产生匹配命令,而非终结符则产生过程调用命令。
因为文法递归相应子程序也递归,所以称这种方法为递归子程序下降法或递归下降法。
其中子程序的结构与产生式结构几乎是一致的。
(2)程序流程图程序主流程图:词法分析: 语法分析:3.程序代码实现整个词法以及语法的程序设计在一个工程里面,一共包含了8个文件,分别为main.cpp、parse.cpp、scan.cpp、util.cpp、scan.h、util.h、 globals.h、parse.h,其中scan.cpp和scan.h为词法分析程序。
以下仅列出各个文件中的核心代码:Main.cpp#include "globals.h"#define NO_PARSE FALSE#include "util.h"#if NO_PARSE#include "scan.h"#else#include "parse.h"#endifint lineno=0;FILE * source;FILE * listing;FILE * code;int EchoSource = TRUE;int TraceScan=TRUE;int TraceParse=TRUE;int Error = FALSE;int main(int argc,char * argv[]){TreeNode * syntaxTree;char pgm[120];scanf("%s",pgm);source=fopen(pgm,"r");if(source==NULL){fprintf(stderr,"File %s not found\n",pgm);exit(1);}listing = stdout;fprintf(listing,"\nC COMPILATION: %s\n",pgm); #if NO_PARSEwhile(getToken()!=ENDFILE);#elsesyntaxTree = parse();if(TraceParse){fprintf(listing,"\nSyntaxtree:\n");printTree(syntaxTree);}#endiffclose(source);return 0;}Parse.cpp#include "globals.h"#include "util.h"#include "scan.h"#include "parse.h"static TokenType token; /* holds current token *//* function prototypes for recursive calls */static TreeNode * declaration_list(void);static TreeNode * declaration(void);static TreeNode * params(void);static TreeNode * param_list(void);static TreeNode * param(void);static TreeNode * compound_stmt(void);static TreeNode * local_declarations(void);static TreeNode * statement_list(void);static TreeNode * statement(void);static TreeNode * expression_stmt(void);static TreeNode * if_stmt(void);static TreeNode * while_stmt(void);static TreeNode * return_stmt(void);static TreeNode * expression(void);static TreeNode * var(void);static TreeNode * simple_exp(void);static TreeNode * additive_expression(void);static TreeNode * term(void);static TreeNode * factor(void);static TreeNode * args(void);static TreeNode * arg_list(void);static void syntaxError(char * message){ fprintf(listing,"\n>>> ");fprintf(listing,"Syntax error at line %d: %s",lineno,message); Error = TRUE;}/*判断读取的字符*/static void match(TokenType expected){if(token==expected){token=getToken( );}else{syntaxError("unexpected token -> ");printToken(token,tokenString);fprintf(listing," ");}}/*进行语法分析,构建语法树*/TreeNode * declaration_list(void){TreeNode * t= declaration();TreeNode * p= t;while ((token==INT) || (token==VOID) ){TreeNode *q = declaration();if (q!=NULL) {if (t==NULL) t = p = q;else /* now p cannot be NULL either */{p->sibling = q;p = q;}}}return t;}TreeNode * declaration(void){ TreeNode * t = NULL;switch (token){case VOID :case INT :t = newStmtNode(DecK);if(token == INT)t->type =Integer;elset->type = Void;match(token);switch (token){case ID:t-> = copyString(tokenString);t->kind.stmt = VarDK;match(ID);switch (token){case LZKH:t->kind.stmt = VarDK;t->type = IntArray;match(LZKH);match(NUM);match(RZKH);match(SEMI);break;case LPAREN:t->kind.stmt = FunDK;match(LPAREN);t->child[0] = params();match(RPAREN);t->child[1] = compound_stmt();break;default: match(SEMI);break;}break;default:syntaxError("unexpected token -> ");printToken(token,tokenString);token = getToken();break;}break;default : syntaxError("unexpected token -> ");printToken(token,tokenString);token = getToken();break;} /* end case */return t;}TreeNode * params(void){TreeNode * t = NULL;if(token == VOID){match(token);t = newStmtNode(ParamList);t->child[0] = newStmtNode(ParamK);t->child[0]->type = Void;}else if(token == RPAREN)t=NULL;else{t = param_list();}return t;}TreeNode * param_list(void){TreeNode * t = newStmtNode(ParamList);int i = 1;t->child[0] = param();while(token != RPAREN){match(DOT);t->child[i] = param();i++;}return t;}TreeNode * param(void){TreeNode * t = NULL;match(INT);t= newStmtNode(ParamK);t->type=Integer;t->=copyString(tokenString);match(ID);if(token == LZKH){t->type=IntArray;match(LZKH);match(RZKH);}return t;}TreeNode * compound_stmt(void){TreeNode * t = newStmtNode(ComK);match(LDKH);t->child[0] = local_declarations();t->child[1] = statement_list();match(RDKH);return t;}TreeNode * local_declarations(void){TreeNode * t = newStmtNode(LocalDecK);int i=0;while(token == INT || token == VOID){t->child[i] = declaration();i++;}return t;}TreeNode * statement_list(void){TreeNode * t = newStmtNode(StmtList);int i=0;while(token != RDKH){t->child[i] =statement();i++;}return t;}TreeNode * statement(void){TreeNode * t ;switch (token) {case IF : t = if_stmt(); break;case WHILE : t = while_stmt(); break;case ID :case SEMI:t = expression_stmt(); break;case RETURN : t = return_stmt(); break;case LDKH : t=compound_stmt();break;default : syntaxError("unexpected token -> ");printToken(token,tokenString);token = getToken();break;} /* end case */return t;}TreeNode * expression_stmt(void){TreeNode * t = newStmtNode(ExpstmtK);if(token == SEMI)match(SEMI);else{t = expression();match(SEMI);}return t;}TreeNode * if_stmt(void){TreeNode * t = newStmtNode(IfK);if(t!=NULL){match(IF);match(LPAREN);t->child[0] = expression();match(RPAREN);t->child[1] = statement();if (token==ELSE){match(ELSE);if (t!=NULL) t->child[2] = newStmtNode(ElseK);t->child[2]->child[0] = statement();} }return t;}TreeNode * while_stmt(void){TreeNode * t = newStmtNode(WhileK);match(WHILE);match(LPAREN);if (t!=NULL) t->child[0] = expression();match(RPAREN);if (t!=NULL) t->child[1] = statement();return t;}TreeNode * return_stmt(void){TreeNode * t = newStmtNode(RetK);if(token == RETURN)match(RETURN);if(token == SEMI)match(SEMI);else{t->child[0] = expression();match(SEMI);}return t;}TreeNode * expression(void){TreeNode * t = simple_exp();return t;}TreeNode* var(void){TreeNode* t = newExpNode(IdK);if ((t!=NULL) && (token==ID))t-> = copyString(tokenString);match(ID);if(token == LZKH){match(token);t->type = ArrayUnit;t->child[0] = expression();match(RZKH);}return t;}TreeNode * simple_exp(void){TreeNode * t = additive_expression();if(t!=NULL){if (token == LT || token == LE|| token == MT || token == ME||token ==EQ||token ==NEQ){TreeNode * p = newExpNode(OpK);if(p!=NULL){p->attr.op = token;p->child[0] = t;match(token);p->child[1] = additive_expression();t=p;}}}return t;}TreeNode* additive_expression(void){TreeNode * t = term();while(token == PLUS || token == MINUS){TreeNode * p = newExpNode(OpK);p->attr.op = token;p->child[0] = t;match(token);p->child[1] = term();t = p;}return t;}TreeNode * term(void){TreeNode * t = factor();while ((token==TIMES)||(token==OVER)){TreeNode * p = newExpNode(OpK);if (p!=NULL) {p->child[0] = t;p->attr.op = token;match(token);p->child[1] = factor();t = p;}}return t;}TreeNode * factor(void){TreeNode * t = NULL;switch (token){case NUM :t = newExpNode(ConstK);if ((t!=NULL) && (token==NUM))t->attr.val = atoi(tokenString);match(NUM);break;case ID :t = var();if (token == ASSIGN){TreeNode* p = newStmtNode(AssignK);p-> = t->;match(token);p->child[0] = expression();t = p;}if (token == LPAREN ){TreeNode * p = newStmtNode(CallK);p-> = t->;t=p;match(token);p->child[0] = args();match(RPAREN);}break;case LPAREN :match(LPAREN);t = expression();match(RPAREN);break;default:syntaxError("unexpected token -> ");printToken(token,tokenString);token = getToken();break;}return t;}TreeNode * args(void){TreeNode * t = newStmtNode(ArgList);if(token != RPAREN){t->child[0] = arg_list();return t;}elsereturn NULL;}TreeNode * arg_list(void){TreeNode * t = newStmtNode(ArgK);int i = 1;if(token != RPAREN)t->child[0] = expression();while(token!=RPAREN){match(DOT);t->child[i] = expression();i++;}return t;}TreeNode * parse(void){ TreeNode * t;token = getToken();t =declaration_list();if (token!=ENDFILE)syntaxError("Code ends before file\n");return t;}scan.cpp#include "globals.h"#include "util.h"#include "scan.h"/*对扫描的字符进行匹配判断*/TokenType getToken(void){ /* index for storing into tokenString */int tokenStringIndex = 0;/* holds current token to be returned */TokenType currentToken;/* current state - always begins at START */ StateType state = START;/* flag to indicate save to tokenString */ int save;while (state != DONE){ int c = getNextChar();save = TRUE;switch (state){ case START:if (isdigit(c))state = INNUM;else if (isalpha(c))state = INID;else if (c == '=')state = INEQUAL;else if (c == '<')state = INLE;else if (c == '>')state = INME;else if ((c == ' ') || (c == '\t') || (c == '\n')) save = FALSE;else if (c== '!')state = INNEQ;else if (c == '/'){if(getNextChar()!='*'){ungetNextChar();state = DONE;currentToken = OVER;break;}else{save = FALSE;state = INCOMMENT;}}else{ state = DONE;switch (c){ case EOF:save = FALSE;currentToken = ENDFILE;break;case '+':currentToken = PLUS;break;case '-':currentToken = MINUS; break;case '*':currentToken = TIMES; break;case '(':currentToken = LPAREN; break;case ')':currentToken = RPAREN; break;case ';':currentToken = SEMI; break;case '[':currentToken=LZKH;break;case ']':currentToken=RZKH;break;case '{':currentToken=LDKH;break;case '}':currentToken=RDKH;break;case ',':currentToken=DOT;break;default:currentToken = ERROR; break;}}break;case INCOMMENT:save = FALSE;if (c == EOF){state = DONE;currentToken = ERROR;}else if(c=='*'){if(getNextChar()=='/'){state = START;}else{ungetNextChar();}}break;case INNEQ:state=DONE;if(c=='=')currentToken=NEQ;else{ungetNextChar();save=FALSE;currentToken=ERROR;}break;case INEQUAL:state = DONE;if (c == '=')currentToken = EQ;else{ /* backup in the input */ ungetNextChar();currentToken = ASSIGN;}case INNUM:if (!isdigit(c)){ /* backup in the input */ ungetNextChar();save = FALSE;state = DONE;currentToken = NUM;}break;case INID:if (!isalpha(c)){ /* backup in the input */ ungetNextChar();save = FALSE;state = DONE;currentToken = ID;}break;case INLE:state = DONE;if(c== '=')currentToken = LE;else{ /* backup in the input */ ungetNextChar();currentToken = LT;}break;case INME:state = DONE;if(c== '=')currentToken = ME;else{ /* backup in the input */ ungetNextChar();currentToken = MT;}case DONE:default: /* should never happen */fprintf(listing,"Scanner Bug: state= %d\n",state); state = DONE;currentToken = ERROR;break;}if ((save) && (tokenStringIndex <= MAXTOKENLEN))tokenString[tokenStringIndex++] = (char) c;if (state == DONE){ tokenString[tokenStringIndex] = '\0';if (currentToken == ID)currentToken = reservedLookup(tokenString);}}if (TraceScan) {fprintf(listing,"\t%d: ",lineno);printToken(currentToken,tokenString);}return currentToken;} /* end getToken */Util.cpp#include "globals.h"#include "util.h"void printToken(TokenType token, const char* tokenString) {/*根据对应的判断输出判断结果*/switch(token){case ELSE:case IF:case INT:case RETURN:case VOID:case WHILE:fprintf(listing, "reserved word: %s\n", tokenString);break;case LT:fprintf(listing,"<\n");break;case EQ:fprintf(listing,"==\n");break;case LPAREN:fprintf(listing,"(\n");break;case RPAREN:fprintf(listing,")\n");break;case SEMI:fprintf(listing,";\n");break;case PLUS:fprintf(listing,"+\n");break;case MINUS:fprintf(listing,"-\n");break;case TIMES:fprintf(listing,"*\n");break;case OVER:fprintf(listing,"/\n");break;case ENDFILE:fprintf(listing,"EOF\n");break;case MT:fprintf(listing,">\n");break;case NEQ:fprintf(listing,"!=\n");break;case ASSIGN:fprintf(listing,"=\n");break;case DOT:fprintf(listing,",\n");break;case LZKH:fprintf(listing,"[\n");break;case RZKH:fprintf(listing,"]\n");break;case LDKH:fprintf(listing,"{\n");break;case RDKH:fprintf(listing,"}\n");break;case LZS:fprintf(listing,"/*\n");break;case RZS:fprintf(listing,"*/\n");break;case ME:fprintf(listing,">=\n");break;case LE:fprintf(listing,"<=\n");break;case NUM:fprintf(listing,"NUM,val= %s\n",tokenString);break;case ID:fprintf(listing,"ID, name= %s\n",tokenString);break;case ERROR:fprintf(listing,"ERROR: %s\n",tokenString);break;default:fprintf(listing,"Unknown token: %d\n",token);}}/*this function is used to establish the new stmt node*/TreeNode * newStmtNode(StmtKind kind){TreeNode * t = (TreeNode *)malloc(sizeof(TreeNode));int i;if(t==NULL){fprintf(listing, "Out of memory error at line %d\n",lineno);}else{for(i=0;i<MAXCHILDREN;i++){t->child[i]=NULL;}t->sibling=NULL;t->nodekind=StmtK;t->kind.stmt=kind;t->lineno=lineno;}return t;}/* Function newExpNode creates a new expressionnode for syntax tree construction*/TreeNode * newExpNode(ExpKind kind){TreeNode * t = (TreeNode *)malloc(sizeof(TreeNode));int i;if(t==NULL){fprintf(listing, "Out of memory error at line %d\n",lineno);}else{for(i=0;i<MAXCHILDREN;i++){t->child[i]=NULL;}t->sibling=NULL;t->nodekind=ExpK;t->kind.exp=kind;t->lineno=lineno;t->type=Void;}return t;}char * copyString(char * s){int n;char * t;if(s==NULL){return NULL;}n=strlen(s)+1;t=(char *)malloc(n);/*其作用是在存的动态存储区中分配一个长度为n的连续空间.保存tokenstring*/ if(t==NULL){fprintf(listing, "Out of memory error at line %d\n",lineno);}else{strcpy(t,s);/*该函数是字符串拷贝函数,用来将一个字符串复制到一个字符数组中。