编译原理课程设计报告课题名称: C-语言编译器设计(scanner和parser)提交文档学生姓名:提交文档学生学号:同组成员名单:无指导教师姓名:金军指导教师评阅成绩:指导教师评阅意见:..提交报告时间: 2011年 6 月 17 日1.课程设计目标设计C-Minus编译器分为scanner和parser两个部分。
scanner主要作用是对目标代码进行扫描,列出关键字,变量等内容;parser主要对语法进行分析并生成语法树。
2.分析与设计●实现方法:代码用C语言编译而成。
其中scanner为手工实现,主要采用switch-case结构实现状态转换;parser部分采用递归下降分析方法实现。
●扫描器:C-的词法如下:1、语言的关键字:i f el se i nt return void while2、专用符号:+ - * /< <= > >= == != =; , ( ) [ ] { } /* */3、其他标记是变量(ID)和数字(NUM),通过下列正则表达式定义:ID = letter letter* NUM = di git digi t* letter = a|..|z|A|..|Z digi t = 0|..|94、空格由空白、换行符和制表符组成。
空格通常被忽略,除了它必须分开ID、NUM关键字5. 注释用通常的C语言符号/ * . . . * /围起来。
注释可以放在任何空白出现的位置(即注释不能放在标记内)上,且可以超过一行。
注释不能嵌套其DFA图如下:分析器:以下为C-的语法规则BNF:代码设计说明:程序结构:本程序共有8个文件,分别为main.c ,globals.h , scan.h ,scan.c , util.h ,util.c ,parse.h ,parse.c。
其中main.c是主函数,实现打开文件等main函数的操作;globals.h里主要词法分析和语法分析的功能;util.c和util.h提供了词法分析和语法分析以及构造分析树的相关函数;scan.c和scan.h主要对目标程序进行扫描和词法分析;parse.c和parse.h主要实现语法分析。
系统结构图如下:3.程序代码实现#include "globals.h"//#define NO_PARSE FALSE#include "util.h"//#if NO_PARSE#include "scan.h"//#else#include "parse.h"//#endif/* allocate global variables */int lineno = 0;FILE * source;FILE * listing;FILE * code;/* allocate and set tracing flags */int EchoSource = TRUE;int TraceScan = TRUE;int TraceParse = TRUE;int Error = FALSE;main( int argc, char * argv[] ){ TreeNode * syntaxTree;char pgm[120];printf("请输入文件名(本文件夹下的文件,如a.txt b.txt):");scanf("%s",pgm);source=fopen(pgm,"r");if (source==NULL){ fprintf(stderr,"File %s not found\n",pgm);exit(1);}listing = stdout;fprintf(listing,"\nCMINUS COMPILATION: %s\n",pgm); syntaxTree = parse();if (TraceParse) {fprintf(listing,"\nSyntax tree:\n");printTree(syntaxTree);}fclose(source);return 0;}#ifndef _GLOBALS_H_#define _GLOBALS_H_#include <stdio.h>#include <stdlib.h>#include <cty pe.h>#include <string.h>#ifndef FALSE#define FALSE 0#endif#ifndef TRUE#define TRUE 1#endif/* MAXRESERVED = the number of reserved words */ #define MAXRESERVED 6typedef enum/* book-keeping tokens */{ENDFILE,ERROR,/* reserved words */IF,ELSE,RETURN,WHILE,/* multicharacter tokens */ID,NUM,/* ty pe*/INT,VOID,/* special symbols + - * / < <= > >= == != = ; , ( ) [ ] { } */PLUS,MINUS,TIMES,OVER,LT,LTEQ,GT,GTEQ,EQ,NOEQ,ASSIGN,SEMI,COMMA,LP AREN,RPAREN,LBRA,RBRA,LBRACE,RBRACE} TokenType;extern FILE* source;extern FILE* listing;extern FILE* code;extern int lineno;typedef enum {StmtK,ExpK,DecK,Param K} NodeKind;typedef enum {CompoundK,SelectionK,IterationK,Return K,ExpressionK} StmtKind; typedef enum {OpK,ConstK,IdK,AssignK,CallK} ExpKind;typedef enum {V ar,Array,Null} ParamKind;typedef enum {Array K,V arK,Fun K} DecKind; //declaration kind treen ode ;#define MAXCHILDREN 3//定义树的结点结构,树是左孩子右兄弟结构。
typedef struct treeNode{ struct treeNode * child[MAXCHILDREN];struct treeNode * sibling;int lineno;NodeKind nodekind;union { StmtKind stmt; ExpKind exp; DecKind dec;ParamKind param;} kind;union { TokenType op;int val;char * ty pe;char * name; } attr;} TreeNode;extern int EchoSource;extern int TraceScan;extern int TraceParse;extern int Error;#endif#include "globals.h"#include "util.h"#include "scan.h"/* states in scanner DFA *//* comments */typedef enum{ START,INASSIGN,INCOMMENT,INOVER,INNUM,INID,INLT,INGT,INNOEQ,DONE } StateType;/* lexeme of identifier or reserved word */char token String[MAXTOKENLEN+1];/* BUFLEN = length of the input buffer forsource code lines */#define BUFLEN 256static char lineBuf[BUFLEN]; /* holds the current line */static int linepos = 0; /* current position in LineBuf */static int bufsize = 0; /* current size of buffer string */static int EOF_flag = FALSE; /* corrects ungetNextChar behavior on EOF *//* getNextChar fetches the next n on-blank characterfrom lineBuf, reading in a new line if lineBuf isexhausted */static int getNextChar(void){ if (!(linepos < bufsize)){ lineno++;if (fgets(lineBuf,BUFLEN-1,source)){ if (EchoSource)fprintf(listing,"%4d: %s",lineno,lineBuf);bufsize = (int)strlen(lineBuf);linepos = 0;return lineBuf[linepos++];}else{ EOF_flag = TRUE;return EOF;}}else return lineBuf[linepos++];}static v oid ungetNextChar(void){ if (!EOF_flag) linepos--;}static struct{ char* str;TokenType tok;} reservedW ords[MAXRESERVED]={{"if",IF},{"else",ELSE},{"while",WHILE},{"return",RETURN},{"int",INT},{"void",VOID} };static TokenType reservedLooku p (char * s){ int i;for (i=0;i<MAXRESERVED;i++)if (!strcmp(s,reservedWords[i].str))return reserv edWords[i].tok;return ID;}TokenType g etToken(void){int token StringIndex = 0;TokenType currentT oken;StateType state = START;int save;while (state != DONE){int c = getNextChar();save = TRUE;switch (state){case START:if (isdigit(c))state = INNUM;else if (isalpha(c))state = INID;else if (c == '=')state = INASSIGN;else if ((c == ' ') || (c == '\t') || (c == '\n'))save = FALSE;else if (c == '<')state = INLT;else if (c == '>')state = INGT;else if (c == '!')state = INNOEQ;else if (c == '/')state = INOVER;else{ state = DONE;switch (c){ case EOF:save = FALSE;currentToken = ENDFILE;break;case '+':currentToken = PLUS;break;case '-':currentToken = MINUS;break;case '*':currentToken = TIMES;break;case '(':currentToken = LPAREN;break;case ')':currentToken = RPAREN;break;case '[':currentToken = LBRA;break;case ']':currentToken = RBRA;break;case '{':currentToken = LBRACE;break;case '}':currentToken = RBRACE;break;case ';':currentToken = SEMI;break;case ',':currentToken = COMMA;break;default:currentToken = ERROR;break;}}break;case INCOMMENT:save = FALSE;if (c == EOF){ state = DONE;currentToken = ENDFILE;}else if ((c == '*') && (getNextChar( ) =='/')) { sav e = FALSE;state = START;}break;case INOVER:if (c == '*'){save = FALSE;tokenStringIndex --;state = INCOMMENT;}else{state = DONE;currentToken = OVER;ungetNextCh ar();}break;case INASSIGN:state = DONE;if (c == '=')currentToken = EQ;else{currentToken = ASSIGN;ungetNextChar();save = FALSE;}break;case INNUM:if (!isdigit(c)){ungetNextChar();save = FALSE;state = DONE;currentToken = NUM;}break;case INID:if (!isalpha(c)){ungetNextChar();save = FALSE;state = DONE;currentToken = ID;}break;case INLT:state = DONE;if (c == '=')currentToken = LTEQ;else {currentToken = LT;ungetNextCh ar();save = FALSE;}break;case INGT:state = DONE;if (c == '=')currentToken = GTEQ;else {currentToken = GT;ungetNextCh ar();save = FALSE;}break;case INNOEQ:state = DONE;if (c == '=')currentToken = NOEQ;else{ungetNextCh ar();save = FALSE;currentToken = ERROR;}break;case DONE:default:fprintf(listing,"Scanner Bug: state= %d\n",state);state = DONE;currentT oken = ERROR;break;}if ((save) && (token StringIndex <= MAXTOKENLEN))token String[token StringIndex++] = (char) c;if (state == DONE){ token String[tokenStringIndex] = '\0';if (currentToken == ID)currentT oken = reservedLooku p(tokenString);}}if (TraceScan) {fprintf(listing,"\t%d: ",lineno);printToken(currentT oken,tokenString);}return currentToken;}#ifndef _SCAN_H_#define _SCAN_H_/* MAXTOKENLEN is the maximum size of a token */#define MAXTOKENLEN 40/* token String array stores the lexeme of each token */extern char token String[MAXTOKENLEN+1];/* function getToken returns the* next token in source file*/TokenType g etToken(void);TokenType g etNextToken(void);#endifvoid sort ( int a[], int low, int high ) { int i; int k;i = low;while (i < high-1){ int t;k = minloc (a,i,high);t =a[k];a[k] = a[i];a[i] = t;i = i + 1;}}void main (void){ int i;i = 0;while (i < 10){ x[i] = input;i = i + 1;}sort (x,0,10);i = 0;while (i < 10){ output(x[i]);i = i + 1;}}#include "globals.h"#include "util.h"/* Procedure printToken prints a token* and its lexeme to the listing file*///输出一个识别出来的关键字void printToken( TokenType token, const char* tokenString ) { switch (token){ case IF:case ELSE:case WHILE:case RETURN:case INT:case VOID:fprintf(listing,"reserved word: %s\n",tokenString);break;case PLUS: fprintf(listing,"+\n"); break;case MINUS: fprintf(listing,"-\n"); break;case TIMES: fprintf(listing,"*\n"); break;case OVER: fprintf(listing,"/\n"); break;case LT: fprintf(listing,"<\n"); break;case LTEQ: fprintf(listing,"<=\n"); break;case GT: fprintf(listing,">\n"); break;case GTEQ: fprintf(listing,">=\n"); break;case EQ: fprintf(listing,"==\n"); break;case NOEQ: fprintf(listing,"!=\n"); break;case ASSIGN: fprintf(listing,"=\n"); break;case SEMI: fprintf(listing,";\n"); break;case COMMA: fprintf(listing,",\n"); break;case LPAREN: fprintf(listing,"(\n"); break;case RPAREN: fprintf(listing,")\n"); break;case LBRA: fprintf(listing,"[\n"); break;case RBRA: fprintf(listing,"]\n"); break;case LBRACE: fprintf(listing,"{\n"); break;case RBRACE: fprintf(listing,"}\n"); break;case ENDFILE: fprintf(listing,"EOF\n"); break;case NUM:fprintf(listing,"NUM, val= %s\n",tokenString);break;case ID:fprintf(listing,"ID, name= %s\n",tokenString);break;case ERROR://报错处理fprintf(listing,"ERROR: %s\n",tokenString);break;default: /* should never happen */fprintf(listing,"Unknown token: %d\n",token);}}//新建一个stmtNode结点,与第三个实验的方法相同//不同的是这里的参数是识别出来的tokenTreeNode * newStmtNode(StmtKind kind){ TreeNode * t = (TreeNode *) malloc(sizeof(TreeNode));int i;if (t==NULL)fprintf(listing,"Out of memory error at line %d\n",lineno); else {for (i=0;i<MAXCHILDREN;i++) t->child[i] = NULL;t->sibling = NULL;t->nodekind = StmtK;t->kind.stmt = kind;t->lineno = lineno;}return t;}TreeNode * newExpNode(ExpKind kind){ TreeNode * t = (TreeNode *) malloc(sizeof(TreeNode));int i;if (t==NULL)fprintf(listing,"Out of memory error at line %d\n",lineno); else {for (i=0;i<MAXCHILDREN;i++) t->child[i] = NULL;t->sibling = NULL;t->nodekind = ExpK;t->kind.exp = kind;t->lineno = lineno;}return t;}TreeNode * newParamNode(ParamKind kind){TreeNode * t = (TreeNode *) malloc(sizeof(TreeNode));int i;if (t == NULL)fprintf(listing,"Out of memory error at line %d\n",lineno); else{for (i = 0; i<MAXCHILDREN;i++) t->child[i] = NULL;t->sibling = NULL;t->nodekind = ParamK;t->kind.param = kind;t->lineno = lineno;}return t;}TreeNode * newDecNode(){TreeNode * t = (TreeNode *) malloc(sizeof(TreeNode));int i;if (t == NULL)fprintf(listing,"Out of memory error at line %d\n",lineno);else{for (i=0;i<MAXCHILDREN;i++) t->child[i] = NULL;t->sibling = NULL;t->nodekind = DecK;t->lineno = lineno;}return t;}char * copyString(char * s){ int n;char * t;if (s==NULL) return NULL;n = (int)strlen(s)+1;t = (char *)malloc(n);if (t==NULL)fprintf(listing,"Out of memory error at line %d\n",lineno);else strcpy(t,s);return t;}static indentno = 0;#define INDENT indentno+=2#define UNINDENT indentno-=2//输出每行前面的空格static void printSpaces(void){ int i;for (i=0;i<indentno;i++)fprintf(listing," ");}//输出整个树。