/*
 * PL/0 lexical analyser.
 *
 * This was written as a compiler-principles course assignment; it has many
 * shortcomings and corrections are welcome.
 *
 * Assignment: write a lexical analyser for the PL/0 language.
 *   1. Read a source program written in PL/0, tokenise it correctly and
 *      output the sequence of (lexeme, code) pairs.
 *   2. If the source contains a lexical error, report where it occurred.
 *   3. Token codes, written as (lexeme, code):
 *        (+,+)  (-,-)  (*,*)  (/,/)  ((,()  (),))  (,,,)  (;,;)  (.,.)
 *        (#,#)  (=,=)  (>,>)  (<,<)  (:=,a) (>=,b) (<=,c)
 *        (number,d)  (identifier,e)
 *      Keyword codes:
 *        (begin,f) (call,g) (const,h) (do,i) (end,j) (if,k) (odd,l)
 *        (procedure,m) (read,n) (then,o) (var,p) (while,q) (write,r)
 *   4. The equality operator is a single '='.
 *
 * Sample input (file A.C):
 *
 *     CONST A=10;
 *     VAR B,C;
 *     PROCEDURE P;
 *     VAR D;
 *     PROCEDURE Q;
 *     VAR X;
 *     BEGIN
 *     READ(X);
 *     D:=X;
 *     WHILE X ... DO CALL P;
 *     END;
 *     BEGIN
 *     WRITE(D);
 *     CALL Q;
 *     END;
 *     BEGIN
 *     CALL P;
 *     END.
 *
 * NOTE(review): the WHILE condition in the sample was lost in the original
 * listing (it read "WHILE XDO"); restore it from the assignment sheet.
 * Keywords are matched case-sensitively against the lower-case table below,
 * so the upper-case words in this sample are classified as identifiers.
 *
 * program name: chifufenxi
 * author: Xiao Wan, QQ:421404493
 * date: 2004.10.11
 */
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#define N 256 /* a source line may hold at most N characters */

enum { KEYWORD_COUNT = 13 };

static char buffer[N + 1]; /* current source line, always NUL-terminated */
static char word[N + 1];   /* current lexeme; a lexeme never exceeds one line */

static const char *const kword[KEYWORD_COUNT] = {
    "begin", "call", "const", "do", "end", "if", "odd",
    "procedure", "read", "then", "var", "while", "write"
};
static const char ktype[KEYWORD_COUNT] = {
    'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r'
};

static int len;       /* index of the last character in buffer; -1 for an empty line */
static int count = 0; /* number of the line currently being analysed */

/*
 * Append one "(lexeme,code)" pair to the result file.
 * Named write_token (and static) so that it cannot collide with the POSIX
 * write() that the C library itself may rely on.
 */
static void write_token(const char *wstr, char wc, FILE *wout)
{
    fprintf(wout, "(%s,%c)", wstr, wc);
}

/*
 * Read the next source line into buffer (without the newline) and set len.
 * Characters beyond N on one line are discarded with a warning.
 * Returns 1 when a line was read (including a final line that has no
 * trailing newline) and 0 once the input is exhausted.
 */
static int readbuffer(FILE *fp)
{
    int ch; /* int, not char, so that EOF stays distinguishable from data */
    int used = 0;
    int overflow = 0;

    while ((ch = fgetc(fp)) != EOF && ch != '\n') {
        if (used < N) {
            buffer[used++] = (char)ch;
        } else {
            overflow = 1;
        }
    }
    buffer[used] = '\0';
    len = used - 1;

    if (overflow) {
        printf("第%d行超过%d个字符,多余部分被忽略\n", count, N);
    }
    return !(ch == EOF && used == 0);
}

/*
 * Print a lexical-error message for the current line.
 *   type 1: a word starts with digits but continues with other characters
 *   type 2: ':' that is not followed by '='
 *   other : invalid character(s)
 * For types 1 and 3 the caller has already echoed the offending text.
 */
static void report_error(int type)
{
    if (type == 1) {
        printf("为无效字符,第%d行词法出错,标志符不能以数字开头\n", count);
    } else if (type == 2) {
        printf("第%d行词法出错,赋值符应为:= \n ", count);
    } else {
        printf("为无效字符,第%d行词法出错\n", count);
    }
}

/* True for NUL and for every character that starts an operator/separator. */
static int is_delimiter(char c)
{
    return c == '\0' || strchr(":><+-*/(),;.#=", c) != NULL;
}

/* True when position i is past the line end, on whitespace, or on a delimiter. */
static int is_word_end(const char *row, int i)
{
    return i > len || isspace((unsigned char)row[i]) || is_delimiter(row[i]);
}

/*
 * Classify one lexeme and write its pair to out: numbers get 'd', keywords
 * their letter from ktype, other words 'e', ":=" 'a', ">=" 'b', "<=" 'c',
 * and every remaining operator is its own code. An empty or unknown lexeme
 * produces no output.
 */
static void check(const char *str, FILE *out)
{
    unsigned char first = (unsigned char)str[0];

    if (isdigit(first)) {
        write_token(str, 'd', out);
    } else if (isalpha(first)) {
        for (int k = 0; k < KEYWORD_COUNT; k++) {
            if (strcmp(str, kword[k]) == 0) {
                write_token(str, ktype[k], out);
                return;
            }
        }
        write_token(str, 'e', out);
    } else if (strcmp(str, ":=") == 0) {
        write_token(str, 'a', out);
    } else if (first == '>') {
        write_token(str, str[1] == '=' ? 'b' : '>', out);
    } else if (first == '<') {
        write_token(str, str[1] == '=' ? 'c' : '<', out);
    } else if (first != '\0' && strchr("+-*/(),;.#=", first) != NULL) {
        write_token(str, str[0], out);
    }
}

/* Echo the malformed word that starts at `start`; returns the index after it. */
static int echo_bad_word(const char *row, int start)
{
    int i = start;

    while (!is_word_end(row, i)) {
        putchar(row[i]);
        i++;
    }
    return i;
}

/*
 * Scan a number or an identifier/keyword starting at row[start] and emit it.
 * A word that leaves its character class before reaching whitespace or a
 * delimiter (e.g. "123abc", "ab$c") is echoed and reported instead.
 * Returns the index of the first character after the word.
 */
static int scan_word(const char *row, int start, FILE *op)
{
    int numeric = isdigit((unsigned char)row[start]) != 0;
    int i = start;

    while (!is_word_end(row, i) &&
           (numeric ? isdigit((unsigned char)row[i])
                    : isalnum((unsigned char)row[i]))) {
        i++;
    }

    if (!is_word_end(row, i)) {
        i = echo_bad_word(row, start);
        report_error(numeric ? 1 : 3);
        return i;
    }

    /* i - start <= N because i never exceeds len + 1, so word cannot overflow. */
    memcpy(word, row + start, (size_t)(i - start));
    word[i - start] = '\0';
    check(word, op);
    return i;
}

/*
 * Scan the operator or separator at row[i] and emit it.
 * Two-character operators are recognised only when the '=' lies inside the
 * current line. Returns the index of the first character after the operator.
 */
static int scan_operator(const char *row, int i, FILE *op)
{
    char c = row[i];
    int has_eq = (i < len && row[i + 1] == '=');

    switch (c) {
    case '\0':
        return i + 1; /* embedded NUL: skipped silently */
    case ':':
        if (!has_eq) {
            report_error(2);
            return i + 1;
        }
        /* fallthrough */
    case '>':
    case '<':
        word[0] = c;
        word[1] = has_eq ? '=' : '\0';
        word[2] = '\0';
        i += has_eq ? 2 : 1;
        break;
    default:
        word[0] = c;
        word[1] = '\0';
        i++;
        break;
    }
    check(word, op);
    return i;
}

/*
 * Tokenise one source line (row[0..len]) and write the pairs to op.
 * "//" starts a comment that runs to the end of the line.
 */
static void fenxi(const char *row, FILE *op)
{
    int i = 0;

    while (i <= len) {
        unsigned char c = (unsigned char)row[i];

        if (isspace(c)) {
            i++;
        } else if (c == '/' && i < len && row[i + 1] == '/') {
            break; /* rest of the line is a comment */
        } else if (isalnum(c)) {
            i = scan_word(row, i, op);
        } else if (is_delimiter(row[i])) {
            i = scan_operator(row, i, op);
        } else {
            /* Echo the whole run of unrecognised characters, then report once. */
            while (i <= len && !isalnum((unsigned char)row[i]) &&
                   !isspace((unsigned char)row[i]) && !is_delimiter(row[i])) {
                putchar(row[i]);
                i++;
            }
            report_error(3);
        }
    }
}

/* Read one line from stdin into dst and strip the line ending; 0 on EOF/error. */
static int read_line(char *dst, size_t size)
{
    if (fgets(dst, (int)size, stdin) == NULL) {
        return 0;
    }
    dst[strcspn(dst, "\r\n")] = '\0';
    return 1;
}

/*
 * Ask for the source and result file names, tokenise the source line by
 * line and write the pairs to the result file.
 */
int main(void)
{
    char scfilename[FILENAME_MAX]; /* source file name */
    char rsfilename[FILENAME_MAX]; /* result file name */
    FILE *fp;
    FILE *op;

    count = 1;

    printf("Please input your source file name:");
    fflush(stdout);
    if (!read_line(scfilename, sizeof scfilename)) {
        return EXIT_FAILURE;
    }
    printf("Please input your result file name:");
    fflush(stdout);
    if (!read_line(rsfilename, sizeof rsfilename)) {
        return EXIT_FAILURE;
    }

    /* Open the source first so a missing input does not create the output. */
    fp = fopen(scfilename, "r");
    if (fp == NULL) {
        printf("Your souce file not exist!!!\n");
        return EXIT_FAILURE;
    }
    op = fopen(rsfilename, "w");
    if (op == NULL) {
        printf("Cannot open your result file!!!\n");
        fclose(fp);
        return EXIT_FAILURE;
    }

    while (readbuffer(fp)) {
        fenxi(buffer, op);
        count++;
    }

    fclose(fp);
    if (fclose(op) != 0) { /* fclose flushes; a failure means lost output */
        printf("Cannot write your result file!!!\n");
        return EXIT_FAILURE;
    }

    printf("ok!");
    getchar();
    return 0;
}