CompilerDesign Lab
CompilerDesign Lab
CompilerDesign Lab
The lexical
analyzer should ignore redundant spaces, tabs, new lines, comments,
etc.
program :
LEX PART :
c_lex.l:
%{
/*
 * c_lex.l - scanner for the grammar in c_parser.y.
 *
 * Whitespace, single-line comments and multi-line comments are discarded.
 * Operators and punctuation are returned as the first character of the
 * matched text.  Alternatives are joined with '|' and no blanks, because
 * unquoted whitespace terminates a lex pattern.
 *
 * NOTE(review): two-character operators ("==", "!=", ">=", "<=") are still
 * reported as their first character only, as in the original rule; give
 * them dedicated token codes once the parser declares some.
 */
#include "y.tab.h"
%}
%%
[ \t\n]+                                        { /* ignore spaces, tabs and new lines */ }
"//".*                                          { /* ignore single-line comments */ }
"/*"([^*]|"*"+[^*/])*"*"+"/"                    { /* ignore multi-line comments */ }
"=="|"!="|">="|"<="|"+"|"-"|"*"|"/"|"="|">"|"<" { return yytext[0]; }
"("|")"|"{"|"}"|";"                             { return yytext[0]; }
%%
/* Tells the scanner there is no further input once end of file is reached. */
int yywrap() {
    return 1;
}
YACC PART :
c_parser.y:
%{
/* c_parser.y - grammar skeleton driven by the scanner's yylex(). */
#include <stdio.h>
int yylex();
void yyerror(const char *s);
/* NOTE(review): yyerror is only declared here; no definition appears in this listing. */
%}
/* NOTE(review): no %token declarations for INT, FLOAT or CHAR are visible - confirm they exist. */
%%
/* A program is one or more statements (left-recursive list). */
/* NOTE(review): no rule for `statement` appears in this listing - presumably elided. */
program: statement
| program statement
;
/* A type is exactly one of the three type tokens. */
type: INT
| FLOAT
| CHAR
;
%%
/* Runs the parser once and always exits with status 0, ignoring yyparse's result. */
int main() {
yyparse();
return 0;
}
Output :
Program :
LEX PART :
C_Lexer.jflex:
/*
 * C_Lexer.jflex - JFlex specification for the scanner used by C_Parser.cup.
 *
 * A JFlex file has three sections separated by "%%": user code, options and
 * macro declarations, and lexical rules.  %cup already selects the CUP
 * scanner interface and the Symbol return type, so no %type directive is
 * given.  JFlex generates the CLexer(java.io.Reader) constructor itself, so
 * none is written by hand.
 */
import java.io.*;
import java_cup.runtime.Symbol;

%%

%class CLexer
%public
%cup
%unicode
%line
%column

%{
    /* Name of the file being scanned, as supplied by the caller. */
    private String filename;

    /* Records the name of the input file (called by the parser driver). */
    public void setFilename(String filename) {
        this.filename = filename;
    }

    /* Returns the name recorded by setFilename, or null if none was set. */
    public String getFilename() {
        return filename;
    }
%}

WhiteSpace = [ \t\n\r\f]
LineTerminator = \r|\n|\r\n

%%

/* Redundant spaces, tabs and line breaks carry no meaning. */
{WhiteSpace}+   { /* skip */ }

/* NOTE(review): the rules producing keyword, identifier and number tokens
   are not part of this listing and must be added before the scanner can
   feed the parser. */
YACC PART :
C_Parser.cup :
import java_cup.runtime.*;
import java.io.*;

/*
 * C_Parser.cup - CUP grammar skeleton for a C-like language.
 *
 * Embedded Java must be wrapped in "parser code {: ... :};".  Terminals and
 * non-terminals are declared before the grammar, and terminal declarations
 * carry only an optional Java type - the lexemes themselves are matched by
 * the scanner.  The "error" symbol is predefined by CUP and must not be
 * declared.
 */
parser code {:
    /* Parses the file named by args[0] and reports success. */
    public static void main(String[] args) throws Exception {
        if (args.length != 1) {
            System.err.println("Usage: java parser <source-file>");
            System.exit(1);
        }
        CLexer lexer = new CLexer(new FileReader(args[0]));
        lexer.setFilename(args[0]);
        parser p = new parser(lexer);
        Symbol result = p.parse();
        System.out.println("Parsing completed without errors.");
    }
:};

/* Terminals (the lexeme each one stands for is shown alongside) */
terminal INT;           /* "int"   */
terminal FLOAT;         /* "float" */
terminal CHAR;          /* "char"  */
terminal IF;            /* "if"    */
terminal ELSE;          /* "else"  */
terminal WHILE;         /* "while" */
terminal FOR;           /* "for"   */
terminal Integer NUM;   /* [0-9]+, value = Integer.parseInt(yytext()) */
terminal String ID;     /* [a-zA-Z_][a-zA-Z0-9_]* */

/* Non-terminals */
non terminal program, statement, type, expression;

/* Grammar rules */
program ::= statement
          | program statement;

/* NOTE(review): productions for statement, type and expression are not part
   of this listing; CUP cannot generate a usable parser until they are
   supplied. */
Output :
Program :
LEX PART :
arith_id.l
%{
/*
 * This LEX program returns the tokens for the expression.
 *
 * Each operator is announced and then handed to the parser as its own
 * character code; without the return the grammar would never see it.
 * Letters optionally followed by digits, or a run of digits, are reported
 * as ID.  A newline ends the current expression by returning 0.
 */
#include <stdio.h>
#include "y.tab.h"
%}
%%
"="     { printf("\n Operator is EQUAL"); return yytext[0]; }
"+"     { printf("\n Operator is PLUS"); return yytext[0]; }
"-"     { printf("\n Operator is MINUS"); return yytext[0]; }
"/"     { printf("\n Operator is DIVISION"); return yytext[0]; }
"*"     { printf("\n Operator is MULTIPLICATION"); return yytext[0]; }
[a-zA-Z]+[0-9]*|[0-9]+ {
            printf("\n Identifier is %s", yytext);
            return ID;
        }
[ \t]+  { /* blanks separate tokens and are otherwise ignored */ }
\n      return 0;
.       return yytext[0];
%%
int yywrap()
{
    return 1;
}
YACC PART :
arith_id.y
%{
/* This YACC program is for recognizing the Expression */
#include <stdio.h>
int yylex(void);
void yyerror(const char *s);
%}
%token ID
%%
/* One input line: nothing, an assignment, or a bare expression. */
statement: /* empty */
         | ID '=' E { printf("\n Valid arithmetic expression"); }
         | E        { printf("\n Valid arithmetic expression"); }
         ;
/* Left-associative chain of identifiers joined by binary operators. */
E: E '+' ID
 | E '-' ID
 | E '*' ID
 | E '/' ID
 | ID
 ;
%%
extern FILE *yyin;

/* Parses one line per yyparse() call until the input stream is exhausted. */
int main(void)
{
    do {
        yyparse();
    } while (!feof(yyin));
    return 0;
}

/* Reports a syntax error instead of swallowing it silently. */
void yyerror(const char *s)
{
    printf("\n Error occurred: %s", s);
}
Output :
a=3+5
identifier is a
operator is equal
identifier is 3
valid arithmetic expression error occured
operator is plus
identifier is 5
Program :
LEX PART :
%{
/*
 * Scanner for the calculator grammar: converts digit runs to NUMBER tokens,
 * skips blanks and tabs, ends the input at a newline and passes every other
 * character through as its own token.
 */
#include <stdio.h>
#include <stdlib.h>     /* atoi */
#include "cal.tab.h"
%}
%%
[0-9]+  {
            yylval = atoi(yytext);
            return NUMBER;
        }
[ \t]   ;
[\n]    return 0;
.       return yytext[0];
%%
int yywrap()
{
    return 1;
}
YACC Part :
%{
/* Calculator grammar: evaluates one integer expression and prints its value. */
#include <stdio.h>
int yylex(void);
void yyerror(const char *s);
int flag = 0;   /* set to 1 by yyerror when the expression is rejected */
%}
%token NUMBER
/* Lowest precedence first; all binary operators associate to the left. */
%left '+' '-'
%left '*' '/' '%'
%%
ArithmeticExpression: E {
        printf("Result = %d\n", $1);
        YYACCEPT;
    };
E: E '+' E { $$ = $1 + $3; }
 | E '-' E { $$ = $1 - $3; }
 | E '*' E { $$ = $1 * $3; }
 | E '/' E {
        /* Integer division by zero is undefined behaviour in C. */
        if ($3 == 0) { yyerror("division by zero"); YYABORT; }
        $$ = $1 / $3;
    }
 | E '%' E {
        if ($3 == 0) { yyerror("modulus by zero"); YYABORT; }
        $$ = $1 % $3;
    }
 | '(' E ')' { $$ = $2; }
 | NUMBER { $$ = $1; }
 ;
%%
/* Prompts for an expression, parses it and reports whether it was valid. */
int main()
{
    printf("Enter an arithmetic expression that can have operations Addition, "
           "Subtraction, Multiplication, Division, Modulus and Round brackets: ");
    yyparse();
    if (flag == 0) {
        printf("Entered arithmetic expression is Valid\n");
    }
    return 0;
}

/* Records the failure so main does not report the expression as valid. */
void yyerror(const char *s)
{
    printf("Entered arithmetic expression is Invalid (%s)\n", s);
    flag = 1;
}
Output :
Enter an arithmetic expression that can have operations
Addition,Subtraction,Multiplication,Division,Modulus and Round brackets
:4+2+6*10
Result=66
Entered arithmetic expression is valid
program :
LEX PART :
variable_lex.l
%{
/*
 * variable_lex.l - returns VAR for a letter followed by letters or digits.
 * The matched text is duplicated into yylval.str; the parser owns the copy.
 */
#include <string.h>     /* strdup */
#include "y.tab.h"
%}
%%
[ \t\n]+                { /* ignore whitespace */ }
[a-zA-Z][a-zA-Z0-9]*    { yylval.str = strdup(yytext); return VAR; }
.                       { return yytext[0]; }
%%
int yywrap() {
    return 1;
}
YACC PART :
variable_parser.y
%{
/* variable_parser.y - reports every valid variable name found in the input. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
int yylex(void);
void yyerror(const char *s);
%}
/* The scanner stores a heap-allocated copy of each name in yylval.str. */
%union {
    char *str;
}
%token <str> VAR
%%
/* Zero or more variable names; any other token is a syntax error. */
input: /* empty */
     | input VAR { printf("Valid variable: %s\n", $2); free($2); }
     ;
%%
/* Returns yyparse's status so a rejected input yields a non-zero exit code. */
int main() {
    return yyparse();
}

/* Reports why the input was rejected. */
void yyerror(const char *s) {
    fprintf(stderr, "Invalid variable: %s\n", s);
}
lex variable_lex.l
yacc -d variable_parser.y
gcc lex.yy.c y.tab.c -o variable_parser -ll
./variable_parser
Output :
example123
Valid variable: example123
(6)Write a LEX Program to convert the substring abc to ABC from the given
input string.
Program :
LEX PART :
convert_lex.l
%{
/*
 * convert_lex.l - hands each "abc" to the parser as CONVERT (carrying the
 * replacement text "ABC") and copies every other character, newlines
 * included, straight to the output.  Returning those characters as tokens
 * would only trigger a syntax error, since the grammar accepts nothing but
 * CONVERT.
 */
#include "y.tab.h"
%}
%%
"abc"   { yylval.str = "ABC"; return CONVERT; }
.|\n    { ECHO; }
%%
int yywrap() {
    return 1;
}
YACC PART :
convert_parser.y
%{
/* convert_parser.y - prints the replacement text carried by each CONVERT token. */
#include <stdio.h>
#include <stdlib.h>
int yylex(void);
void yyerror(const char *s);
%}
/* The scanner assigns yylval.str, so the semantic value must be a string. */
%union {
    char *str;
}
%token <str> CONVERT
%%
program:
       | program CONVERT { printf("%s", $2); }
       ;
%%
int main() {
    yyparse();
    return 0;
}

/* Reports a syntax error on standard error. */
void yyerror(const char *s) {
    fprintf(stderr, "%s\n", s);
}
lex convert_lex.l
yacc -d convert_parser.y
gcc lex.yy.c y.tab.c -o convert_parser -ll
./convert_parser
Output :
abc abc123
ABC ABC123
Program :
#include <stdio.h>
#include <regex.h>

/* POSIX extended patterns; each must match the whole input string. */
static const char IDENTIFIER_PATTERN[] = "^[A-Za-z_][A-Za-z0-9_]*$";
static const char CONSTANT_PATTERN[] = "^[0-9]+$";
static const char OPERATOR_PATTERN[] = "^[-+*/]$";

/* Returns non-zero when `text` matches the compiled expression `re`. */
static int matches(const regex_t *re, const char *text) {
    return regexec(re, text, 0, NULL, 0) == 0;
}

/*
 * Classifies each sample string as an identifier, constant or operator.
 * Returns 0 on success, 1 if a pattern fails to compile.
 */
int main() {
    regex_t identifier_regex, constant_regex, operator_regex;
    const int flags = REG_EXTENDED | REG_NOSUB;

    /* A regex_t may only be used or freed after a successful regcomp. */
    if (regcomp(&identifier_regex, IDENTIFIER_PATTERN, flags) != 0) {
        fprintf(stderr, "Could not compile identifier pattern\n");
        return 1;
    }
    if (regcomp(&constant_regex, CONSTANT_PATTERN, flags) != 0) {
        fprintf(stderr, "Could not compile constant pattern\n");
        regfree(&identifier_regex);
        return 1;
    }
    if (regcomp(&operator_regex, OPERATOR_PATTERN, flags) != 0) {
        fprintf(stderr, "Could not compile operator pattern\n");
        regfree(&constant_regex);
        regfree(&identifier_regex);
        return 1;
    }

    // Input strings
    const char *inputs[] = {
        "variable123",
        "123",
        "+",
        "invalid123$",
        "12.34"
    };
    const size_t count = sizeof inputs / sizeof inputs[0];

    for (size_t i = 0; i < count; i++) {
        const char *text = inputs[i];
        if (matches(&identifier_regex, text)) {
            printf("%s is an identifier.\n", text);
        } else if (matches(&constant_regex, text)) {
            printf("%s is a constant.\n", text);
        } else if (matches(&operator_regex, text)) {
            printf("%s is an operator.\n", text);
        } else {
            printf("%s is not recognized.\n", text);
        }
    }

    // Free resources
    regfree(&identifier_regex);
    regfree(&constant_regex);
    regfree(&operator_regex);
    return 0;
}
Output :
variable123 is an identifier.
123 is a constant.
+ is an operator.
invalid123$ is not recognized.
12.34 is not recognized.
Program :
#include <stdio.h>
#include <stdbool.h>
#include <ctype.h>

/* Classification produced by the recogniser. */
typedef enum { INVALID, IDENTIFIER, CONSTANT, OPERATOR } TokenType;

/* True for characters allowed after the first character of an identifier. */
static bool isIdentifierChar(char ch) {
    return isalnum((unsigned char)ch) || ch == '_';
}

/*
 * Runs a small deterministic automaton over `input`.
 *   state 0: start
 *   state 1: inside an identifier
 *   state 2: inside an integer constant
 *   state 3: a single operator character has been read
 * Returns the token class of the whole string, or INVALID (also for "").
 */
TokenType recognizeToken(const char *input) {
    int state = 0;

    for (size_t i = 0; input[i] != '\0'; i++) {
        const char ch = input[i];
        switch (state) {
        case 0:
            if (isalpha((unsigned char)ch) || ch == '_') {
                state = 1;
            } else if (isdigit((unsigned char)ch)) {
                state = 2;
            } else if (ch == '+' || ch == '-' || ch == '*' || ch == '/') {
                state = 3;
            } else {
                return INVALID;
            }
            break;
        case 1:
            if (!isIdentifierChar(ch)) {
                return INVALID;
            }
            break;
        case 2:
            if (!isdigit((unsigned char)ch)) {
                return INVALID;
            }
            break;
        case 3:
            // Operators are only one character, so any further input is an error
            return INVALID;
        default:
            return INVALID;
        }
    }

    switch (state) {
    case 1: return IDENTIFIER;
    case 2: return CONSTANT;
    case 3: return OPERATOR;
    default: return INVALID;
    }
}

/* Human-readable name of a token class, as used in the program output. */
static const char *tokenTypeName(TokenType type) {
    switch (type) {
    case IDENTIFIER: return "Identifier";
    case CONSTANT: return "Constant";
    case OPERATOR: return "Operator";
    default: return "Invalid";
    }
}

/* Classifies each sample string and prints the result. */
int main() {
    const char *inputs[] = {
        "variable123",
        "123",
        "+",
        "invalid123$",
        "12.34"
    };
    const size_t count = sizeof inputs / sizeof inputs[0];

    for (size_t i = 0; i < count; i++) {
        printf("%s is a %s\n", inputs[i], tokenTypeName(recognizeToken(inputs[i])));
    }
    return 0;
}
Output :
variable123 is a Identifier
123 is a Constant
+ is a Operator
invalid123$ is a Invalid
12.34 is a Invalid
Program :
#include <stdio.h>
/* Evaluates x * y + z and returns the result. */
int computeExpression(int x, int y, int z) {
    const int product = x * y;
    return product + z;
}
/*
 * Evaluates the same expression twice: once through computeExpression at run
 * time and once as a literal constant expression the compiler can fold.
 * Prints both results and returns 0.
 *
 * NOTE(review): the operand values are not shown in the original listing;
 * 3, 4 and 5 are used because they reproduce the documented result of 17.
 */
int constantFoldingExample(void) {
    int x = 3;
    int y = 4;
    int z = 5;

    // Original expression
    int original_result = computeExpression(x, y, z);
    printf("Original Result: %d\n", original_result);

    // Same expression with literal operands, evaluated at compile time
    int folded_result = 3 * 4 + 5;
    printf("Constant Folded Result: %d\n", folded_result);
    return 0;
}

int main() {
    // Call the constant folding function
    constantFoldingExample();
    return 0;
}
Output :
Original Result: 17
Constant Folded Result: 17
(10)Write a program to implement the back end of the compiler, which takes
the three-address code and produces 8086 assembly language
instructions that can be assembled and run using an 8086 assembler. The
target assembly instructions can be simple move, add, sub, and jump. Also,
simple addressing modes are used.
Program :
#include <stdio.h>
/*
 * Prints the assembly for one three-address instruction.
 *   op 0: result = arg1          (arg2 unused)
 *   op 1: result = arg1 + arg2
 *   op 2: result = arg1 - arg2
 *   op 3: jump to label L<arg1>  (arg2 and result unused)
 * Any other op prints "Invalid operation".
 */
void generateAssembly(int op, int arg1, int arg2, int result) {
    enum { OP_ASSIGN = 0, OP_ADD = 1, OP_SUB = 2, OP_JUMP = 3 };

    if (op == OP_JUMP) {
        printf("JMP L%d\n", arg1);
        return;
    }
    if (op != OP_ASSIGN && op != OP_ADD && op != OP_SUB) {
        printf("Invalid operation\n");
        return;
    }

    /* Assignment and arithmetic share the load/store pair around AX. */
    printf("MOV AX, [%d]\n", arg1);
    if (op == OP_ADD) {
        printf("ADD AX, [%d]\n", arg2);
    } else if (op == OP_SUB) {
        printf("SUB AX, [%d]\n", arg2);
    }
    printf("MOV [%d], AX\n", result);
}
/* Translates a fixed sample of three-address instructions, one per table row. */
int main() {
    /* Columns: op, arg1, arg2, result. */
    static const int sample[][4] = {
        {0, 1000, 0, 2000},     /* a = b */
        {1, 2000, 3000, 4000},  /* c = a + d */
        {2, 4000, 5000, 6000},  /* e = c - f */
        {3, 7000, 0, 0},        /* jump to label L7000 */
    };
    const size_t count = sizeof sample / sizeof sample[0];

    for (size_t i = 0; i < count; i++) {
        generateAssembly(sample[i][0], sample[i][1], sample[i][2], sample[i][3]);
    }
    return 0;
}
Output :
%{
/* Counts vowels and consonants read from standard input. */
#include <stdio.h>
int vowel_count = 0;
int consonant_count = 0;
%}
%%
[aeiouAEIOU]    { vowel_count++; }
[a-zA-Z]        { consonant_count++; /* vowels were taken by the rule above */ }
[ \t\n]         ;   /* skip whitespace */
.               ;   /* skip other characters */
%%
/* Defined here so the scanner links without the lex support library. */
int yywrap() {
    return 1;
}

/* Scans all input, then prints both totals. */
int main() {
    yylex();
    printf("Total vowels: %d\n", vowel_count);
    printf("Total consonants: %d\n", consonant_count);
    return 0;
}
Output :
Program :
LEX PART :
recognize_control.l
%{
/*
 * recognize_control.l - returns FOR and WHILE for the two loop keywords.
 * Every other character, whitespace included, is consumed without
 * producing a token, so the parser only ever sees the keywords.
 */
#include "y.tab.h"
%}
%%
"for"       { return FOR; }
"while"     { return WHILE; }
[ \t\n]     { /* skip whitespace */ }
.           { /* skip other characters */ }
%%
YACC PART :
recognize_control.y
%{
/* recognize_control.y - grammar skeleton for for/while loop recognition. */
#include <stdio.h>
%}
/* NOTE(review): no %token declarations (e.g. for FOR and WHILE) are visible - confirm. */
%%
/* The input is either a single control statement or empty. */
program : control_statement
| /* empty */
;
/* NOTE(review): no rules for for_loop or while_loop appear in this listing - presumably elided. */
control_statement : for_loop
| while_loop
;
/* An expression is always empty in this grammar. */
expr : /* empty */
;
%%
/* Runs the parser once and always exits with status 0, ignoring yyparse's result. */
int main() {
yyparse();
return 0;
}
lex recognize_control.l
yacc -d recognize_control.y
gcc lex.yy.c y.tab.c -o recognize_control -ll
./recognize_control
Output :
(13)Write a program to Convert the BNF rules into Yacc form and write
code to generate abstract syntax tree.
Program :
LEX PART :
ast_lex.l
%{
/*
 * ast_lex.l - scanner for the expression grammar in ast_parser.y.
 * Numbers are returned as NUMBER with a heap-allocated copy of their text
 * in yylval.str (owned by the parser); operators, parentheses and newline
 * are returned as their own character codes.
 */
#include <string.h>     /* strdup */
#include "y.tab.h"
%}
%%
[0-9]+      { yylval.str = strdup(yytext); return NUMBER; }
[-+*/()\n]  { return yytext[0]; }
[ \t]       ;   /* ignore whitespace */
.           ;   /* silently drop anything else */
%%
int yywrap() {
    return 1;
}
YACC PART :
ast_parser.y
%{
/*
 * ast_parser.y - builds an abstract syntax tree for each arithmetic
 * expression read from standard input (one expression per line) and prints
 * it as an indented pre-order outline: every node on its own line, children
 * two spaces deeper than their parent.
 *
 * NOTE(review): the sample output in the handout draws the tree as a
 * diagram; this outline carries the same structure in a simpler layout.
 */
#include <stdio.h>
#include <stdlib.h>

struct Node {
    char* value;            /* heap-allocated label: number text or operator */
    struct Node* left;      /* NULL for leaves */
    struct Node* right;     /* NULL for leaves */
};

int yylex(void);
void yyerror(const char *s);
static struct Node *makeLeaf(char *text);
static struct Node *makeOperator(char op, struct Node *left, struct Node *right);
static void printTree(const struct Node *node, int depth);
static void freeTree(struct Node *node);
%}
%union {
    char *str;              /* text of a NUMBER, allocated by the scanner */
    struct Node *node;      /* subtree built for an expression */
}
%token <str> NUMBER
%type <node> expr
%left '+' '-'
%left '*' '/'
%%
input: /* empty */
     | input line
     ;
line: '\n'
    | expr '\n'     { printTree($1, 0); freeTree($1); }
    | error '\n'    { yyerrok; /* resume at the next line */ }
    ;
expr: expr '+' expr { $$ = makeOperator('+', $1, $3); }
    | expr '-' expr { $$ = makeOperator('-', $1, $3); }
    | expr '*' expr { $$ = makeOperator('*', $1, $3); }
    | expr '/' expr { $$ = makeOperator('/', $1, $3); }
    | '(' expr ')'  { $$ = $2; }
    | NUMBER        { $$ = makeLeaf($1); }
    ;
%%
/* Allocates a node that takes ownership of `value`; exits on allocation failure. */
static struct Node *makeNode(char *value, struct Node *left, struct Node *right) {
    struct Node *node = malloc(sizeof *node);
    if (node == NULL || value == NULL) {
        fprintf(stderr, "out of memory\n");
        exit(EXIT_FAILURE);
    }
    node->value = value;
    node->left = left;
    node->right = right;
    return node;
}

/* Wraps the number text handed over by the scanner in a leaf node. */
static struct Node *makeLeaf(char *text) {
    return makeNode(text, NULL, NULL);
}

/* Creates an interior node labelled with the single-character operator. */
static struct Node *makeOperator(char op, struct Node *left, struct Node *right) {
    char *label = malloc(2);
    if (label != NULL) {
        label[0] = op;
        label[1] = '\0';
    }
    return makeNode(label, left, right);
}

/* Prints the subtree rooted at `node`, indenting two spaces per level. */
static void printTree(const struct Node *node, int depth) {
    if (node == NULL) {
        return;
    }
    printf("%*s%s\n", depth * 2, "", node->value);
    printTree(node->left, depth + 1);
    printTree(node->right, depth + 1);
}

/* Releases a subtree together with the labels it owns. */
static void freeTree(struct Node *node) {
    if (node == NULL) {
        return;
    }
    freeTree(node->left);
    freeTree(node->right);
    free(node->value);
    free(node);
}

int main() {
    yyparse();
    return 0;
}

/* Reports a syntax error on standard error. */
void yyerror(const char *s) {
    fprintf(stderr, "%s\n", s);
}
lex ast_lex.l
yacc -d ast_parser.y
gcc lex.yy.c y.tab.c -o ast_parser -ll
./ast_parser
Output :
5 + 3 * (2 - 1)
+
/ \
5 *
/ \
3 -
/ \
2 1
(14)Write a program to recognize valid control-structure
syntax of the C language using if-else, if-else-if and switch-case.
Program :
LEX PART :
recognize_control.l
%{
/*
 * recognize_control.l - returns one token per branching keyword
 * (if, else, switch, case, break, default).  Every other character,
 * whitespace included, is consumed without producing a token.
 */
#include "y.tab.h"
%}
%%
"if"        { return IF; }
"else"      { return ELSE; }
"switch"    { return SWITCH; }
"case"      { return CASE; }
"break"     { return BREAK; }
"default"   { return DEFAULT; }
[ \t\n]     { /* skip whitespace */ }
.           { /* skip other characters */ }
%%
YACC PART :
recognize_control.y
%{
/* recognize_control.y - grammar skeleton for if/else and switch/case recognition. */
#include <stdio.h>
%}
/* NOTE(review): no %token declarations (IF, ELSE, SWITCH, CASE, BREAK, DEFAULT, NUMBER) are visible - confirm. */
%%
/* The input is either a single control statement or empty. */
program : control_statement
| /* empty */
;
/* NOTE(review): no rules for if_else_statement or switch_case_statement appear in this listing - presumably elided. */
control_statement : if_else_statement
| switch_case_statement
;
/* NOTE(review): the scanner listed with this grammar has no rule that returns NUMBER - verify. */
constant : NUMBER
;
/* An expression is always empty in this grammar. */
expr : /* empty */
;
%%
/* Runs the parser once and always exits with status 0, ignoring yyparse's result. */
int main() {
yyparse();
return 0;
}
lex recognize_control.l
yacc -d recognize_control.y
gcc lex.yy.c y.tab.c -o recognize_control -ll
./recognize_control
Output :
Program :
LEX PART :
type_check_lex.l
%{
/*
 * type_check_lex.l - scanner for type_check_parser.y.
 * Numbers are returned as NUMBER with a heap-allocated copy of their text
 * in yylval.str (owned by the parser); operators, parentheses and newline
 * are returned as their own character codes.
 */
#include <string.h>     /* strdup */
#include "y.tab.h"
%}
%%
[0-9]+      { yylval.str = strdup(yytext); return NUMBER; }
[-+*/()\n]  { return yytext[0]; }
[ \t]       ;   /* ignore whitespace */
.           ;   /* silently drop anything else */
%%
int yywrap() {
    return 1;
}
YACC PART :
type_check_parser.y
%{
/*
 * type_check_parser.y - parses one arithmetic expression per line and
 * reports its type.  C functions belong in the prologue or after the second
 * "%%", never in the rules section, so checkTypes is declared here and
 * defined below.
 */
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
int yylex(void);
void yyerror(const char *s);
static void checkTypes(const char *type);
%}
%union {
    char *str;      /* text of a NUMBER, allocated by the scanner */
}
%token <str> NUMBER
%left '+' '-'
%left '*' '/'
%%
input: /* empty */
     | input line
     ;
line: '\n'
      /* Every operand the scanner can produce is an integer literal, so a
         well-formed expression always has type int. */
    | expr '\n'     { checkTypes("int"); }
    | error '\n'    { yyerrok; /* resume at the next line */ }
    ;
expr: expr '+' expr
    | expr '-' expr
    | expr '*' expr
    | expr '/' expr
    | '(' expr ')'
    | NUMBER        { free($1); }
    ;
%%
/* Reports the type determined for an expression (simplified example). */
static void checkTypes(const char *type) {
    printf("Type checked: %s\n", type);
}

int main() {
    yyparse();
    return 0;
}

/* Reports a syntax error on standard error. */
void yyerror(const char *s) {
    fprintf(stderr, "%s\n", s);
}
lex type_check_lex.l
yacc -d type_check_parser.y
gcc lex.yy.c y.tab.c -o type_check_parser -ll
./type_check_parser
Output :
5 + 3 * (2 - 1)
Type checked: int
Program :
#include <stdio.h>
/* Prints the first n multiples of 3, computed with one multiplication per iteration. */
int main() {
    int i;
    int n = 10;

    printf("Original Code:\n");
    for (i = 0; i < n; i++) {
        printf("%d ", i * 3);
    }
    printf("\n");
    return 0;
}
Output :
Original Code:
0 3 6 9 12 15 18 21 24 27
Program :
Program :
#include <stdio.h>
/*
 * Evaluates x*y + x*z directly and again after factoring out x, printing
 * both values to show that the algebraic transformation preserves the result.
 */
int main() {
    int x = 5;
    int y = 3;
    int z = 7;

    // Original code
    int result1 = x * y + x * z;
    printf("Original Code: %d\n", result1);

    // Distributive law: x*y + x*z == x*(y + z), one multiplication fewer
    int result2 = x * (y + z);
    printf("Code after Algebraic Transformation: %d\n", result2);
    return 0;
}
Output :
Original Code: 50
Code after Algebraic Transformation: 50
(20)Write a program to generate three address code using LEX and YACC.
Program :
LEX PART :
three_address_lex.l
%{
/*
 * three_address_lex.l - scanner for three_address_parser.y.
 * Numbers are returned as NUM with their value in yylval.num; operators,
 * parentheses and newline are returned as their own character codes.
 */
#include <stdlib.h>     /* atoi */
#include "y.tab.h"
%}
%%
[0-9]+      { yylval.num = atoi(yytext); return NUM; }
[-+*/()\n]  { return yytext[0]; }
[ \t]       ;   /* ignore whitespace */
.           ;   /* silently drop anything else */
%%
int yywrap() {
    return 1;
}
YACC PART :
three_address_parser.y
%{
/*
 * three_address_parser.y - translates one arithmetic expression per line
 * into three-address code.  Each binary operation is emitted as
 * "tN = lhs op rhs" when its rule is reduced, so inner operations appear
 * before the ones that use them.
 */
#include <stdio.h>
#include <stdlib.h>
int yylex(void);
void yyerror(const char *s);

int tempCount = 1;
static int headerPrinted = 0;   /* whether the heading was printed for this line */

/* Returns the number of the next unused temporary. */
int newTemp() {
    return tempCount++;
}

/*
 * Emits "tN = lhs op rhs" for a fresh temporary and stores its name in
 * `place`, a buffer of `size` bytes.
 */
static void emit(char *place, size_t size, const char *lhs, char op, const char *rhs) {
    int temp = newTemp();
    if (!headerPrinted) {
        printf("Three Address Code:\n");
        headerPrinted = 1;
    }
    printf("t%d = %s %c %s\n", temp, lhs, op, rhs);
    snprintf(place, size, "t%d", temp);
}
%}
%union {
    int num;            /* value of a NUM token */
    char place[16];     /* where an expression's value lives: a literal or "tN" */
}
%token <num> NUM
%type <place> expr
%left '+' '-'
%left '*' '/'
%%
input: /* empty */
     | input line
     ;
line: '\n'
    | expr '\n'     { headerPrinted = 0; }
    | error '\n'    { headerPrinted = 0; yyerrok; /* resume at the next line */ }
    ;
expr: expr '+' expr { emit($$, sizeof $$, $1, '+', $3); }
    | expr '-' expr { emit($$, sizeof $$, $1, '-', $3); }
    | expr '*' expr { emit($$, sizeof $$, $1, '*', $3); }
    | expr '/' expr { emit($$, sizeof $$, $1, '/', $3); }
    | '(' expr ')'  { snprintf($$, sizeof $$, "%s", $2); }
    | NUM           { snprintf($$, sizeof $$, "%d", $1); }
    ;
%%
int main() {
    yyparse();
    return 0;
}

/* Reports a syntax error on standard error. */
void yyerror(const char *s) {
    fprintf(stderr, "%s\n", s);
}
lex three_address_lex.l
yacc -d three_address_parser.y
gcc lex.yy.c y.tab.c -o three_address_parser -ll
./three_address_parser
Output :
5 + 3 * (2 - 1)
Three Address Code:
t1 = 2 - 1
t2 = 3 * t1
t3 = 5 + t2