Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                

Lecture 04

Download as pptx, pdf, or txt
Download as pptx, pdf, or txt
You are on page 1 of 35

Compiler

Construction
Abdul Mateen
Lecture 03
Lexical Analyzer Generators
• Flex
generates a lexical analyzer in C
or C++
• JLex
written in Java; generates a
lexical analyzer in Java
2
Using Flex
• Provide a specification file
• Flex reads this file and
produces a C or C++ output file
that contains the scanner.
• The file consists of three
sections
3
Using Flex
• Provide a specification file
• Flex reads this file and
produces a C or C++ output file
that contains the scanner.
• The file consists of three
sections
4
Using Flex
• Provide a specification file
• Flex reads this file and
produces a C or C++ output file
that contains the scanner.
• The file consists of three
sections
5
Flex Specification File
1 C or C++ and flex definitions

6
Flex Specification File
1 C or C++ and flex definitions
%%
2 token definitions and actions

7
Flex Specification File
1 C or C++ and flex definitions
%%
2 token definitions and actions

%%
3 user code

8
Specification File lex.l
%{
/* Token codes (TOK_*) returned by the actions in the rules section. */
#include "tokdefs.h"
%}
/* D: one decimal digit.  L: one letter or an underscore. */
D [0-9]
L [a-zA-Z_]
/* id: a letter/underscore followed by any mix of letters, underscores, digits. */
id {L}({L}|{D})*
%%
"void" {return(TOK_VOID);}
"int" {return(TOK_INT);}
"if" {return(TOK_IF);}
9
Specification File lex.l
"else" {return(TOK_ELSE);}
"while" {return(TOK_WHILE);}
"<=" {return(TOK_LE);}
">=" {return(TOK_GE);}
"==" {return(TOK_EQ);}
"!=" {return(TOK_NE);}
{D}+ { /* NOTE(review): integer literals return the same TOK_INT as the "int" keyword rule; confirm whether a separate literal token is intended. */ return(TOK_INT);}
{id} {return(TOK_ID);}
[\n]|[\t]|[ ] { /* Skip a newline, tab or space; no token is returned. */ }
%%
10
File tokdefs.h
/* Token codes returned by the scanner's rule actions. */
#define TOK_VOID 1
#define TOK_INT 2
#define TOK_IF 3
#define TOK_ELSE 4
#define TOK_WHILE 5
#define TOK_LE 6
#define TOK_GE 7
#define TOK_EQ 8
#define TOK_NE 9
/*
 * This entry used to be a second definition of TOK_INT with the value 10,
 * which silently replaced the keyword's code 2.  It now has its own name so
 * that both codes stay available.
 */
#define TOK_NUM 10
/* NOTE(review): 111 breaks the otherwise consecutive numbering; possibly meant to be 11 - confirm. */
#define TOK_ID 111

11
Invoking Flex

lex.l → flex → lex.cpp

12
Using Generated Scanner
// Driver for the generated scanner: prints "code,lexeme" for every token
// until yylex() returns 0.
// NOTE(review): stock flex declares FlexLexer as an abstract base class and
// yyFlexLexer as the concrete scanner; confirm which one this course's
// headers provide.
int main()
{
    FlexLexer lex;
    int tc = lex.yylex();
    while (tc != 0) {
        cout << tc << "," << lex.YYText() << endl;
        // The next token must be fetched inside the loop body; otherwise tc
        // never changes and the loop cannot terminate.
        tc = lex.yylex();
    }
    return 0;
}
13
Creating Scanner EXE
flex lex.l
g++ -c lex.cpp
g++ -c main.cpp
g++ -o lex.exe lex.o main.o

lex <main.cpp

14
Input Tokenized
dos> .\lex < main.cpp
259,void
258,main
283,(
284,)
285,{
258,FlexLexer
258,lex
290,;
260,int
15
Input Tokenized
258,tc
266,=
258,lex
291,.
258,yylex
283,(
284,)
290,;
263,while
16
Input Tokenized
283,(
258,tc
276,!=
257,0
284,)
258,cout
279,<<
258,tc
17
Input Tokenized
279,<<
292,","
279,<<
258,lex
291,.
258,YYText
283,(
284,)
279,<<
18
Input Tokenized
258,endl
290,;
258,tc
266,=
258,lex
291,.
258,yylex
283,(
284,)
290,;
286,}
19
Flex Input for C++
/*
* ISO C++ lexical analyzer.
* Based on the ISO C++ draft standard of December '96.
*/

%{
#include <ctype.h>
#include <stdio.h>
#include "tokdefs.h"

/* Current line counter; bumped by the newline rule and the comment skippers. */
int lineno;

/* Helpers implemented in the user-code section after the rules. */
static int yywrap(void);
static void skip_until_eol(void);
static void skip_comment(void);
static int check_identifier(const char *);
%}

20
/* Integer suffix: u/U optionally followed by l/L, or l/L optionally followed by u/U. */
intsuffix ([uU][lL]?)|([lL][uU]?)
/* Fractional constant: digits are required on at least one side of the point. */
fracconst ([0-9]*\.[0-9]+)|([0-9]+\.)
/* Exponent: e/E, optional sign, one or more digits. */
exppart [eE][-+]?[0-9]+
floatsuffix [fFlL]
/*
 * One unit of literal text: either an escape (a backslash plus the character
 * after it) or a single character that is neither the closing quote nor a
 * backslash.  The backslash is kept out of the plain class on purpose: if it
 * were allowed there, a literal ending in an escaped backslash could be read
 * as "backslash, then escaped quote", and the longest-match rule would run on
 * to the next quote on the line.  The escape accepts a newline as well, so a
 * backslash-newline inside a literal is still matched.
 */
chartext ([^'\\])|(\\(.|\n))
stringtext ([^"\\])|(\\(.|\n))
%%

21
%%
"\n" { /* Keep the line counter used in diagnostics up to date. */ ++lineno; }
[\t\f\v\r ]+ { /* Ignore whitespace. */ }

"/*" { /* Opener matched; the helper consumes the rest of the comment. */ skip_comment(); }
"//" { /* Opener matched; the helper consumes the rest of the line. */ skip_until_eol(); }

"{" { return '{'; }


"<%" { /* Two-character spelling that yields the same token as the opening brace. */ return '{'; }
"}" { return '}'; }
"%>" { /* Two-character spelling that yields the same token as the closing brace. */ return '}'; }
"[" { return '['; }
"<:" { /* Two-character spelling that yields the same token as the opening bracket. */ return '['; }

22
"]" { /* Single-character punctuators return their own character code. */ return ']'; }
":>" { /* Two-character spelling that yields the same token as the closing bracket. */ return ']'; }
"(" { return '('; }
")" { return ')'; }
";" { return ';'; }
":" { return ':'; }
"..." { return ELLIPSIS; }
"?" { return '?'; }
"::" { return COLONCOLON; }
"." { return '.'; }
".*" { /* Quoted, so this is the literal two characters, not a regex. */ return DOTSTAR; }
"+" { return '+'; }
"-" { return '-'; }
"*" { return '*'; }
"/" { return '/'; }
"%" { return '%'; }
"^" { return '^'; }
"xor" { /* Word spelling; same token as the symbol form. */ return '^'; }
"&" { return '&'; }
"bitand" { /* Word spelling; same token as the symbol form. */ return '&'; }

23
"|" { /* Each word spelling below returns the same token as its symbol form. */ return '|'; }
"bitor" { return '|'; }
"~" { return '~'; }
"compl" { return '~'; }
"!" { return '!'; }
"not" { return '!'; }
"=" { return '='; }
"<" { return '<'; }
">" { return '>'; }
"+=" { /* Compound assignments get named token codes rather than character codes. */ return ADDEQ; }
"-=" { return SUBEQ; }
"*=" { return MULEQ; }
"/=" { return DIVEQ; }
"%=" { return MODEQ; }
"^=" { return XOREQ; }
"xor_eq" { return XOREQ; }
"&=" { return ANDEQ; }
"and_eq" { return ANDEQ; }
"|=" { return OREQ; }
"or_eq" { return OREQ; }

24
"<<" { /* Multi-character operators: shifts, comparisons, logical, increment, member access. */ return SL; }
">>" { return SR; }
"<<=" { return SLEQ; }
">>=" { return SREQ; }
"==" { return EQ; }
"!=" { return NOTEQ; }
"not_eq" { /* Word spelling; same token as the symbol form. */ return NOTEQ; }
"<=" { return LTEQ; }
">=" { return GTEQ; }
"&&" { return ANDAND; }
"and" { /* Word spelling; same token as the symbol form. */ return ANDAND; }
"||" { return OROR; }
"or" { /* Word spelling; same token as the symbol form. */ return OROR; }
"++" { return PLUSPLUS; }
"--" { return MINUSMINUS; }
"," { return ','; }
"->*" { return ARROWSTAR; }
"->" { return ARROW; }

25
"asm" { /* Keywords: each one returns its own token code.  These rules appear ahead of the general identifier rule. */ return ASM; }
"auto" { return AUTO; }
"bool" { return BOOL; }
"break" { return BREAK; }
"case" { return CASE; }
"catch" { return CATCH; }
"char" { return CHAR; }
"class" { return CLASS; }
"const" { return CONST; }
"const_cast" { return CONST_CAST; }
"continue" { return CONTINUE; }
"default" { return DEFAULT; }
"delete" { return DELETE; }
"do" { return DO; }
"double" { return DOUBLE; }
"dynamic_cast" { return DYNAMIC_CAST; }
"else" { return ELSE; }
"enum" { return ENUM; }
"explicit" { return EXPLICIT; }
"export" { return EXPORT; }

26
"extern" { /* Keywords, continued: one token code per word. */ return EXTERN; }
"false" { return FALSE; }
"float" { return FLOAT; }
"for" { return FOR; }
"friend" { return FRIEND; }
"goto" { return GOTO; }
"if" { return IF; }
"inline" { return INLINE; }
"int" { return INT; }
"long" { return LONG; }
"mutable" { return MUTABLE; }
"namespace" { return NAMESPACE; }
"new" { return NEW; }
"operator" { return OPERATOR; }
"private" { return PRIVATE; }
"protected" { return PROTECTED; }
"public" { return PUBLIC; }
"register" { return REGISTER; }
"reinterpret_cast" { return REINTERPRET_CAST; }
"return" { return RETURN; }

27
"short" { /* Keywords, continued: one token code per word. */ return SHORT; }
"signed" { return SIGNED; }
"sizeof" { return SIZEOF; }
"static" { return STATIC; }
"static_cast" { return STATIC_CAST; }
"struct" { return STRUCT; }
"switch" { return SWITCH; }
"template" { return TEMPLATE; }
"this" { return THIS; }
"throw" { return THROW; }
"true" { return TRUE; }
"try" { return TRY; }
"typedef" { return TYPEDEF; }
"typeid" { return TYPEID; }
"typename" { return TYPENAME; }
"union" { return UNION; }
"unsigned" { return UNSIGNED; }
"using" { return USING; }
"virtual" { return VIRTUAL; }
"void" { return VOID; }

28
"volatile" { return VOLATILE; }
"wchar_t" { return WCHAR_T; }
"while" { return WHILE; }

[a-zA-Z_][a-zA-Z_0-9]* { /* The action must start on the pattern's own line.  Any other word is classified by the helper. */ return check_identifier(yytext); }

"0"[xX][0-9a-fA-F]+{intsuffix}? { /* Hexadecimal literal. */ return INTEGER; }
"0"[0-7]+{intsuffix}? { /* Leading zero followed by octal digits. */ return INTEGER; }
[0-9]+{intsuffix}? { /* Any other digit run, including a lone 0. */ return INTEGER; }

29
{fracconst}{exppart}?{floatsuffix}? { /* Has a decimal point; exponent and suffix are optional. */ return
FLOATING; }
[0-9]+{exppart}{floatsuffix}? { /* No decimal point, so the exponent is required. */ return FLOATING; }

"'"{chartext}*"'" { /* Character literal: quote, zero or more chartext units, quote. */ return CHARACTER; }


"L'"{chartext}*"'" { /* L-prefixed form; same token as the unprefixed one. */ return CHARACTER; }

"\""{stringtext}*"\"" { /* String literal: quote, zero or more stringtext units, quote. */ return STRING; }


"L\""{stringtext}*"\"" { /* L-prefixed form; same token as the unprefixed one. */ return STRING; }

30
. { /* Last rule: any single character not matched above is reported on stderr with the current line counter; no token is returned. */ fprintf(stderr,
"%d: unexpected character `%c'\n", lineno,
yytext[0]); }

%%

/*
 * End-of-input hook for the scanner.  Always answers 1; per the flex manual
 * a non-zero result means there is no further input to switch to.
 */
static int yywrap(void)
{
    const int no_more_input = 1;

    return no_more_input;
}

31
/*
 * Discard the remainder of a block comment whose opening delimiter has
 * already been matched by the scanner, bumping lineno for each newline that
 * passes through.  Returns after the closing delimiter has been consumed, or
 * at end of input if the comment is never closed.
 *
 * End of input is recognised as either EOF or 0: some flex releases report
 * it from input() as 0 rather than EOF.
 */
static void
skip_comment(void)
{
    int prev, cur;

    prev = input();
    if (prev == EOF || prev == 0) {
        /* Input ended right after the opener; do not read past the end. */
        return;
    }
    cur = input();

    /* Slide a two-character window along until it holds the closing pair. */
    while (cur != EOF && cur != 0 && !(prev == '*' && cur == '/')) {
        if (prev == '\n') {
            ++lineno;
        }
        prev = cur;
        cur = input();
    }
}

32
/*
 * Discard input up to and including the next newline (used for line
 * comments).  lineno is bumped only when a newline was actually consumed;
 * a comment that runs into end of input does not start a new line.
 *
 * End of input is recognised as either EOF or 0: some flex releases report
 * it from input() as 0 rather than EOF.
 */
static void
skip_until_eol(void)
{
    int c;

    do {
        c = input();
    } while (c != EOF && c != 0 && c != '\n');

    if (c == '\n') {
        ++lineno;
    }
}

33
/*
 * Classify an identifier lexeme.
 *
 * Intended to report whether `s' names a typedef, namespace, class, enum,
 * template, ... or is a plain identifier.  This version decides purely from
 * the first character of `s'.
 *
 * s: NUL-terminated lexeme.
 * Returns TYPEDEF_NAME, NAMESPACE_NAME, CLASS_NAME, ENUM_NAME or
 * TEMPLATE_NAME for a leading 'D', 'N', 'C', 'E' or 'T' respectively, and
 * IDENTIFIER for anything else.
 */
static int
check_identifier(const char *s)
{
    const char first = s[0];

    if (first == 'D')
        return TYPEDEF_NAME;
    if (first == 'N')
        return NAMESPACE_NAME;
    if (first == 'C')
        return CLASS_NAME;
    if (first == 'E')
        return ENUM_NAME;
    if (first == 'T')
        return TEMPLATE_NAME;

    return IDENTIFIER;
}

34
Thanks

35

You might also like