use flex to do scanning for assembler

git-svn-id: svn+ssh://svn.cprover.org/srv/svn/cbmc/trunk@2020 6afb6bc1-c8e4-404c-8f48-9ae832c5b171
This commit is contained in:
kroening 2013-01-21 13:32:53 +00:00
parent 47af99fdce
commit 64d040c8f6
9 changed files with 309 additions and 25 deletions

View File

@ -1,5 +1,6 @@
DIRS = ansi-c big-int cbmc cpp goto-cc goto-instrument goto-programs \
goto-symex langapi pointer-analysis solvers util linking xmllang
goto-symex langapi pointer-analysis solvers util linking xmllang \
assembler
all: cbmc.dir goto-cc.dir goto-instrument.dir
@ -14,7 +15,7 @@ $(patsubst %, %.dir, $(filter-out big-int util, $(DIRS))): util.dir
.PHONY: clean
languages: util.dir langapi.dir \
cpp.dir ansi-c.dir xmllang.dir
cpp.dir ansi-c.dir xmllang.dir assembler.dir
goto-instrument.dir: languages goto-programs.dir pointer-analysis.dir \
goto-symex.dir linking.dir

22
src/assembler/Makefile Normal file
View File

@ -0,0 +1,22 @@
SRC = lex.yy.cpp assembler_parser.cpp
INCLUDES= -I .. -I ../util
include ../config.inc
include ../common
CLEANFILES = assembler$(LIBEXT) \
lex.yy.cpp
all: assembler$(LIBEXT)
###############################################################################
lex.yy.cpp: scanner.l
$(LEX) -Pyyassembler -o$@ scanner.l
###############################################################################
assembler$(LIBEXT): $(OBJ)
$(LINKLIB)

View File

@ -0,0 +1,34 @@
/*******************************************************************\
Module:
Author: Daniel Kroening, kroening@kroening.com
\*******************************************************************/
#include <iostream>
#include "assembler_parser.h"
assembler_parsert assembler_parser;
/*******************************************************************\
Function: yyassemblererror
Inputs:
Outputs:
Purpose:
\*******************************************************************/
extern char *yyassemblertext;
int yyassemblererror(const std::string &error)
{
assembler_parser.parse_error(error, yyassemblertext);
return 0;
}

View File

@ -0,0 +1,58 @@
/*******************************************************************\
Module:
Author: Daniel Kroening, kroening@kroening.com
\*******************************************************************/
#ifndef CPROVER_ASSEMBLER_PARSER_H
#define CPROVER_ASSEMBLER_PARSER_H
#include <parser.h>
#include <expr.h>
int yyassemblerlex();
int yyassemblererror(const std::string &error);
void assembler_scanner_init();
class assembler_parsert:public parsert
{
public:
typedef std::vector<irept> instructiont;
std::list<instructiont> instructions;
void add_token(const irept &irep)
{
if(instructions.empty())
new_instruction();
instructions.back().push_back(irep);
}
void new_instruction()
{
instructions.push_back(instructiont());
}
assembler_parsert()
{
}
virtual bool parse()
{
yyassemblerlex();
return false;
}
virtual void clear()
{
parsert::clear();
instructions.clear();
//assembler_scanner_init();
}
};
extern assembler_parsert assembler_parser;
#endif

108
src/assembler/scanner.l Normal file
View File

@ -0,0 +1,108 @@
%option nounput
%{
#ifdef _WIN32
#define YY_NO_UNISTD_H
static int isatty(int) { return 0; }
#endif
#define PARSER assembler_parser
#define YYSTYPE unsigned
#undef ECHO
#define ECHO
#include "assembler_parser.h"
/*** macros for easier rule definition **********************************/
%}
delimiter [ \t\b\r]
newline [\n\f\v]|"\\\n"
whitespace {delimiter}+
ws {delimiter}*
ws_or_newline ({delimiter}|{newline})*
ucletter [A-Z]
lcletter [a-z]
letter ({ucletter}|{lcletter})
digit [0-9]
bindigit [01]
octdigit [0-7]
hexdigit [0-9a-fA-F]
identifier (({letter}|"_"|"$"|".")({letter}|{digit}|"_"|"$"|".")*)
integer {digit}+
binary {bindigit}+
bininteger "0"[bB]{bindigit}+{int_suffix}
decinteger [1-9]{digit}*{int_suffix}
octinteger "0"{octdigit}*{int_suffix}
hexinteger "0"[xX]{hexdigit}+{int_suffix}
integer_s {decinteger}|{bininteger}|{octinteger}|{hexinteger}
octchar "\\"{octdigit}{1,3}
hexchar "\\x"{hexdigit}+
escape_sequence [\\][^\n]
c_char [^'\\\n]|{escape_sequence}
s_char [^"\\\n]|{escape_sequence}
char_lit ("L"|"u"|"U")?[']{c_char}+[']
string_lit ("L"|"u"|"U"|"u8")?["]{s_char}*["]
%x GRAMMAR
%x LINE_COMMENT
%{
void assemlber_scanner_init()
{
YY_FLUSH_BUFFER;
BEGIN(0);
}
%}
%%
<INITIAL>.|\n { BEGIN(GRAMMAR);
yyless(0); /* start again with this character */
}
<GRAMMAR>"#" { PARSER.new_instruction(); BEGIN(LINE_COMMENT); } /* begin comment state */
<LINE_COMMENT>{
\n { BEGIN(GRAMMAR); } /* end comment state, back GRAMMAR */
.* { } /* all characters within comments are ignored */
}
<GRAMMAR>{newline} { PARSER.new_instruction(); }
<GRAMMAR>";" { PARSER.new_instruction(); }
<GRAMMAR>{whitespace} { } /* skipped */
%{
/*** keywords ***/
%}
<GRAMMAR>{
".data" { }
}
%{
/*** rest ***/
%}
<GRAMMAR>{
{ws} { /* ignore */ }
{newline} { /* ignore */ }
{identifier} { irept identifier(ID_symbol);
identifier.set(ID_identifier, yytext);
PARSER.add_token(identifier); }
">>" { PARSER.add_token(irept(ID_shr)); }
"<<" { PARSER.add_token(irept(ID_shl)); }
. { std::string s;
s+=yytext[0];
PARSER.add_token(irept(s)); }
}
<<EOF>> { yyterminate(); /* done! */ }
%%
int yywrap() { return 1; }

View File

@ -11,6 +11,7 @@ OBJ += ../ansi-c/ansi-c$(LIBEXT) \
../pointer-analysis/pointer-analysis$(LIBEXT) \
../langapi/langapi$(LIBEXT) \
../xmllang/xmllang$(LIBEXT) \
../assembler/assembler$(LIBEXT) \
../solvers/solvers$(LIBEXT) \
../util/util$(LIBEXT)

View File

@ -12,6 +12,7 @@ OBJ += ../big-int/bigint$(OBJEXT) \
../linking/linking$(LIBEXT) \
../ansi-c/ansi-c$(LIBEXT) \
../xmllang/xmllang$(LIBEXT) \
../assembler/assembler$(LIBEXT) \
../langapi/langapi$(LIBEXT)
INCLUDES= -I .. -I ../util

View File

@ -15,6 +15,7 @@ OBJ += ../ansi-c/ansi-c$(LIBEXT) \
../big-int/bigint$(OBJEXT) \
../goto-programs/goto-programs$(LIBEXT) \
../goto-symex/goto-symex$(LIBEXT) \
../assembler/assembler$(LIBEXT) \
../pointer-analysis/pointer-analysis$(LIBEXT) \
../langapi/langapi$(LIBEXT) \
../util/util$(LIBEXT)

View File

@ -7,6 +7,7 @@ Author: Daniel Kroening, kroening@kroening.com
\*******************************************************************/
#include <ansi-c/string_constant.h>
#include <assembler/assembler_parser.h>
#include "goto_convert_class.h"
@ -22,36 +23,73 @@ Function: goto_convertt::convert_asm
\*******************************************************************/
void goto_convertt::convert_asm(const codet &code, goto_programt &dest)
void goto_convertt::convert_asm(
const codet &code,
goto_programt &dest)
{
if(code.get(ID_flavor)==ID_gcc)
{
const irep_idt &i_str=
to_string_constant(code.op0()).get_value();
//std::cout << "DOING " << i_str << std::endl;
std::istringstream str(id2string(i_str));
assembler_parser.clear();
assembler_parser.in=&str;
assembler_parser.parse();
goto_programt tmp_dest;
bool unknown=false;
bool lock_prefix=false;
std::string line;
while(std::getline(str, line))
for(std::list<assembler_parsert::instructiont>::const_iterator
it=assembler_parser.instructions.begin();
it!=assembler_parser.instructions.end();
it++)
{
// remove comments
std::string::size_type c=line.find("#");
if(c!=std::string::npos) line.resize(c);
// remove leading whitespace
c=line.find_first_not_of(" \t");
if(c!=std::string::npos) line.erase(0, c);
// remove trailing whitespace
c=line.find_last_not_of(" \t");
if(c!=std::string::npos) line.resize(c+1);
if(line.empty()) continue;
if(line=="mfence") // x86
const assembler_parsert::instructiont &instruction=*it;
if(instruction.empty()) continue;
#if 0
std::cout << "A ********************\n";
for(assembler_parsert::instructiont::const_iterator
t_it=instruction.begin();
t_it!=instruction.end();
t_it++)
{
std::cout << "XX: " << t_it->pretty() << std::endl;
}
std::cout << "B ********************\n";
#endif
// deal with prefixes
irep_idt command;
unsigned pos=0;
if(pos<instruction.size() &&
instruction[pos].id()==ID_symbol &&
instruction[pos].get(ID_identifier)=="lock")
{
lock_prefix=true;
pos++;
}
// done?
if(pos==instruction.size())
continue;
if(pos<instruction.size() &&
instruction[pos].id()==ID_symbol)
{
command=instruction[pos].get(ID_identifier);
pos++;
}
if(command=="mfence" ||
command=="lfence" ||
command=="sfence") // x86
{
goto_programt::targett t=tmp_dest.add_instruction(OTHER);
t->location=code.location();
@ -62,7 +100,24 @@ void goto_convertt::convert_asm(const codet &code, goto_programt &dest)
t->code.set(ID_RWfence, true);
t->code.set(ID_WRfence, true);
}
else if(line=="sync") // Power
else if(command=="addl") // x86
{
// hack for now to do lock addl as barrier
if(lock_prefix)
{
goto_programt::targett t=tmp_dest.add_instruction(OTHER);
t->location=code.location();
t->code=codet(ID_fence);
t->code.location()=code.location();
t->code.set(ID_WWfence, true);
t->code.set(ID_RRfence, true);
t->code.set(ID_RWfence, true);
t->code.set(ID_WRfence, true);
}
else
unknown=true;
}
else if(command=="sync") // Power
{
goto_programt::targett t=tmp_dest.add_instruction(OTHER);
t->location=code.location();
@ -77,7 +132,7 @@ void goto_convertt::convert_asm(const codet &code, goto_programt &dest)
t->code.set(ID_RRcumul, true);
t->code.set(ID_WRcumul, true);
}
else if(line=="lwsync") // Power
else if(command=="lwsync") // Power
{
goto_programt::targett t=tmp_dest.add_instruction(OTHER);
t->location=code.location();
@ -90,7 +145,7 @@ void goto_convertt::convert_asm(const codet &code, goto_programt &dest)
t->code.set(ID_RWcumul, true);
t->code.set(ID_RRcumul, true);
}
else if(line=="isync") // Power
else if(command=="isync") // Power
{
goto_programt::targett t=tmp_dest.add_instruction(OTHER);
t->location=code.location();
@ -99,7 +154,7 @@ void goto_convertt::convert_asm(const codet &code, goto_programt &dest)
// doesn't do anything by itself,
// needs to be combined with branch
}
else if(line=="dmb" || line=="dsb") // ARM
else if(command=="dmb" || command=="dsb") // ARM
{
goto_programt::targett t=tmp_dest.add_instruction(OTHER);
t->location=code.location();
@ -114,7 +169,7 @@ void goto_convertt::convert_asm(const codet &code, goto_programt &dest)
t->code.set(ID_RRcumul, true);
t->code.set(ID_WRcumul, true);
}
else if(line=="isb") // ARM
else if(command=="isb") // ARM
{
goto_programt::targett t=tmp_dest.add_instruction(OTHER);
t->location=code.location();
@ -125,6 +180,9 @@ void goto_convertt::convert_asm(const codet &code, goto_programt &dest)
}
else
unknown=true; // give up
// clean up prefixes
lock_prefix=false;
}
if(unknown)