KModule.cpp

Go to the documentation of this file.
00001 //===-- KModule.cpp -------------------------------------------------------===//
00002 //
00003 //                     The KLEE Symbolic Virtual Machine
00004 //
00005 // This file is distributed under the University of Illinois Open Source
00006 // License. See LICENSE.TXT for details.
00007 //
00008 //===----------------------------------------------------------------------===//
00009 
00010 // FIXME: This does not belong here.
00011 #include "../Core/Common.h"
00012 
00013 #include "klee/Internal/Module/KModule.h"
00014 
00015 #include "Passes.h"
00016 
00017 #include "klee/Interpreter.h"
00018 #include "klee/Internal/Module/Cell.h"
00019 #include "klee/Internal/Module/KInstruction.h"
00020 #include "klee/Internal/Module/InstructionInfoTable.h"
00021 #include "klee/Internal/Support/ModuleUtil.h"
00022 
00023 #include "llvm/Bitcode/ReaderWriter.h"
00024 #include "llvm/Instructions.h"
00025 #include "llvm/Module.h"
00026 #include "llvm/PassManager.h"
00027 #include "llvm/ValueSymbolTable.h"
00028 #include "llvm/Support/CommandLine.h"
00029 #include "llvm/System/Path.h"
00030 #include "llvm/Target/TargetData.h"
00031 #include "llvm/Transforms/Scalar.h"
00032 
00033 #include <sstream>
00034 
00035 using namespace llvm;
00036 using namespace klee;
00037 
00038 namespace {
00039   enum SwitchImplType {
00040     eSwitchTypeSimple,
00041     eSwitchTypeLLVM,
00042     eSwitchTypeInternal
00043   };
00044 
00045   cl::list<std::string>
00046   MergeAtExit("merge-at-exit");
00047     
00048   cl::opt<bool>
00049   NoTruncateSourceLines("no-truncate-source-lines",
00050                         cl::desc("Don't truncate long lines in the output source"));
00051 
00052   cl::opt<bool>
00053   OutputSource("output-source",
00054                cl::desc("Write the assembly for the final transformed source"),
00055                cl::init(true));
00056 
00057   cl::opt<bool>
00058   OutputModule("output-module",
00059                cl::desc("Write the bitcode for the final transformed module"),
00060                cl::init(false));
00061 
00062   cl::opt<SwitchImplType>
00063   SwitchType("switch-type", cl::desc("Select the implementation of switch"),
00064              cl::values(clEnumValN(eSwitchTypeSimple, "simple", 
00065                                    "lower to ordered branches"),
00066                         clEnumValN(eSwitchTypeLLVM, "llvm", 
00067                                    "lower using LLVM"),
00068                         clEnumValN(eSwitchTypeInternal, "internal", 
00069                                    "execute switch internally"),
00070                         clEnumValEnd),
00071              cl::init(eSwitchTypeInternal));
00072   
00073   cl::opt<bool>
00074   DebugPrintEscapingFunctions("debug-print-escaping-functions", 
00075                               cl::desc("Print functions whose address is taken."));
00076 }
00077 
00078 KModule::KModule(Module *_module) 
00079   : module(_module),
00080     targetData(new TargetData(module)),
00081     dbgStopPointFn(0),
00082     kleeMergeFn(0),
00083     infos(0),
00084     constantTable(0) {
00085 }
00086 
00087 KModule::~KModule() {
00088   delete[] constantTable;
00089   delete infos;
00090 
00091   for (std::vector<KFunction*>::iterator it = functions.begin(), 
00092          ie = functions.end(); it != ie; ++it)
00093     delete *it;
00094 
00095   delete targetData;
00096   delete module;
00097 }
00098 
00099 /***/
00100 
00101 namespace llvm {
00102 extern void Optimize(Module*);
00103 }
00104 
00105 // what a hack
00106 static Function *getStubFunctionForCtorList(Module *m,
00107                                             GlobalVariable *gv, 
00108                                             std::string name) {
00109   assert(!gv->isDeclaration() && !gv->hasInternalLinkage() &&
00110          "do not support old LLVM style constructor/destructor lists");
00111   
00112   std::vector<const Type*> nullary;
00113 
00114   Function *fn = Function::Create(FunctionType::get(Type::VoidTy, 
00115                                                     nullary, false),
00116                                   GlobalVariable::InternalLinkage, 
00117                                   name,
00118                               m);
00119   BasicBlock *bb = BasicBlock::Create("entry", fn);
00120   
00121   // From lli:
00122   // Should be an array of '{ int, void ()* }' structs.  The first value is
00123   // the init priority, which we ignore.
00124   ConstantArray *arr = dyn_cast<ConstantArray>(gv->getInitializer());
00125   if (arr) {
00126     for (unsigned i=0; i<arr->getNumOperands(); i++) {
00127       ConstantStruct *cs = cast<ConstantStruct>(arr->getOperand(i));
00128       assert(cs->getNumOperands()==2 && "unexpected element in ctor initializer list");
00129       
00130       Constant *fp = cs->getOperand(1);      
00131       if (!fp->isNullValue()) {
00132         if (llvm::ConstantExpr *ce = dyn_cast<llvm::ConstantExpr>(fp))
00133           fp = ce->getOperand(0);
00134 
00135         if (Function *f = dyn_cast<Function>(fp)) {
00136           CallInst::Create(f, "", bb);
00137         } else {
00138           assert(0 && "unable to get function pointer from ctor initializer list");
00139         }
00140       }
00141     }
00142   }
00143   
00144   ReturnInst::Create(bb);
00145 
00146   return fn;
00147 }
00148 
00149 static void injectStaticConstructorsAndDestructors(Module *m) {
00150   GlobalVariable *ctors = m->getNamedGlobal("llvm.global_ctors");
00151   GlobalVariable *dtors = m->getNamedGlobal("llvm.global_dtors");
00152   
00153   if (ctors || dtors) {
00154     Function *mainFn = m->getFunction("main");
00155     assert(mainFn && "unable to find main function");
00156 
00157     if (ctors)
00158     CallInst::Create(getStubFunctionForCtorList(m, ctors, "klee.ctor_stub"),
00159                      "", mainFn->begin()->begin());
00160     if (dtors) {
00161       Function *dtorStub = getStubFunctionForCtorList(m, dtors, "klee.dtor_stub");
00162       for (Function::iterator it = mainFn->begin(), ie = mainFn->end();
00163            it != ie; ++it) {
00164         if (isa<ReturnInst>(it->getTerminator()))
00165           CallInst::Create(dtorStub, "", it->getTerminator());
00166       }
00167     }
00168   }
00169 }
00170 
00171 static void forceImport(Module *m, const char *name, const Type *retType, ...) {
00172   // If module lacks an externally visible symbol for the name then we
00173   // need to create one. We have to look in the symbol table because
00174   // we want to check everything (global variables, functions, and
00175   // aliases).
00176 
00177   Value *v = m->getValueSymbolTable().lookup(name);
00178   GlobalValue *gv = dyn_cast_or_null<GlobalValue>(v);
00179 
00180   if (!gv || gv->hasInternalLinkage()) {
00181     va_list ap;
00182 
00183     va_start(ap, retType);
00184     std::vector<const Type *> argTypes;
00185     while (const Type *t = va_arg(ap, const Type*))
00186       argTypes.push_back(t);
00187     va_end(ap);
00188 
00189     m->getOrInsertFunction(name, FunctionType::get(retType, argTypes, false));
00190   }
00191 }
00192 
00193 void KModule::prepare(const Interpreter::ModuleOptions &opts,
00194                       InterpreterHandler *ih) {
00195   if (!MergeAtExit.empty()) {
00196     Function *mergeFn = module->getFunction("klee_merge");
00197     if (!mergeFn) {
00198       const llvm::FunctionType *Ty = 
00199         FunctionType::get(Type::VoidTy, std::vector<const Type*>(), false);
00200       mergeFn = Function::Create(Ty, GlobalVariable::ExternalLinkage,
00201                                  "klee_merge",
00202                                  module);
00203     }
00204 
00205     for (cl::list<std::string>::iterator it = MergeAtExit.begin(), 
00206            ie = MergeAtExit.end(); it != ie; ++it) {
00207       std::string &name = *it;
00208       Function *f = module->getFunction(name);
00209       if (!f) {
00210         klee_error("cannot insert merge-at-exit for: %s (cannot find)",
00211                    name.c_str());
00212       } else if (f->isDeclaration()) {
00213         klee_error("cannot insert merge-at-exit for: %s (external)",
00214                    name.c_str());
00215       }
00216 
00217       BasicBlock *exit = BasicBlock::Create("exit", f);
00218       PHINode *result = 0;
00219       if (f->getReturnType() != Type::VoidTy)
00220         result = PHINode::Create(f->getReturnType(), "retval", exit);
00221       CallInst::Create(mergeFn, "", exit);
00222       ReturnInst::Create(result, exit);
00223 
00224       llvm::cerr << "KLEE: adding klee_merge at exit of: " << name << "\n";
00225       for (llvm::Function::iterator bbit = f->begin(), bbie = f->end(); 
00226            bbit != bbie; ++bbit) {
00227         if (&*bbit != exit) {
00228           Instruction *i = bbit->getTerminator();
00229           if (i->getOpcode()==Instruction::Ret) {
00230             if (result) {
00231               result->addIncoming(i->getOperand(0), bbit);
00232             }
00233             i->eraseFromParent();
00234             BranchInst::Create(exit, bbit);
00235           }
00236         }
00237       }
00238     }
00239   }
00240 
00241   // Inject checks prior to optimization... we also perform the
00242   // invariant transformations that we will end up doing later so that
00243   // optimize is seeing what is as close as possible to the final
00244   // module.
00245   PassManager pm;
00246   pm.add(new RaiseAsmPass());
00247   if (opts.CheckDivZero) pm.add(new DivCheckPass());
00248   // FIXME: This false here is to work around a bug in
00249   // IntrinsicLowering which caches values which may eventually be
00250   // deleted (via RAUW). This can be removed once LLVM fixes this
00251   // issue.
00252   pm.add(new IntrinsicCleanerPass(*targetData, false));
00253   pm.run(*module);
00254 
00255   if (opts.Optimize)
00256     Optimize(module);
00257 
00258   // Force importing functions required by intrinsic lowering. Kind of
00259   // unfortunate clutter when we don't need them but we won't know
00260   // that until after all linking and intrinsic lowering is
00261   // done. After linking and passes we just try to manually trim these
00262   // by name. We only add them if such a function doesn't exist to
00263   // avoid creating stale uses.
00264 
00265   forceImport(module, "memcpy", PointerType::getUnqual(Type::Int8Ty),
00266               PointerType::getUnqual(Type::Int8Ty),
00267               PointerType::getUnqual(Type::Int8Ty),
00268               targetData->getIntPtrType(), (Type*) 0);
00269   forceImport(module, "memmove", PointerType::getUnqual(Type::Int8Ty),
00270               PointerType::getUnqual(Type::Int8Ty),
00271               PointerType::getUnqual(Type::Int8Ty),
00272               targetData->getIntPtrType(), (Type*) 0);
00273   forceImport(module, "memset", PointerType::getUnqual(Type::Int8Ty),
00274               PointerType::getUnqual(Type::Int8Ty),
00275               Type::Int32Ty,
00276               targetData->getIntPtrType(), (Type*) 0);
00277 
00278   // FIXME: Missing force import for various math functions.
00279 
00280   // FIXME: Find a way that we can test programs without requiring
00281   // this to be linked in, it makes low level debugging much more
00282   // annoying.
00283   llvm::sys::Path path(opts.LibraryDir);
00284   path.appendComponent("libintrinsic.bca");
00285   module = linkWithLibrary(module, path.c_str());
00286 
00287   // Needs to happen after linking (since ctors/dtors can be modified)
00288   // and optimization (since global optimization can rewrite lists).
00289   injectStaticConstructorsAndDestructors(module);
00290 
00291   // Finally, run the passes that maintain invariants we expect during
00292   // interpretation. We run the intrinsic cleaner just in case we
00293   // linked in something with intrinsics but any external calls are
00294   // going to be unresolved. We really need to handle the intrinsics
00295   // directly I think?
00296   PassManager pm3;
00297   pm3.add(createCFGSimplificationPass());
00298   switch(SwitchType) {
00299   case eSwitchTypeInternal: break;
00300   case eSwitchTypeSimple: pm3.add(new LowerSwitchPass()); break;
00301   case eSwitchTypeLLVM:  pm3.add(createLowerSwitchPass()); break;
00302   default: klee_error("invalid --switch-type");
00303   }
00304   pm3.add(new IntrinsicCleanerPass(*targetData));
00305   pm3.add(new PhiCleanerPass());
00306   pm3.run(*module);
00307 
00308   // For cleanliness see if we can discard any of the functions we
00309   // forced to import.
00310   Function *f;
00311   f = module->getFunction("memcpy");
00312   if (f && f->use_empty()) f->eraseFromParent();
00313   f = module->getFunction("memmove");
00314   if (f && f->use_empty()) f->eraseFromParent();
00315   f = module->getFunction("memset");
00316   if (f && f->use_empty()) f->eraseFromParent();
00317 
00318 
00319   // Write out the .ll assembly file. We truncate long lines to work
00320   // around a kcachegrind parsing bug (it puts them on new lines), so
00321   // that source browsing works.
00322   if (OutputSource) {
00323     std::ostream *os = ih->openOutputFile("assembly.ll");
00324     assert(os && os->good() && "unable to open source output");
00325 
00326     // We have an option for this in case the user wants a .ll they
00327     // can compile.
00328     if (NoTruncateSourceLines) {
00329       *os << *module;
00330     } else {
00331       bool truncated = false;
00332       std::stringstream buffer;
00333       buffer << *module;
00334       std::string string = buffer.str();
00335       const char *position = string.c_str();
00336 
00337       for (;;) {
00338         const char *end = index(position, '\n');
00339         if (!end) {
00340           *os << position;
00341           break;
00342         } else {
00343           unsigned count = (end - position) + 1;
00344           if (count<255) {
00345             os->write(position, count);
00346           } else {
00347             os->write(position, 254);
00348             *os << "\n";
00349             truncated = true;
00350           }
00351           position = end+1;
00352         }
00353       }
00354     }
00355 
00356     delete os;
00357   }
00358 
00359   if (OutputModule) {
00360     std::ostream *f = ih->openOutputFile("final.bc");
00361     WriteBitcodeToFile(module, *f);
00362     delete f;
00363   }
00364 
00365   dbgStopPointFn = module->getFunction("llvm.dbg.stoppoint");
00366   kleeMergeFn = module->getFunction("klee_merge");
00367 
00368   /* Build shadow structures */
00369 
00370   infos = new InstructionInfoTable(module);  
00371   
00372   for (Module::iterator it = module->begin(), ie = module->end();
00373        it != ie; ++it) {
00374     if (it->isDeclaration())
00375       continue;
00376 
00377     KFunction *kf = new KFunction(it, this);
00378     
00379     for (unsigned i=0; i<kf->numInstructions; ++i) {
00380       KInstruction *ki = kf->instructions[i];
00381       ki->info = &infos->getInfo(ki->inst);
00382     }
00383 
00384     functions.push_back(kf);
00385     functionMap.insert(std::make_pair(it, kf));
00386   }
00387 
00388   /* Compute various interesting properties */
00389 
00390   for (std::vector<KFunction*>::iterator it = functions.begin(), 
00391          ie = functions.end(); it != ie; ++it) {
00392     KFunction *kf = *it;
00393     if (functionEscapes(kf->function))
00394       escapingFunctions.insert(kf->function);
00395   }
00396 
00397   if (DebugPrintEscapingFunctions && !escapingFunctions.empty()) {
00398     llvm::cerr << "KLEE: escaping functions: [";
00399     for (std::set<Function*>::iterator it = escapingFunctions.begin(), 
00400          ie = escapingFunctions.end(); it != ie; ++it) {
00401       llvm::cerr << (*it)->getName() << ", ";
00402     }
00403     llvm::cerr << "]\n";
00404   }
00405 }
00406 
00407 KConstant* KModule::getKConstant(Constant *c) {
00408   std::map<llvm::Constant*, KConstant*>::iterator it = constantMap.find(c);
00409   if (it != constantMap.end())
00410     return it->second;
00411   return NULL;
00412 }
00413 
00414 unsigned KModule::getConstantID(Constant *c, KInstruction* ki) {
00415   KConstant *kc = getKConstant(c);
00416   if (kc)
00417     return kc->id;  
00418 
00419   unsigned id = constants.size();
00420   kc = new KConstant(c, id, ki);
00421   constantMap.insert(std::make_pair(c, kc));
00422   constants.push_back(c);
00423   return id;
00424 }
00425 
00426 /***/
00427 
00428 KConstant::KConstant(llvm::Constant* _ct, unsigned _id, KInstruction* _ki) {
00429   ct = _ct;
00430   id = _id;
00431   ki = _ki;
00432 }
00433 
00434 /***/
00435 
00436 KFunction::KFunction(llvm::Function *_function,
00437                      KModule *km) 
00438   : function(_function),
00439     numArgs(function->arg_size()),
00440     numInstructions(0),
00441     trackCoverage(true) {
00442   for (llvm::Function::iterator bbit = function->begin(), 
00443          bbie = function->end(); bbit != bbie; ++bbit) {
00444     BasicBlock *bb = bbit;
00445     basicBlockEntry[bb] = numInstructions;
00446     numInstructions += bb->size();
00447   }
00448 
00449   instructions = new KInstruction*[numInstructions];
00450 
00451   std::map<Instruction*, unsigned> registerMap;
00452 
00453   // The first arg_size() registers are reserved for formals.
00454   unsigned rnum = numArgs;
00455   for (llvm::Function::iterator bbit = function->begin(), 
00456          bbie = function->end(); bbit != bbie; ++bbit) {
00457     for (llvm::BasicBlock::iterator it = bbit->begin(), ie = bbit->end();
00458          it != ie; ++it)
00459       registerMap[it] = rnum++;
00460   }
00461   numRegisters = rnum;
00462   
00463   unsigned i = 0;
00464   for (llvm::Function::iterator bbit = function->begin(), 
00465          bbie = function->end(); bbit != bbie; ++bbit) {
00466     for (llvm::BasicBlock::iterator it = bbit->begin(), ie = bbit->end();
00467          it != ie; ++it) {
00468       KInstruction *ki;
00469 
00470       switch(it->getOpcode()) {
00471       case Instruction::GetElementPtr:
00472         ki = new KGEPInstruction(); break;
00473       default:
00474         ki = new KInstruction(); break;
00475       }
00476 
00477       unsigned numOperands = it->getNumOperands();
00478       ki->inst = it;      
00479       ki->operands = new int[numOperands];
00480       ki->dest = registerMap[it];
00481       for (unsigned j=0; j<numOperands; j++) {
00482         Value *v = it->getOperand(j);
00483         
00484         if (Instruction *inst = dyn_cast<Instruction>(v)) {
00485           ki->operands[j] = registerMap[inst];
00486         } else if (Argument *a = dyn_cast<Argument>(v)) {
00487           ki->operands[j] = a->getArgNo();
00488         } else if (isa<BasicBlock>(v) || isa<InlineAsm>(v)) {
00489           ki->operands[j] = -1;
00490         } else {
00491           assert(isa<Constant>(v));
00492           Constant *c = cast<Constant>(v);
00493           ki->operands[j] = -(km->getConstantID(c, ki) + 2);
00494         }
00495       }
00496 
00497       instructions[i++] = ki;
00498     }
00499   }
00500 }
00501 
00502 KFunction::~KFunction() {
00503   for (unsigned i=0; i<numInstructions; ++i)
00504     delete instructions[i];
00505   delete[] instructions;
00506 }

Generated on Fri Jun 5 03:31:32 2009 for klee by  doxygen 1.5.8