|
| 1 | +// This file is a part of Julia. License is MIT: https://julialang.org/license |
| 2 | + |
| 3 | +// Function multi-versioning |
| 4 | +#define DEBUG_TYPE "julia_mv" |
| 5 | +#undef DEBUG |
| 6 | + |
| 7 | +// LLVM pass to clone function for different archs |
| 8 | + |
| 9 | +#include "llvm-version.h" |
| 10 | +#include "support/dtypes.h" |
| 11 | + |
| 12 | +#include <llvm/Pass.h> |
| 13 | +#include <llvm/IR/Module.h> |
| 14 | +#include <llvm/IR/Function.h> |
| 15 | +#include <llvm/IR/Instructions.h> |
| 16 | +#include <llvm/IR/Constants.h> |
| 17 | +#include <llvm/IR/LLVMContext.h> |
| 18 | +#include <llvm/Analysis/LoopInfo.h> |
| 19 | +#if JL_LLVM_VERSION >= 30700 |
| 20 | +#include <llvm/IR/LegacyPassManager.h> |
| 21 | +#else |
| 22 | +#include <llvm/PassManager.h> |
| 23 | +#endif |
| 24 | +#include <llvm/IR/MDBuilder.h> |
| 25 | +#include <llvm/IR/IRBuilder.h> |
| 26 | +#include <llvm/Transforms/Utils/Cloning.h> |
| 27 | +#include "fix_llvm_assert.h" |
| 28 | + |
| 29 | +#include "julia.h" |
| 30 | +#include "julia_internal.h" |
| 31 | + |
| 32 | +#include <unordered_map> |
| 33 | +#include <vector> |
| 34 | + |
| 35 | +using namespace llvm; |
| 36 | + |
| 37 | +extern std::pair<MDNode*,MDNode*> tbaa_make_child(const char *name, MDNode *parent=nullptr, bool isConstant=false); |
| 38 | +extern "C" void jl_dump_llvm_value(void *v); |
| 39 | + |
| 40 | +namespace { |
| 41 | + |
| 42 | +struct JuliaMV: public ModulePass { |
| 43 | + static char ID; |
| 44 | + JuliaMV() |
| 45 | + : ModulePass(ID) |
| 46 | + {} |
| 47 | + |
| 48 | +private: |
| 49 | + bool runOnModule(Module &M) override; |
| 50 | + void getAnalysisUsage(AnalysisUsage &AU) const override |
| 51 | + { |
| 52 | + AU.addRequired<LoopInfoWrapperPass>(); |
| 53 | + AU.setPreservesAll(); |
| 54 | + } |
| 55 | + bool shouldClone(Function &F); |
| 56 | + bool checkUses(Function &F, Constant *fary); |
| 57 | + bool checkUses(Function &F, Constant *V, Constant *fary, bool &inFVars); |
| 58 | + bool checkConstantUse(Function &F, Constant *V, Constant *fary, bool &inFVars); |
| 59 | +}; |
| 60 | + |
| 61 | +bool JuliaMV::shouldClone(Function &F) |
| 62 | +{ |
| 63 | + if (F.empty()) |
| 64 | + return false; |
| 65 | + auto &LI = getAnalysis<LoopInfoWrapperPass>(F).getLoopInfo(); |
| 66 | + if (!LI.empty()) |
| 67 | + return true; |
| 68 | + for (auto &bb: F) { |
| 69 | + for (auto &I: bb) { |
| 70 | + if (auto call = dyn_cast<CallInst>(&I)) { |
| 71 | + if (auto callee = call->getCalledFunction()) { |
| 72 | + auto name = callee->getName(); |
| 73 | + if (name.startswith("llvm.muladd.") || name.startswith("llvm.fma.")) { |
| 74 | + return true; |
| 75 | + } |
| 76 | + } |
| 77 | + } |
| 78 | + } |
| 79 | + } |
| 80 | + return false; |
| 81 | +} |
| 82 | + |
| 83 | +bool JuliaMV::checkUses(Function &F, Constant *fary) |
| 84 | +{ |
| 85 | + bool inFVars = false; |
| 86 | + bool res = checkUses(F, &F, fary, inFVars); |
| 87 | + return res && inFVars; |
| 88 | +} |
| 89 | + |
| 90 | +bool JuliaMV::checkConstantUse(Function &F, Constant *V, Constant *fary, bool &inFVars) |
| 91 | +{ |
| 92 | + if (V == fary) { |
| 93 | + inFVars = true; |
| 94 | + return true; |
| 95 | + } |
| 96 | + if (auto cexpr = dyn_cast<ConstantExpr>(V)) { |
| 97 | + if (cexpr->getOpcode() == Instruction::BitCast) { |
| 98 | + return checkUses(F, V, fary, inFVars); |
| 99 | + } |
| 100 | + } |
| 101 | + return false; |
| 102 | +} |
| 103 | + |
| 104 | +bool JuliaMV::checkUses(Function &F, Constant *V, Constant *fary, bool &inFVars) |
| 105 | +{ |
| 106 | + for (auto *user: V->users()) { |
| 107 | + if (isa<Instruction>(user)) |
| 108 | + continue; |
| 109 | + auto *C = dyn_cast<Constant>(user); |
| 110 | + if (!C || !checkConstantUse(F, C, fary, inFVars)) { |
| 111 | + return false; |
| 112 | + } |
| 113 | + } |
| 114 | + return true; |
| 115 | +} |
| 116 | + |
| 117 | +static Function *getFunction(Value *v) |
| 118 | +{ |
| 119 | + if (auto f = dyn_cast<Function>(v)) |
| 120 | + return f; |
| 121 | + if (auto c = dyn_cast<ConstantExpr>(v)) { |
| 122 | + if (c->getOpcode() == Instruction::BitCast) { |
| 123 | + return getFunction(c->getOperand(0)); |
| 124 | + } |
| 125 | + } |
| 126 | + return nullptr; |
| 127 | +} |
| 128 | + |
| 129 | +static void addFeatures(Function *F) |
| 130 | +{ |
| 131 | + auto attr = F->getFnAttribute("target-features"); |
| 132 | + std::string feature = |
| 133 | + "+avx2,+avx,+fma,+popcnt,+sse,+sse2,+sse3,+sse4.1,+sse4.2,+ssse3"; |
| 134 | + if (attr.isStringAttribute()) { |
| 135 | + feature += ","; |
| 136 | + feature += attr.getValueAsString(); |
| 137 | + } |
| 138 | + F->addFnAttr("target-features", feature); |
| 139 | +} |
| 140 | + |
| 141 | +bool JuliaMV::runOnModule(Module &M) |
| 142 | +{ |
| 143 | + MDNode *tbaa_const = tbaa_make_child("jtbaa_const", nullptr, true).first; |
| 144 | + GlobalVariable *fvars = M.getGlobalVariable("jl_sysimg_fvars"); |
| 145 | + // This makes sure this only runs during sysimg generation |
| 146 | + if (!fvars || !fvars->hasInitializer()) |
| 147 | + return true; |
| 148 | + auto *fary = dyn_cast<ConstantArray>(fvars->getInitializer()); |
| 149 | + if (!fary) |
| 150 | + return true; |
| 151 | + LLVMContext &ctx = M.getContext(); |
| 152 | + ValueToValueMapTy VMap; |
| 153 | + for (auto &F: M) { |
| 154 | + if (shouldClone(F) && checkUses(F, fary)) { |
| 155 | + Function *NF = Function::Create(cast<FunctionType>(F.getValueType()), |
| 156 | + F.getLinkage(), F.getName() + ".avx2", &M); |
| 157 | + NF->copyAttributesFrom(&F); |
| 158 | + VMap[&F] = NF; |
| 159 | + } |
| 160 | + } |
| 161 | + std::unordered_map<Function*,size_t> idx_map; |
| 162 | + size_t nf = fary->getNumOperands(); |
| 163 | + for (size_t i = 0; i < nf; i++) { |
| 164 | + if (Function *ele = getFunction(fary->getOperand(i))) { |
| 165 | + auto it = VMap.find(ele); |
| 166 | + if (it != VMap.end()) { |
| 167 | + idx_map[ele] = i; |
| 168 | + } |
| 169 | + } |
| 170 | + } |
| 171 | + for (auto I: idx_map) { |
| 172 | + auto oldF = I.first; |
| 173 | + auto newF = cast<Function>(VMap[oldF]); |
| 174 | + Function::arg_iterator DestI = newF->arg_begin(); |
| 175 | + for (Function::const_arg_iterator J = oldF->arg_begin(); J != oldF->arg_end(); ++J) { |
| 176 | + DestI->setName(J->getName()); |
| 177 | + VMap[&*J] = &*DestI++; |
| 178 | + } |
| 179 | + SmallVector<ReturnInst*,8> Returns; |
| 180 | + CloneFunctionInto(newF, oldF, VMap, false, Returns); |
| 181 | + addFeatures(newF); |
| 182 | + } |
| 183 | + std::vector<Constant*> ptrs; |
| 184 | + std::vector<Constant*> idxs; |
| 185 | + auto T_void = Type::getVoidTy(ctx); |
| 186 | + auto T_pvoidfunc = FunctionType::get(T_void, false)->getPointerTo(); |
| 187 | + auto T_size = (sizeof(size_t) == 8 ? Type::getInt64Ty(ctx) : Type::getInt32Ty(ctx)); |
| 188 | + for (auto I: idx_map) { |
| 189 | + auto oldF = I.first; |
| 190 | + auto idx = I.second; |
| 191 | + auto newF = cast<Function>(VMap[oldF]); |
| 192 | + ptrs.push_back(ConstantExpr::getBitCast(newF, T_pvoidfunc)); |
| 193 | + auto offset = ConstantInt::get(T_size, idx); |
| 194 | + idxs.push_back(offset); |
| 195 | + for (auto user: oldF->users()) { |
| 196 | + auto inst = dyn_cast<Instruction>(user); |
| 197 | + if (!inst) |
| 198 | + continue; |
| 199 | + auto encloseF = inst->getParent()->getParent(); |
| 200 | + if (VMap.find(encloseF) != VMap.end()) |
| 201 | + continue; |
| 202 | + auto slot = GetElementPtrInst::Create(T_pvoidfunc->getPointerTo(), fvars, |
| 203 | + {offset}, "", inst); |
| 204 | + Instruction *ptr = new LoadInst(slot, "", inst); |
| 205 | + ptr->setMetadata(llvm::LLVMContext::MD_tbaa, tbaa_const); |
| 206 | + ptr = new BitCastInst(ptr, oldF->getType(), "", inst); |
| 207 | + inst->replaceUsesOfWith(oldF, ptr); |
| 208 | + } |
| 209 | + } |
| 210 | + ArrayType *fvars_type = ArrayType::get(T_pvoidfunc, ptrs.size()); |
| 211 | + auto ptr_gv = new GlobalVariable(M, fvars_type, true, GlobalVariable::InternalLinkage, |
| 212 | + ConstantArray::get(fvars_type, ptrs)); |
| 213 | + ArrayType *idxs_type = ArrayType::get(T_size, idxs.size()); |
| 214 | + auto idx_gv = new GlobalVariable(M, idxs_type, true, GlobalVariable::InternalLinkage, |
| 215 | + ConstantArray::get(idxs_type, idxs)); |
| 216 | + |
| 217 | + std::vector<Type*> dispatch_args(0); |
| 218 | + dispatch_args.push_back(Type::getInt64Ty(ctx)); // Feature mask |
| 219 | + dispatch_args.push_back(Type::getInt64Ty(ctx)); // Extended feature mask1 |
| 220 | + dispatch_args.push_back(Type::getInt64Ty(ctx)); // Extended feature mask2 |
| 221 | + dispatch_args.push_back(T_size->getPointerTo()); |
| 222 | + dispatch_args.push_back(fvars_type->getPointerTo()->getPointerTo()); |
| 223 | + dispatch_args.push_back(idxs_type->getPointerTo()->getPointerTo()); |
| 224 | + Function *dispatchF = Function::Create(FunctionType::get(T_void, dispatch_args, false), |
| 225 | + Function::ExternalLinkage, |
| 226 | + "jl_dispatch_sysimg_fvars", &M); |
| 227 | + IRBuilder<> builder(ctx); |
| 228 | + BasicBlock *b0 = BasicBlock::Create(ctx, "top", dispatchF); |
| 229 | + builder.SetInsertPoint(b0); |
| 230 | + DebugLoc noDbg; |
| 231 | + builder.SetCurrentDebugLocation(noDbg); |
| 232 | + |
| 233 | + std::vector<Argument*> args; |
| 234 | + for (auto &arg: dispatchF->args()) |
| 235 | + args.push_back(&arg); |
| 236 | + |
| 237 | + auto sz_arg = args[3]; |
| 238 | + auto fvars_arg = args[4]; |
| 239 | + auto idxs_arg = args[5]; |
| 240 | + |
| 241 | + // Hard code for now |
| 242 | + // EDX:ECX |
| 243 | + uint64_t mask = 1 | (1 << 9) | (1 << 12) | (1 << 19) | (1 << 20) | (1 << 23) | (1 << 28); |
| 244 | + // EBX:ECX |
| 245 | + uint64_t emask1 = uint64_t(1) << (5 + 32); |
| 246 | + // EDX:0 |
| 247 | + uint64_t emask2 = 0; |
| 248 | + |
| 249 | + builder.CreateStore(ConstantInt::get(T_size, ptrs.size()), sz_arg); |
| 250 | + |
| 251 | + auto createMaskCmp = [&] (Value *v, uint64_t mask) { |
| 252 | + auto maskv = ConstantInt::get(v->getType(), mask); |
| 253 | + return builder.CreateICmpEQ(builder.CreateAnd(v, maskv), maskv); |
| 254 | + }; |
| 255 | + |
| 256 | + auto match_mask = createMaskCmp(args[0], mask); |
| 257 | + auto match_emask1 = createMaskCmp(args[1], emask1); |
| 258 | + auto match_emask2 = createMaskCmp(args[2], emask2); |
| 259 | + |
| 260 | + auto match = builder.CreateAnd(match_mask, match_emask1); |
| 261 | + match = builder.CreateAnd(match, match_emask2); |
| 262 | + |
| 263 | + BasicBlock *match_bb = BasicBlock::Create(ctx, "match"); |
| 264 | + BasicBlock *fail_bb = BasicBlock::Create(ctx, "fail"); |
| 265 | + builder.CreateCondBr(match, match_bb, fail_bb); |
| 266 | + |
| 267 | + dispatchF->getBasicBlockList().push_back(match_bb); |
| 268 | + builder.SetInsertPoint(match_bb); |
| 269 | + builder.CreateStore(ptr_gv, fvars_arg); |
| 270 | + builder.CreateStore(idx_gv, idxs_arg); |
| 271 | + builder.CreateRetVoid(); |
| 272 | + |
| 273 | + dispatchF->getBasicBlockList().push_back(fail_bb); |
| 274 | + builder.SetInsertPoint(fail_bb); |
| 275 | + builder.CreateStore(ConstantPointerNull::get(fvars_type->getPointerTo()), fvars_arg); |
| 276 | + builder.CreateStore(ConstantPointerNull::get(idxs_type->getPointerTo()), idxs_arg); |
| 277 | + builder.CreateRetVoid(); |
| 278 | + |
| 279 | + // jl_dump_llvm_value(dispatchF); |
| 280 | + // jl_dump_llvm_value(ptr_gv); |
| 281 | + // jl_dump_llvm_value(idx_gv); |
| 282 | + |
| 283 | + return true; |
| 284 | +} |
| 285 | + |
| 286 | +char JuliaMV::ID = 0; |
| 287 | +static RegisterPass<JuliaMV> X("JuliaMV", "JuliaMV Pass", |
| 288 | + false /* Only looks at CFG */, |
| 289 | + false /* Analysis Pass */); |
| 290 | + |
| 291 | +} |
| 292 | + |
| 293 | +Pass *createJuliaMVPass() |
| 294 | +{ |
| 295 | + return new JuliaMV(); |
| 296 | +} |
0 commit comments