X86SpeculativeLoadHardening.cpp
1//====- X86SpeculativeLoadHardening.cpp - A Spectre v1 mitigation ---------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8/// \file
9///
10/// Provide a pass which mitigates speculative execution attacks which operate
11/// by speculating incorrectly past some predicate (a type check, bounds check,
12/// or other condition) to reach a load with invalid inputs and leak the data
13/// accessed by that load using a side channel out of the speculative domain.
14///
15/// For details on the attacks, see the first variant in both the Project Zero
16/// writeup and the Spectre paper:
17/// https://googleprojectzero.blogspot.com/2018/01/reading-privileged-memory-with-side.html
18/// https://spectreattack.com/spectre.pdf
19///
20//===----------------------------------------------------------------------===//
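// As a rough sketch of the generated code (assuming the usual cmov-based mode
// rather than the LFENCE-based one), the pass keeps an all-ones poison value
// and a predicate state value that is all-zeros on correctly predicted paths.
// Along each conditional edge a cmov keyed on the branch condition selects the
// poison value whenever that edge could only have been reached by
// misspeculation, and the state is then OR'ed into load addresses (or loaded
// values), e.g.:
//
//   testq %rdi, %rdi
//   je    .LBB0_2
//   cmoveq %poison, %pred   # on the fall-through, EQ implies misspeculation
//   ...
//   orq   %pred, %rcx       # poisoned state makes the pointer non-canonical
//   movq  (%rcx), %rax
//
// The remainder of this file implements that scheme, including threading the
// state across calls and returns through the high bits of the stack pointer.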
21
22#include "X86.h"
23#include "X86InstrInfo.h"
24#include "X86Subtarget.h"
25#include "llvm/ADT/ArrayRef.h"
26#include "llvm/ADT/DenseMap.h"
27#include "llvm/ADT/STLExtras.h"
29#include "llvm/ADT/SmallSet.h"
32#include "llvm/ADT/Statistic.h"
47#include "llvm/IR/DebugLoc.h"
48#include "llvm/MC/MCSchedule.h"
49#include "llvm/Pass.h"
51#include "llvm/Support/Debug.h"
54#include <cassert>
55#include <iterator>
56#include <optional>
57#include <utility>
58
59using namespace llvm;
60
61#define PASS_KEY "x86-slh"
62#define DEBUG_TYPE PASS_KEY
63
64STATISTIC(NumCondBranchesTraced, "Number of conditional branches traced");
65STATISTIC(NumBranchesUntraced, "Number of branches unable to trace");
66STATISTIC(NumAddrRegsHardened,
67 "Number of address mode used registers hardaned");
68STATISTIC(NumPostLoadRegsHardened,
69 "Number of post-load register values hardened");
70STATISTIC(NumCallsOrJumpsHardened,
71 "Number of calls or jumps requiring extra hardening");
72STATISTIC(NumInstsInserted, "Number of instructions inserted");
73STATISTIC(NumLFENCEsInserted, "Number of lfence instructions inserted");
74
76 "x86-speculative-load-hardening",
77 cl::desc("Force enable speculative load hardening"), cl::init(false),
78 cl::Hidden);
79
81 PASS_KEY "-lfence",
83 "Use LFENCE along each conditional edge to harden against speculative "
84 "loads rather than conditional movs and poisoned pointers."),
85 cl::init(false), cl::Hidden);
86
88 PASS_KEY "-post-load",
89 cl::desc("Harden the value loaded *after* it is loaded by "
90 "flushing the loaded bits to 1. This is hard to do "
91 "in general but can be done easily for GPRs."),
92 cl::init(true), cl::Hidden);
93
95 PASS_KEY "-fence-call-and-ret",
96 cl::desc("Use a full speculation fence to harden both call and ret edges "
97 "rather than a lighter weight mitigation."),
98 cl::init(false), cl::Hidden);
99
101 PASS_KEY "-ip",
102 cl::desc("Harden interprocedurally by passing our state in and out of "
103 "functions in the high bits of the stack pointer."),
104 cl::init(true), cl::Hidden);
105
106static cl::opt<bool>
108 cl::desc("Sanitize loads from memory. When disable, no "
109 "significant security is provided."),
110 cl::init(true), cl::Hidden);
111
113 PASS_KEY "-indirect",
114 cl::desc("Harden indirect calls and jumps against using speculatively "
115 "stored attacker controlled addresses. This is designed to "
116 "mitigate Spectre v1.2 style attacks."),
117 cl::init(true), cl::Hidden);
118
119namespace {
120
121class X86SpeculativeLoadHardeningPass : public MachineFunctionPass {
122public:
123 X86SpeculativeLoadHardeningPass() : MachineFunctionPass(ID) { }
124
125 StringRef getPassName() const override {
126 return "X86 speculative load hardening";
127 }
128 bool runOnMachineFunction(MachineFunction &MF) override;
129 void getAnalysisUsage(AnalysisUsage &AU) const override;
130
131 /// Pass identification, replacement for typeid.
132 static char ID;
133
134private:
135 /// The information about a block's conditional terminators needed to trace
136 /// our predicate state through the exiting edges.
137 struct BlockCondInfo {
138 MachineBasicBlock *MBB;
139
140 // We mostly have one conditional branch, and in extremely rare cases have
141 // two. Three or more are so rare as to be unimportant for compile time.
142 SmallVector<MachineInstr *, 2> CondBrs;
143
144 MachineInstr *UncondBr;
145 };
146
147 /// Manages the predicate state traced through the program.
148 struct PredState {
149 Register InitialReg;
150 Register PoisonReg;
151
152 const TargetRegisterClass *RC;
153 MachineSSAUpdater SSA;
154
155 PredState(MachineFunction &MF, const TargetRegisterClass *RC)
156 : RC(RC), SSA(MF) {}
157 };
158
159 const X86Subtarget *Subtarget = nullptr;
160 MachineRegisterInfo *MRI = nullptr;
161 const X86InstrInfo *TII = nullptr;
162 const TargetRegisterInfo *TRI = nullptr;
163
164 std::optional<PredState> PS;
165
166 void hardenEdgesWithLFENCE(MachineFunction &MF);
167
168 SmallVector<BlockCondInfo, 16> collectBlockCondInfo(MachineFunction &MF);
169
170 SmallVector<MachineInstr *, 16>
171 tracePredStateThroughCFG(MachineFunction &MF, ArrayRef<BlockCondInfo> Infos);
172
173 void unfoldCallAndJumpLoads(MachineFunction &MF);
174
175 SmallVector<MachineInstr *, 16>
176 tracePredStateThroughIndirectBranches(MachineFunction &MF);
177
178 void tracePredStateThroughBlocksAndHarden(MachineFunction &MF);
179
180 Register saveEFLAGS(MachineBasicBlock &MBB,
182 const DebugLoc &Loc);
183 void restoreEFLAGS(MachineBasicBlock &MBB,
184 MachineBasicBlock::iterator InsertPt, const DebugLoc &Loc,
185 Register Reg);
186
187 void mergePredStateIntoSP(MachineBasicBlock &MBB,
189 const DebugLoc &Loc, Register PredStateReg);
190 Register extractPredStateFromSP(MachineBasicBlock &MBB,
192 const DebugLoc &Loc);
193
194 void
195 hardenLoadAddr(MachineInstr &MI, MachineOperand &BaseMO,
196 MachineOperand &IndexMO,
197 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg);
199 sinkPostLoadHardenedInst(MachineInstr &MI,
200 SmallPtrSetImpl<MachineInstr *> &HardenedInstrs);
201 bool canHardenRegister(Register Reg);
202 Register hardenValueInRegister(Register Reg, MachineBasicBlock &MBB,
204 const DebugLoc &Loc);
205 Register hardenPostLoad(MachineInstr &MI);
206 void hardenReturnInstr(MachineInstr &MI);
207 void tracePredStateThroughCall(MachineInstr &MI);
208 void hardenIndirectCallOrJumpInstr(
210 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg);
211};
212
213} // end anonymous namespace
214
215char X86SpeculativeLoadHardeningPass::ID = 0;
216
217void X86SpeculativeLoadHardeningPass::getAnalysisUsage(
218 AnalysisUsage &AU) const {
219 MachineFunctionPass::getAnalysisUsage(AU);
220}
221
223 MachineBasicBlock &Succ, int SuccCount,
224 MachineInstr *Br, MachineInstr *&UncondBr,
225 const X86InstrInfo &TII) {
226 assert(!Succ.isEHPad() && "Shouldn't get edges to EH pads!");
227
228 MachineFunction &MF = *MBB.getParent();
229
230 MachineBasicBlock &NewMBB = *MF.CreateMachineBasicBlock();
231
232 // We have to insert the new block immediately after the current one as we
233 // don't know what layout-successor relationships the successor has and we
234 // may not be able to (and generally don't want to) try to fix those up.
235 MF.insert(std::next(MachineFunction::iterator(&MBB)), &NewMBB);
236
237 // Update the branch instruction if necessary.
238 if (Br) {
239 assert(Br->getOperand(0).getMBB() == &Succ &&
240 "Didn't start with the right target!");
241 Br->getOperand(0).setMBB(&NewMBB);
242
243 // If this successor was reached through a branch rather than fallthrough,
244 // we might have *broken* fallthrough and so need to inject a new
245 // unconditional branch.
246 if (!UncondBr) {
247 MachineBasicBlock &OldLayoutSucc =
248 *std::next(MachineFunction::iterator(&NewMBB));
249 assert(MBB.isSuccessor(&OldLayoutSucc) &&
250 "Without an unconditional branch, the old layout successor should "
251 "be an actual successor!");
252 auto BrBuilder =
253 BuildMI(&MBB, DebugLoc(), TII.get(X86::JMP_1)).addMBB(&OldLayoutSucc);
254 // Update the unconditional branch now that we've added one.
255 UncondBr = &*BrBuilder;
256 }
257
258 // Insert unconditional "jump Succ" instruction in the new block if
259 // necessary.
260 if (!NewMBB.isLayoutSuccessor(&Succ)) {
262 TII.insertBranch(NewMBB, &Succ, nullptr, Cond, Br->getDebugLoc());
263 }
264 } else {
265 assert(!UncondBr &&
266 "Cannot have a branchless successor and an unconditional branch!");
267 assert(NewMBB.isLayoutSuccessor(&Succ) &&
268 "A non-branch successor must have been a layout successor before "
269 "and now is a layout successor of the new block.");
270 }
271
272 // If this is the only edge to the successor, we can just replace it in the
273 // CFG. Otherwise we need to add a new entry in the CFG for the new
274 // successor.
275 if (SuccCount == 1) {
276 MBB.replaceSuccessor(&Succ, &NewMBB);
277 } else {
278 MBB.splitSuccessor(&Succ, &NewMBB);
279 }
280
281 // Hook up the edge from the new basic block to the old successor in the CFG.
282 NewMBB.addSuccessor(&Succ);
283
284 // Fix PHI nodes in Succ so they refer to NewMBB instead of MBB.
285 for (MachineInstr &MI : Succ) {
286 if (!MI.isPHI())
287 break;
288 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
289 OpIdx += 2) {
290 MachineOperand &OpV = MI.getOperand(OpIdx);
291 MachineOperand &OpMBB = MI.getOperand(OpIdx + 1);
292 assert(OpMBB.isMBB() && "Block operand to a PHI is not a block!");
293 if (OpMBB.getMBB() != &MBB)
294 continue;
295
296 // If this is the last edge to the successor, just replace MBB in the PHI.
297 if (SuccCount == 1) {
298 OpMBB.setMBB(&NewMBB);
299 break;
300 }
301
302 // Otherwise, append a new pair of operands for the new incoming edge.
303 MI.addOperand(MF, OpV);
304 MI.addOperand(MF, MachineOperand::CreateMBB(&NewMBB));
305 break;
306 }
307 }
308
309 // Inherit live-ins from the successor
310 for (auto &LI : Succ.liveins())
311 NewMBB.addLiveIn(LI);
312
313 LLVM_DEBUG(dbgs() << " Split edge from '" << MBB.getName() << "' to '"
314 << Succ.getName() << "'.\n");
315 return NewMBB;
316}
317
318/// Remove duplicate PHI operands to leave the PHI in a canonical and
319/// predictable form.
320///
321/// FIXME: It's really frustrating that we have to do this, but SSA-form in MIR
322/// isn't what you might expect. We may have multiple entries in PHI nodes for
323/// a single predecessor. This makes CFG-updating extremely complex, so here we
324/// simplify all PHI nodes to a model even simpler than the IR's model: exactly
325/// one entry per predecessor, regardless of how many edges there are.
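/// As a small example, a PHI that names the same predecessor twice, roughly
///   %x = PHI [ %a, %bb1 ], [ %a, %bb1 ], [ %b, %bb2 ]
/// is rewritten here to the canonical
///   %x = PHI [ %a, %bb1 ], [ %b, %bb2 ]
/// so that later CFG updates only ever have to touch one operand pair per
/// predecessor.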
328 SmallVector<int, 4> DupIndices;
329 for (auto &MBB : MF)
330 for (auto &MI : MBB) {
331 if (!MI.isPHI())
332 break;
333
334 // First we scan the operands of the PHI looking for duplicate entries for
335 // a particular predecessor. We retain the operand index of each duplicate
336 // entry found.
337 for (int OpIdx = 1, NumOps = MI.getNumOperands(); OpIdx < NumOps;
338 OpIdx += 2)
339 if (!Preds.insert(MI.getOperand(OpIdx + 1).getMBB()).second)
340 DupIndices.push_back(OpIdx);
341
342 // Now walk the duplicate indices, removing both the block and value. Note
343 // that these are stored as a vector making this element-wise removal
345 // potentially quadratic.
346 //
347 // FIXME: It is really frustrating that we have to use a quadratic
348 // removal algorithm here. There should be a better way, but the use-def
349 // updates required make that impossible using the public API.
350 //
351 // Note that we have to process these backwards so that we don't
352 // invalidate other indices with each removal.
353 while (!DupIndices.empty()) {
354 int OpIdx = DupIndices.pop_back_val();
355 // Remove both the block and value operand, again in reverse order to
356 // preserve indices.
357 MI.removeOperand(OpIdx + 1);
358 MI.removeOperand(OpIdx);
359 }
360
361 Preds.clear();
362 }
363}
364
365/// Helper to scan a function for loads vulnerable to misspeculation that we
366/// want to harden.
367///
368/// We use this to avoid making changes to functions where there is nothing we
369/// need to do to harden against misspeculation.
371 for (MachineBasicBlock &MBB : MF) {
372 for (MachineInstr &MI : MBB) {
373 // Loads within this basic block after an LFENCE are not at risk of
374 // speculatively executing with invalid predicates from prior control
375 // flow. So break out of this block but continue scanning the function.
376 if (MI.getOpcode() == X86::LFENCE)
377 break;
378
379 // Looking for loads only.
380 if (!MI.mayLoad())
381 continue;
382
383 // An MFENCE is modeled as a load but isn't vulnerable to misspeculation.
384 if (MI.getOpcode() == X86::MFENCE)
385 continue;
386
387 // We found a load.
388 return true;
389 }
390 }
391
392 // No loads found.
393 return false;
394}
395
396bool X86SpeculativeLoadHardeningPass::runOnMachineFunction(
397 MachineFunction &MF) {
398 LLVM_DEBUG(dbgs() << "********** " << getPassName() << " : " << MF.getName()
399 << " **********\n");
400
401 // Only run if this pass is forced enabled or we detect the relevant function
402 // attribute requesting SLH.
403 if (!EnableSpeculativeLoadHardening &&
404 !MF.getFunction().hasFnAttribute(Attribute::SpeculativeLoadHardening))
405 return false;
406
407 Subtarget = &MF.getSubtarget<X86Subtarget>();
408 MRI = &MF.getRegInfo();
409 TII = Subtarget->getInstrInfo();
410 TRI = Subtarget->getRegisterInfo();
411
412 // FIXME: Support for 32-bit.
413 PS.emplace(MF, &X86::GR64_NOSPRegClass);
414
415 if (MF.begin() == MF.end())
416 // Nothing to do for a degenerate empty function...
417 return false;
418
419 // We support an alternative hardening technique based on a debug flag.
420 if (HardenEdgesWithLFENCE) {
421 hardenEdgesWithLFENCE(MF);
422 return true;
423 }
424
425 // Create a dummy debug loc to use for all the generated code here.
426 DebugLoc Loc;
427
428 MachineBasicBlock &Entry = *MF.begin();
429 auto EntryInsertPt = Entry.SkipPHIsLabelsAndDebug(Entry.begin());
430
431 // Do a quick scan to see if we have any checkable loads.
432 bool HasVulnerableLoad = hasVulnerableLoad(MF);
433
434 // See if we have any conditional branching blocks that we will need to trace
435 // predicate state through.
436 SmallVector<BlockCondInfo, 16> Infos = collectBlockCondInfo(MF);
437
438 // If we have no interesting conditions or loads, nothing to do here.
439 if (!HasVulnerableLoad && Infos.empty())
440 return true;
441
442 // The poison value is required to be an all-ones value for many aspects of
443 // this mitigation.
444 const int PoisonVal = -1;
445 PS->PoisonReg = MRI->createVirtualRegister(PS->RC);
446 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV64ri32), PS->PoisonReg)
447 .addImm(PoisonVal);
448 ++NumInstsInserted;
449
450 // If we have loads being hardened and we've asked for call and ret edges to
451 // get a full fence-based mitigation, inject that fence.
452 if (HasVulnerableLoad && FenceCallAndRet) {
453 // We need to insert an LFENCE at the start of the function to suspend any
454 // incoming misspeculation from the caller. This helps two-fold: the caller
455 // may not have been protected as this code has been, and this code gets to
456 // not take any specific action to protect across calls.
457 // FIXME: We could skip this for functions which unconditionally return
458 // a constant.
459 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::LFENCE));
460 ++NumInstsInserted;
461 ++NumLFENCEsInserted;
462 }
463
464 // If we guarded the entry with an LFENCE and have no conditionals to protect
465 // in blocks, then we're done.
466 if (FenceCallAndRet && Infos.empty())
467 // We may have changed the function's code at this point to insert fences.
468 return true;
469
470 // Decide how to establish the initial predicate state in the entry block.
471 if (HardenInterprocedurally && !FenceCallAndRet) {
472 // Set up the predicate state by extracting it from the incoming stack
473 // pointer so we pick up any misspeculation in our caller.
474 PS->InitialReg = extractPredStateFromSP(Entry, EntryInsertPt, Loc);
475 } else {
476 // Otherwise, just build the predicate state itself by zeroing a register
477 // as we don't need any initial state.
478 PS->InitialReg = MRI->createVirtualRegister(PS->RC);
479 Register PredStateSubReg = MRI->createVirtualRegister(&X86::GR32RegClass);
480 auto ZeroI = BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::MOV32r0),
481 PredStateSubReg);
482 ++NumInstsInserted;
483 MachineOperand *ZeroEFLAGSDefOp =
484 ZeroI->findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr);
485 assert(ZeroEFLAGSDefOp && ZeroEFLAGSDefOp->isImplicit() &&
486 "Must have an implicit def of EFLAGS!");
487 ZeroEFLAGSDefOp->setIsDead(true);
488 BuildMI(Entry, EntryInsertPt, Loc, TII->get(X86::SUBREG_TO_REG),
489 PS->InitialReg)
490 .addImm(0)
491 .addReg(PredStateSubReg)
492 .addImm(X86::sub_32bit);
493 }
494
495 // We're going to need to trace predicate state throughout the function's
496 // CFG. Prepare for this by setting up our initial state of PHIs with unique
497 // predecessor entries and all the initial predicate state.
498 canonicalizePHIOperands(MF);
499
500 // Track the updated values in an SSA updater to rewrite into SSA form at the
501 // end.
502 PS->SSA.Initialize(PS->InitialReg);
503 PS->SSA.AddAvailableValue(&Entry, PS->InitialReg);
504
505 // Trace through the CFG.
506 auto CMovs = tracePredStateThroughCFG(MF, Infos);
507
508 // We may also enter basic blocks in this function via exception handling
509 // control flow. Here, if we are hardening interprocedurally, we need to
510 // re-capture the predicate state from the throwing code. In the Itanium ABI,
511 // the throw will always look like a call to __cxa_throw and will have the
512 // predicate state in the stack pointer, so extract fresh predicate state from
513 // the stack pointer and make it available in SSA.
514 // FIXME: Handle non-itanium ABI EH models.
516 for (MachineBasicBlock &MBB : MF) {
517 assert(!MBB.isEHScopeEntry() && "Only Itanium ABI EH supported!");
518 assert(!MBB.isEHFuncletEntry() && "Only Itanium ABI EH supported!");
519 assert(!MBB.isCleanupFuncletEntry() && "Only Itanium ABI EH supported!");
520 if (!MBB.isEHPad())
521 continue;
522 PS->SSA.AddAvailableValue(
523 &MBB,
524 extractPredStateFromSP(MBB, MBB.SkipPHIsAndLabels(MBB.begin()), Loc));
525 }
526 }
527
528 if (HardenIndirectCallsAndJumps) {
529 // If we are going to harden calls and jumps we need to unfold their memory
530 // operands.
531 unfoldCallAndJumpLoads(MF);
532
533 // Then we trace predicate state through the indirect branches.
534 auto IndirectBrCMovs = tracePredStateThroughIndirectBranches(MF);
535 CMovs.append(IndirectBrCMovs.begin(), IndirectBrCMovs.end());
536 }
537
538 // Now that we have the predicate state available at the start of each block
539 // in the CFG, trace it through each block, hardening vulnerable instructions
540 // as we go.
541 tracePredStateThroughBlocksAndHarden(MF);
542
543 // Now rewrite all the uses of the pred state using the SSA updater to insert
544 // PHIs connecting the state between blocks along the CFG edges.
545 for (MachineInstr *CMovI : CMovs)
546 for (MachineOperand &Op : CMovI->operands()) {
547 if (!Op.isReg() || Op.getReg() != PS->InitialReg)
548 continue;
549
550 PS->SSA.RewriteUse(Op);
551 }
552
553 LLVM_DEBUG(dbgs() << "Final speculative load hardened function:\n"; MF.dump();
554 dbgs() << "\n"; MF.verify(this));
555 return true;
556}
557
558/// Implements the naive hardening approach of putting an LFENCE after every
559/// potentially mis-predicted control flow construct.
560///
561/// We include this as an alternative mostly for the purpose of comparison. The
562/// performance impact of this is expected to be extremely severe and not
563/// practical for any real-world users.
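/// Roughly, for a block ending in a conditional branch such as `jne .LBB0_3`,
/// both successors simply begin with an `lfence`:
///
///   .LBB0_2:          .LBB0_3:
///     lfence            lfence
///     ...               ...
///
/// which blocks speculation outright rather than tracking a predicate state.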
564void X86SpeculativeLoadHardeningPass::hardenEdgesWithLFENCE(
565 MachineFunction &MF) {
566 // First, we scan the function looking for blocks that are reached along edges
567 // that we might want to harden.
569 for (MachineBasicBlock &MBB : MF) {
570 // If there are no or only one successor, nothing to do here.
571 if (MBB.succ_size() <= 1)
572 continue;
573
574 // Skip blocks unless their terminators start with a branch. Other
575 // terminators don't seem interesting for guarding against misspeculation.
576 auto TermIt = MBB.getFirstTerminator();
577 if (TermIt == MBB.end() || !TermIt->isBranch())
578 continue;
579
580 // Add all the non-EH-pad successors to the blocks we want to harden. We
581 // skip EH pads because there isn't really a condition of interest on
582 // entering.
583 for (MachineBasicBlock *SuccMBB : MBB.successors())
584 if (!SuccMBB->isEHPad())
585 Blocks.insert(SuccMBB);
586 }
587
588 for (MachineBasicBlock *MBB : Blocks) {
589 auto InsertPt = MBB->SkipPHIsAndLabels(MBB->begin());
590 BuildMI(*MBB, InsertPt, DebugLoc(), TII->get(X86::LFENCE));
591 ++NumInstsInserted;
592 ++NumLFENCEsInserted;
593 }
594}
595
597X86SpeculativeLoadHardeningPass::collectBlockCondInfo(MachineFunction &MF) {
598 SmallVector<BlockCondInfo, 16> Infos;
599
600 // Walk the function and build up a summary for each block's conditions that
601 // we need to trace through.
602 for (MachineBasicBlock &MBB : MF) {
603 // If there are no or only one successor, nothing to do here.
604 if (MBB.succ_size() <= 1)
605 continue;
606
607 // We want to reliably handle any conditional branch terminators in the
608 // MBB, so we manually analyze the branch. We can handle all of the
609 // permutations here, including ones that analyze branch cannot.
610 //
611 // The approach is to walk backwards across the terminators, resetting at
612 // any unconditional non-indirect branch, and track all conditional edges
613 // to basic blocks as well as the fallthrough or unconditional successor
614 // edge. For each conditional edge, we track the target and the opposite
615 // condition code in order to inject a "no-op" cmov into that successor
616 // that will harden the predicate. For the fallthrough/unconditional
617 // edge, we inject a separate cmov for each conditional branch with
618 // matching condition codes. This effectively implements an "and" of the
619 // condition flags, even if there isn't a single condition flag that would
620 // directly implement that. We don't bother trying to optimize either of
621 // these cases because if such an optimization is possible, LLVM should
622 // have optimized the conditional *branches* in that way already to reduce
623 // instruction count. This late, we simply assume the minimal number of
624 // branch instructions is being emitted and use that to guide our cmov
625 // insertion.
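// As a rough example, a block ending in:
//   jne .LThen
//   jmp .LElse
// records one conditional branch (JNE) plus the unconditional branch. The
// edge to .LThen is later guarded with a CMOVE (the opposite condition)
// selecting the poison value, and the .LElse edge with a CMOVNE, so
// whichever way the branch really resolves, the mispredicted side observes
// poisoned predicate state.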
626
627 BlockCondInfo Info = {&MBB, {}, nullptr};
628
629 // Now walk backwards through the terminators and build up successors they
630 // reach and the conditions.
631 for (MachineInstr &MI : llvm::reverse(MBB)) {
632 // Once we've handled all the terminators, we're done.
633 if (!MI.isTerminator())
634 break;
635
636 // If we see a non-branch terminator, we can't handle anything so bail.
637 if (!MI.isBranch()) {
638 Info.CondBrs.clear();
639 break;
640 }
641
642 // If we see an unconditional branch, reset our state, clear any
643 // fallthrough, and set this is the "else" successor.
644 if (MI.getOpcode() == X86::JMP_1) {
645 Info.CondBrs.clear();
646 Info.UncondBr = &MI;
647 continue;
648 }
649
650 // If we get an invalid condition, we have an indirect branch or some
651 // other unanalyzable "fallthrough" case. We model this as a nullptr for
652 // the destination so we can still guard any conditional successors.
653 // Consider code sequences like:
654 // ```
655 // jCC L1
656 // jmpq *%rax
657 // ```
658 // We still want to harden the edge to `L1`.
659 if (X86::getCondFromBranch(MI) == X86::COND_INVALID) {
660 Info.CondBrs.clear();
661 Info.UncondBr = &MI;
662 continue;
663 }
664
665 // We have a vanilla conditional branch, add it to our list.
666 Info.CondBrs.push_back(&MI);
667 }
668 if (Info.CondBrs.empty()) {
669 ++NumBranchesUntraced;
670 LLVM_DEBUG(dbgs() << "WARNING: unable to secure successors of block:\n";
671 MBB.dump());
672 continue;
673 }
674
675 Infos.push_back(Info);
676 }
677
678 return Infos;
679}
680
681/// Trace the predicate state through the CFG, instrumenting each conditional
682/// branch such that misspeculation through an edge will poison the predicate
683/// state.
684///
685/// Returns the list of inserted CMov instructions so that they can have their
686/// uses of the predicate state rewritten into proper SSA form once it is
687/// complete.
688SmallVector<MachineInstr *, 16>
689X86SpeculativeLoadHardeningPass::tracePredStateThroughCFG(
690 MachineFunction &MF, ArrayRef<BlockCondInfo> Infos) {
691 // Collect the inserted cmov instructions so we can rewrite their uses of the
692 // predicate state into SSA form.
693 SmallVector<MachineInstr *, 16> CMovs;
694
695 // Now walk all of the basic blocks looking for ones that end in conditional
696 // jumps where we need to update this register along each edge.
697 for (const BlockCondInfo &Info : Infos) {
698 MachineBasicBlock &MBB = *Info.MBB;
699 const SmallVectorImpl<MachineInstr *> &CondBrs = Info.CondBrs;
700 MachineInstr *UncondBr = Info.UncondBr;
701
702 LLVM_DEBUG(dbgs() << "Tracing predicate through block: " << MBB.getName()
703 << "\n");
704 ++NumCondBranchesTraced;
705
706 // Compute the non-conditional successor as either the target of any
707 // unconditional branch or the layout successor.
708 MachineBasicBlock *UncondSucc =
709 UncondBr ? (UncondBr->getOpcode() == X86::JMP_1
710 ? UncondBr->getOperand(0).getMBB()
711 : nullptr)
712 : &*std::next(MachineFunction::iterator(&MBB));
713
714 // Count how many edges there are to any given successor.
715 SmallDenseMap<MachineBasicBlock *, int> SuccCounts;
716 if (UncondSucc)
717 ++SuccCounts[UncondSucc];
718 for (auto *CondBr : CondBrs)
719 ++SuccCounts[CondBr->getOperand(0).getMBB()];
720
721 // A lambda to insert cmov instructions into a block checking all of the
722 // condition codes in a sequence.
723 auto BuildCheckingBlockForSuccAndConds =
724 [&](MachineBasicBlock &MBB, MachineBasicBlock &Succ, int SuccCount,
725 MachineInstr *Br, MachineInstr *&UncondBr,
726 ArrayRef<X86::CondCode> Conds) {
727 // First, we split the edge to insert the checking block into a safe
728 // location.
729 auto &CheckingMBB =
730 (SuccCount == 1 && Succ.pred_size() == 1)
731 ? Succ
732 : splitEdge(MBB, Succ, SuccCount, Br, UncondBr, *TII);
733
734 bool LiveEFLAGS = Succ.isLiveIn(X86::EFLAGS);
735 if (!LiveEFLAGS)
736 CheckingMBB.addLiveIn(X86::EFLAGS);
737
738 // Now insert the cmovs to implement the checks.
739 auto InsertPt = CheckingMBB.begin();
740 assert((InsertPt == CheckingMBB.end() || !InsertPt->isPHI()) &&
741 "Should never have a PHI in the initial checking block as it "
742 "always has a single predecessor!");
743
744 // We will wire each cmov to each other, but need to start with the
745 // incoming pred state.
746 Register CurStateReg = PS->InitialReg;
747
748 for (X86::CondCode Cond : Conds) {
749 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
750 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
751
752 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
753 // Note that we intentionally use an empty debug location so that
754 // this picks up the preceding location.
755 auto CMovI = BuildMI(CheckingMBB, InsertPt, DebugLoc(),
756 TII->get(CMovOp), UpdatedStateReg)
757 .addReg(CurStateReg)
758 .addReg(PS->PoisonReg)
759 .addImm(Cond);
760 // If this is the last cmov and the EFLAGS weren't originally
761 // live-in, mark them as killed.
762 if (!LiveEFLAGS && Cond == Conds.back())
763 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)
764 ->setIsKill(true);
765
766 ++NumInstsInserted;
767 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump();
768 dbgs() << "\n");
769
770 // The first one of the cmovs will be using the top level
771 // `PredStateReg` and need to get rewritten into SSA form.
772 if (CurStateReg == PS->InitialReg)
773 CMovs.push_back(&*CMovI);
774
775 // The next cmov should start from this one's def.
776 CurStateReg = UpdatedStateReg;
777 }
778
779 // And put the last one into the available values for SSA form of our
780 // predicate state.
781 PS->SSA.AddAvailableValue(&CheckingMBB, CurStateReg);
782 };
783
784 std::vector<X86::CondCode> UncondCodeSeq;
785 for (auto *CondBr : CondBrs) {
786 MachineBasicBlock &Succ = *CondBr->getOperand(0).getMBB();
787 int &SuccCount = SuccCounts[&Succ];
788
789 X86::CondCode Cond = X86::getCondFromBranch(*CondBr);
790 X86::CondCode InvCond = X86::GetOppositeBranchCondition(Cond);
791 UncondCodeSeq.push_back(Cond);
792
793 BuildCheckingBlockForSuccAndConds(MBB, Succ, SuccCount, CondBr, UncondBr,
794 {InvCond});
795
796 // Decrement the successor count now that we've split one of the edges.
797 // We need to keep the count of edges to the successor accurate in order
798 // to know above when to *replace* the successor in the CFG vs. just
799 // adding the new successor.
800 --SuccCount;
801 }
802
803 // Since we may have split edges and changed the number of successors,
804 // normalize the probabilities. This avoids doing it each time we split an
805 // edge.
806 MBB.normalizeSuccProbs();
807
808 // Finally, we need to insert cmovs into the "fallthrough" edge. Here, we
809 // need to intersect the other condition codes. We can do this by just
810 // doing a cmov for each one.
811 if (!UncondSucc)
812 // If we have no fallthrough to protect (perhaps it is an indirect jump?)
813 // just skip this and continue.
814 continue;
815
816 assert(SuccCounts[UncondSucc] == 1 &&
817 "We should never have more than one edge to the unconditional "
818 "successor at this point because every other edge must have been "
819 "split above!");
820
821 // Sort and unique the codes to minimize them.
822 llvm::sort(UncondCodeSeq);
823 UncondCodeSeq.erase(llvm::unique(UncondCodeSeq), UncondCodeSeq.end());
824
825 // Build a checking version of the successor.
826 BuildCheckingBlockForSuccAndConds(MBB, *UncondSucc, /*SuccCount*/ 1,
827 UncondBr, UncondBr, UncondCodeSeq);
828 }
829
830 return CMovs;
831}
832
833/// Compute the register class for the unfolded load.
834///
835/// FIXME: This should probably live in X86InstrInfo, potentially by adding
836/// a way to unfold into a newly created vreg rather than requiring a register
837/// input.
838static const TargetRegisterClass *
839getRegClassForUnfoldedLoad(MachineFunction &MF, const X86InstrInfo &TII,
840 unsigned Opcode) {
841 unsigned Index;
842 unsigned UnfoldedOpc = TII.getOpcodeAfterMemoryUnfold(
843 Opcode, /*UnfoldLoad*/ true, /*UnfoldStore*/ false, &Index);
844 const MCInstrDesc &MCID = TII.get(UnfoldedOpc);
845 return TII.getRegClass(MCID, Index, &TII.getRegisterInfo(), MF);
846}
847
848void X86SpeculativeLoadHardeningPass::unfoldCallAndJumpLoads(
849 MachineFunction &MF) {
850 for (MachineBasicBlock &MBB : MF)
851 // We use make_early_inc_range here so we can remove instructions if needed
852 // without disturbing the iteration.
853 for (MachineInstr &MI : llvm::make_early_inc_range(MBB)) {
854 // Must either be a call or a branch.
855 if (!MI.isCall() && !MI.isBranch())
856 continue;
857 // We only care about loading variants of these instructions.
858 if (!MI.mayLoad())
859 continue;
860
861 switch (MI.getOpcode()) {
862 default: {
864 dbgs() << "ERROR: Found an unexpected loading branch or call "
865 "instruction:\n";
866 MI.dump(); dbgs() << "\n");
867 report_fatal_error("Unexpected loading branch or call!");
868 }
869
870 case X86::FARCALL16m:
871 case X86::FARCALL32m:
872 case X86::FARCALL64m:
873 case X86::FARJMP16m:
874 case X86::FARJMP32m:
875 case X86::FARJMP64m:
876 // We cannot mitigate far jumps or calls, but we also don't expect them
877 // to be vulnerable to Spectre v1.2 style attacks.
878 continue;
879
880 case X86::CALL16m:
881 case X86::CALL16m_NT:
882 case X86::CALL32m:
883 case X86::CALL32m_NT:
884 case X86::CALL64m:
885 case X86::CALL64m_NT:
886 case X86::JMP16m:
887 case X86::JMP16m_NT:
888 case X86::JMP32m:
889 case X86::JMP32m_NT:
890 case X86::JMP64m:
891 case X86::JMP64m_NT:
892 case X86::TAILJMPm64:
893 case X86::TAILJMPm64_REX:
894 case X86::TAILJMPm:
895 case X86::TCRETURNmi64:
896 case X86::TCRETURNmi: {
897 // Use the generic unfold logic now that we know we're dealing with
898 // expected instructions.
899 // FIXME: We don't have test coverage for all of these!
900 auto *UnfoldedRC = getRegClassForUnfoldedLoad(MF, *TII, MI.getOpcode());
901 if (!UnfoldedRC) {
903 << "ERROR: Unable to unfold load from instruction:\n";
904 MI.dump(); dbgs() << "\n");
905 report_fatal_error("Unable to unfold load!");
906 }
907 Register Reg = MRI->createVirtualRegister(UnfoldedRC);
908 SmallVector<MachineInstr *, 2> NewMIs;
909 // If we were able to compute an unfolded reg class, any failure here
910 // is just a programming error so just assert.
911 bool Unfolded =
912 TII->unfoldMemoryOperand(MF, MI, Reg, /*UnfoldLoad*/ true,
913 /*UnfoldStore*/ false, NewMIs);
914 (void)Unfolded;
915 assert(Unfolded &&
916 "Computed unfolded register class but failed to unfold");
917 // Now stitch the new instructions into place and erase the old one.
918 for (auto *NewMI : NewMIs)
919 MBB.insert(MI.getIterator(), NewMI);
920
921 // Update the call info.
922 if (MI.isCandidateForAdditionalCallInfo())
923 MF.eraseAdditionalCallInfo(&MI);
924
925 MI.eraseFromParent();
926 LLVM_DEBUG({
927 dbgs() << "Unfolded load successfully into:\n";
928 for (auto *NewMI : NewMIs) {
929 NewMI->dump();
930 dbgs() << "\n";
931 }
932 });
933 continue;
934 }
935 }
936 llvm_unreachable("Escaped switch with default!");
937 }
938}
939
940/// Trace the predicate state through indirect branches, instrumenting them to
941/// poison the state if a target is reached that does not match the expected
942/// target.
943///
944/// This is designed to mitigate Spectre variant 1 attacks where an indirect
945/// branch is trained to predict a particular target and then mispredicts that
946/// target in a way that can leak data. Despite using an indirect branch, this
947/// is really a variant 1 style attack: it does not steer execution to an
948/// arbitrary or attacker controlled address, and it does not require any
949/// special code executing next to the victim. This attack can also be mitigated
950/// through retpolines, but those require either replacing indirect branches
951/// with conditional direct branches or lowering them through a device that
952/// blocks speculation. This mitigation can replace these retpoline-style
953/// mitigations for jump tables and other indirect branches within a function
954/// when variant 2 isn't a risk while allowing limited speculation. Indirect
955/// calls, however, cannot be mitigated through this technique without changing
956/// the ABI in a fundamental way.
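/// The check inserted at each target block looks roughly like the following
/// (small code model, non-PIC case; the PIC case materializes the address with
/// an LEA first):
///
///   .LBB0_7:                      # reached via `jmpq *%rax`
///     cmpq $.LBB0_7, %rax
///     cmovneq %poison, %pred      # wrong target implies misspeculation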
957SmallVector<MachineInstr *, 16>
958X86SpeculativeLoadHardeningPass::tracePredStateThroughIndirectBranches(
959 MachineFunction &MF) {
960 // We use the SSAUpdater to insert PHI nodes for the target addresses of
961 // indirect branches. We don't actually need the full power of the SSA updater
962 // in this particular case as we always have immediately available values, but
963 // this avoids us having to re-implement the PHI construction logic.
964 MachineSSAUpdater TargetAddrSSA(MF);
965 TargetAddrSSA.Initialize(MRI->createVirtualRegister(&X86::GR64RegClass));
966
967 // Track which blocks were terminated with an indirect branch.
968 SmallPtrSet<MachineBasicBlock *, 4> IndirectTerminatedMBBs;
969
970 // We need to know what blocks end up reached via indirect branches. We
971 // expect this to be a subset of those whose address is taken and so track it
972 // directly via the CFG.
973 SmallPtrSet<MachineBasicBlock *, 4> IndirectTargetMBBs;
974
975 // Walk all the blocks which end in an indirect branch and make the
976 // target address available.
977 for (MachineBasicBlock &MBB : MF) {
978 // Find the last terminator.
979 auto MII = MBB.instr_rbegin();
980 while (MII != MBB.instr_rend() && MII->isDebugInstr())
981 ++MII;
982 if (MII == MBB.instr_rend())
983 continue;
984 MachineInstr &TI = *MII;
985 if (!TI.isTerminator() || !TI.isBranch())
986 // No terminator or non-branch terminator.
987 continue;
988
989 Register TargetReg;
990
991 switch (TI.getOpcode()) {
992 default:
993 // Direct branch or conditional branch (leading to fallthrough).
994 continue;
995
996 case X86::FARJMP16m:
997 case X86::FARJMP32m:
998 case X86::FARJMP64m:
999 // We cannot mitigate far jumps or calls, but we also don't expect them
1000 // to be vulnerable to Spectre v1.2 or v2 (self trained) style attacks.
1001 continue;
1002
1003 case X86::JMP16m:
1004 case X86::JMP16m_NT:
1005 case X86::JMP32m:
1006 case X86::JMP32m_NT:
1007 case X86::JMP64m:
1008 case X86::JMP64m_NT:
1009 // Mostly as documentation.
1010 report_fatal_error("Memory operand jumps should have been unfolded!");
1011
1012 case X86::JMP16r:
1014 "Support for 16-bit indirect branches is not implemented.");
1015 case X86::JMP32r:
1017 "Support for 32-bit indirect branches is not implemented.");
1018
1019 case X86::JMP64r:
1020 TargetReg = TI.getOperand(0).getReg();
1021 }
1022
1023 // We have definitely found an indirect branch. Verify that there are no
1024 // preceding conditional branches as we don't yet support that.
1025 if (llvm::any_of(MBB.terminators(), [&](MachineInstr &OtherTI) {
1026 return !OtherTI.isDebugInstr() && &OtherTI != &TI;
1027 })) {
1028 LLVM_DEBUG({
1029 dbgs() << "ERROR: Found other terminators in a block with an indirect "
1030 "branch! This is not yet supported! Terminator sequence:\n";
1031 for (MachineInstr &MI : MBB.terminators()) {
1032 MI.dump();
1033 dbgs() << '\n';
1034 }
1035 });
1036 report_fatal_error("Unimplemented terminator sequence!");
1037 }
1038
1039 // Make the target register an available value for this block.
1040 TargetAddrSSA.AddAvailableValue(&MBB, TargetReg);
1041 IndirectTerminatedMBBs.insert(&MBB);
1042
1043 // Add all the successors to our target candidates.
1044 IndirectTargetMBBs.insert_range(MBB.successors());
1045 }
1046
1047 // Keep track of the cmov instructions we insert so we can return them.
1048 SmallVector<MachineInstr *, 16> CMovs;
1049
1050 // If we didn't find any indirect branches with targets, nothing to do here.
1051 if (IndirectTargetMBBs.empty())
1052 return CMovs;
1053
1054 // We found indirect branches and targets that need to be instrumented to
1055 // harden loads within them. Walk the blocks of the function (to get a stable
1056 // ordering) and instrument each target of an indirect branch.
1057 for (MachineBasicBlock &MBB : MF) {
1058 // Skip the blocks that aren't candidate targets.
1059 if (!IndirectTargetMBBs.count(&MBB))
1060 continue;
1061
1062 // We don't expect EH pads to ever be reached via an indirect branch. If
1063 // this is desired for some reason, we could simply skip them here rather
1064 // than asserting.
1065 assert(!MBB.isEHPad() &&
1066 "Unexpected EH pad as target of an indirect branch!");
1067
1068 // We should never end up threading EFLAGS into a block to harden
1069 // conditional jumps as there would be an additional successor via the
1070 // indirect branch. As a consequence, all such edges would be split before
1071 // reaching here, and the inserted block will handle the EFLAGS-based
1072 // hardening.
1073 assert(!MBB.isLiveIn(X86::EFLAGS) &&
1074 "Cannot check within a block that already has live-in EFLAGS!");
1075
1076 // We can't handle having non-indirect edges into this block unless this is
1077 // the only successor and we can synthesize the necessary target address.
1078 for (MachineBasicBlock *Pred : MBB.predecessors()) {
1079 // If we've already handled this by extracting the target directly,
1080 // nothing to do.
1081 if (IndirectTerminatedMBBs.count(Pred))
1082 continue;
1083
1084 // Otherwise, we have to be the only successor. We generally expect this
1085 // to be true as conditional branches should have had a critical edge
1086 // split already. We don't however need to worry about EH pad successors
1087 // as they'll happily ignore the target and their hardening strategy is
1088 // resilient to all ways in which they could be reached speculatively.
1089 if (!llvm::all_of(Pred->successors(), [&](MachineBasicBlock *Succ) {
1090 return Succ->isEHPad() || Succ == &MBB;
1091 })) {
1092 LLVM_DEBUG({
1093 dbgs() << "ERROR: Found conditional entry to target of indirect "
1094 "branch!\n";
1095 Pred->dump();
1096 MBB.dump();
1097 });
1098 report_fatal_error("Cannot harden a conditional entry to a target of "
1099 "an indirect branch!");
1100 }
1101
1102 // Now we need to compute the address of this block and install it as a
1103 // synthetic target in the predecessor. We do this at the bottom of the
1104 // predecessor.
1105 auto InsertPt = Pred->getFirstTerminator();
1106 Register TargetReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1107 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1108 !Subtarget->isPositionIndependent()) {
1109 // Directly materialize it into an immediate.
1110 auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(),
1111 TII->get(X86::MOV64ri32), TargetReg)
1112 .addMBB(&MBB);
1113 ++NumInstsInserted;
1114 (void)AddrI;
1115 LLVM_DEBUG(dbgs() << " Inserting mov: "; AddrI->dump();
1116 dbgs() << "\n");
1117 } else {
1118 auto AddrI = BuildMI(*Pred, InsertPt, DebugLoc(), TII->get(X86::LEA64r),
1119 TargetReg)
1120 .addReg(/*Base*/ X86::RIP)
1121 .addImm(/*Scale*/ 1)
1122 .addReg(/*Index*/ 0)
1123 .addMBB(&MBB)
1124 .addReg(/*Segment*/ 0);
1125 ++NumInstsInserted;
1126 (void)AddrI;
1127 LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump();
1128 dbgs() << "\n");
1129 }
1130 // And make this available.
1131 TargetAddrSSA.AddAvailableValue(Pred, TargetReg);
1132 }
1133
1134 // Materialize the needed SSA value of the target. Note that we need the
1135 // middle of the block as this block might at the bottom have an indirect
1136 // branch back to itself. We can do this here because at this point, every
1137 // predecessor of this block has an available value. This is basically just
1138 // automating the construction of a PHI node for this target.
1139 Register TargetReg = TargetAddrSSA.GetValueInMiddleOfBlock(&MBB);
1140
1141 // Insert a comparison of the incoming target register with this block's
1142 // address. This also requires us to mark the block as having its address
1143 // taken explicitly.
1144 MBB.setMachineBlockAddressTaken();
1145 auto InsertPt = MBB.SkipPHIsLabelsAndDebug(MBB.begin());
1146 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
1147 !Subtarget->isPositionIndependent()) {
1148 // Check directly against a relocated immediate when we can.
1149 auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64ri32))
1150 .addReg(TargetReg, RegState::Kill)
1151 .addMBB(&MBB);
1152 ++NumInstsInserted;
1153 (void)CheckI;
1154 LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1155 } else {
1156 // Otherwise compute the address into a register first.
1157 Register AddrReg = MRI->createVirtualRegister(&X86::GR64RegClass);
1158 auto AddrI =
1159 BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::LEA64r), AddrReg)
1160 .addReg(/*Base*/ X86::RIP)
1161 .addImm(/*Scale*/ 1)
1162 .addReg(/*Index*/ 0)
1163 .addMBB(&MBB)
1164 .addReg(/*Segment*/ 0);
1165 ++NumInstsInserted;
1166 (void)AddrI;
1167 LLVM_DEBUG(dbgs() << " Inserting lea: "; AddrI->dump(); dbgs() << "\n");
1168 auto CheckI = BuildMI(MBB, InsertPt, DebugLoc(), TII->get(X86::CMP64rr))
1169 .addReg(TargetReg, RegState::Kill)
1170 .addReg(AddrReg, RegState::Kill);
1171 ++NumInstsInserted;
1172 (void)CheckI;
1173 LLVM_DEBUG(dbgs() << " Inserting cmp: "; CheckI->dump(); dbgs() << "\n");
1174 }
1175
1176 // Now cmov over the predicate if the comparison wasn't equal.
1177 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
1178 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
1179 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
1180 auto CMovI =
1181 BuildMI(MBB, InsertPt, DebugLoc(), TII->get(CMovOp), UpdatedStateReg)
1182 .addReg(PS->InitialReg)
1183 .addReg(PS->PoisonReg)
1185 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)
1186 ->setIsKill(true);
1187 ++NumInstsInserted;
1188 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
1189 CMovs.push_back(&*CMovI);
1190
1191 // And put the new value into the available values for SSA form of our
1192 // predicate state.
1193 PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
1194 }
1195
1196 // Return all the newly inserted cmov instructions of the predicate state.
1197 return CMovs;
1198}
1199
1200// Returns true if the MI has EFLAGS as a register def operand and it's live,
1201// otherwise it returns false
1202static bool isEFLAGSDefLive(const MachineInstr &MI) {
1203 if (const MachineOperand *DefOp =
1204 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) {
1205 return !DefOp->isDead();
1206 }
1207 return false;
1208}
1209
1211 const TargetRegisterInfo &TRI) {
1212 // Check if EFLAGS are alive by seeing if there is a def of them or they
1213 // live-in, and then seeing if that def is in turn used.
1214 for (MachineInstr &MI : llvm::reverse(llvm::make_range(MBB.begin(), I))) {
1215 if (MachineOperand *DefOp =
1216 MI.findRegisterDefOperand(X86::EFLAGS, /*TRI=*/nullptr)) {
1217 // If the def is dead, then EFLAGS is not live.
1218 if (DefOp->isDead())
1219 return false;
1220
1221 // Otherwise we've def'ed it, and it is live.
1222 return true;
1223 }
1224 // While at this instruction, also check if we use and kill EFLAGS
1225 // which means it isn't live.
1226 if (MI.killsRegister(X86::EFLAGS, &TRI))
1227 return false;
1228 }
1229
1230 // If we didn't find anything conclusive (neither definitely alive or
1231 // definitely dead) return whether it lives into the block.
1232 return MBB.isLiveIn(X86::EFLAGS);
1233}
1234
1235/// Trace the predicate state through each of the blocks in the function,
1236/// hardening everything necessary along the way.
1237///
1238/// We call this routine once the initial predicate state has been established
1239/// for each basic block in the function in the SSA updater. This routine traces
1240/// it through the instructions within each basic block, and for non-returning
1241/// blocks informs the SSA updater about the final state that lives out of the
1242/// block. Along the way, it hardens any vulnerable instruction using the
1243/// currently valid predicate state. We have to do these two things together
1244/// because the SSA updater only works across blocks. Within a block, we track
1245/// the current predicate state directly and update it as it changes.
1246///
1247/// This operates in two passes over each block. First, we analyze the loads in
1248/// the block to determine which strategy will be used to harden them: hardening
1249/// the address or hardening the loaded value when loaded into a register
1250/// amenable to hardening. We have to process these first because the two
1251/// strategies may interact -- later hardening may change what strategy we wish
1252/// to use. We also will analyze data dependencies between loads and avoid
1253/// hardening those loads that are data dependent on a load with a hardened
1254/// address. We also skip hardening loads already behind an LFENCE as that is
1255/// sufficient to harden them against misspeculation.
1256///
1257/// Second, we actively trace the predicate state through the block, applying
1258/// the hardening steps we determined necessary in the first pass as we go.
1259///
1260/// These two passes are applied to each basic block. We operate one block at a
1261/// time to simplify reasoning about reachability and sequencing.
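/// The two hardening strategies applied below amount to roughly:
///
///   Address hardening:          Post-load hardening:
///     orq  %pred, %rsi            movq (%rsi), %rax
///     movq (%rsi), %rax           orq  %pred, %rax
///
/// With all-ones predicate state the first form yields a non-canonical
/// address, while the second flushes the loaded bits to one before they can
/// leak through a dependent access.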
1262void X86SpeculativeLoadHardeningPass::tracePredStateThroughBlocksAndHarden(
1263 MachineFunction &MF) {
1264 SmallPtrSet<MachineInstr *, 16> HardenPostLoad;
1265 SmallPtrSet<MachineInstr *, 16> HardenLoadAddr;
1266
1267 SmallSet<Register, 16> HardenedAddrRegs;
1268
1269 SmallDenseMap<Register, Register, 32> AddrRegToHardenedReg;
1270
1271 // Track the set of load-dependent registers through the basic block. Because
1272 // the values of these registers have an existing data dependency on a loaded
1273 // value which we would have checked, we can omit any checks on them.
1274 SparseBitVector<> LoadDepRegs;
1275
1276 for (MachineBasicBlock &MBB : MF) {
1277 // The first pass over the block: collect all the loads which can have their
1278 // loaded value hardened and all the loads that instead need their address
1279 // hardened. During this walk we propagate load dependence for address
1280 // hardened loads and also look for LFENCE to stop hardening wherever
1281 // possible. When deciding whether or not to harden the loaded value or not,
1282 // we check to see if any registers used in the address will have been
1283 // hardened at this point and if so, harden any remaining address registers
1284 // as that often successfully re-uses hardened addresses and minimizes
1285 // instructions.
1286 //
1287 // FIXME: We should consider an aggressive mode where we continue to keep as
1288 // many loads value hardened even when some address register hardening would
1289 // be free (due to reuse).
1290 //
1291 // Note that we only need this pass if we are actually hardening loads.
1292 if (HardenLoads)
1293 for (MachineInstr &MI : MBB) {
1294 // We naively assume that all def'ed registers of an instruction have
1295 // a data dependency on all of their operands.
1296 // FIXME: Do a more careful analysis of x86 to build a conservative
1297 // model here.
1298 if (llvm::any_of(MI.uses(), [&](MachineOperand &Op) {
1299 return Op.isReg() && LoadDepRegs.test(Op.getReg().id());
1300 }))
1301 for (MachineOperand &Def : MI.defs())
1302 if (Def.isReg())
1303 LoadDepRegs.set(Def.getReg().id());
1304
1305 // Both Intel and AMD are guiding that they will change the semantics of
1306 // LFENCE to be a speculation barrier, so if we see an LFENCE, there is
1307 // no more need to guard things in this block.
1308 if (MI.getOpcode() == X86::LFENCE)
1309 break;
1310
1311 // If this instruction cannot load, nothing to do.
1312 if (!MI.mayLoad())
1313 continue;
1314
1315 // Some instructions which "load" are trivially safe or unimportant.
1316 if (MI.getOpcode() == X86::MFENCE)
1317 continue;
1318
1319 // Extract the memory operand information about this instruction.
1320 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI);
1321 if (MemRefBeginIdx < 0) {
1323 << "WARNING: unable to harden loading instruction: ";
1324 MI.dump());
1325 continue;
1326 }
1327
1328 MachineOperand &BaseMO =
1329 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1330 MachineOperand &IndexMO =
1331 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1332
1333 // If we have at least one (non-frame-index, non-RIP) register operand,
1334 // and neither operand is load-dependent, we need to check the load.
1335 Register BaseReg, IndexReg;
1336 if (!BaseMO.isFI() && BaseMO.getReg() != X86::RIP &&
1337 BaseMO.getReg().isValid())
1338 BaseReg = BaseMO.getReg();
1339 if (IndexMO.getReg().isValid())
1340 IndexReg = IndexMO.getReg();
1341
1342 if (!BaseReg && !IndexReg)
1343 // No register operands!
1344 continue;
1345
1346 // If any register operand is dependent, this load is dependent and we
1347 // needn't check it.
1348 // FIXME: Is this true in the case where we are hardening loads after
1349 // they complete? Unclear, need to investigate.
1350 if ((BaseReg && LoadDepRegs.test(BaseReg.id())) ||
1351 (IndexReg && LoadDepRegs.test(IndexReg.id())))
1352 continue;
1353
1354 // If post-load hardening is enabled, this load is compatible with
1355 // post-load hardening, and we aren't already going to harden one of the
1356 // address registers, queue it up to be hardened post-load. Notably,
1357 // even once hardened this won't introduce a useful dependency that
1358 // could prune out subsequent loads.
1360 !isEFLAGSDefLive(MI) && MI.getDesc().getNumDefs() == 1 &&
1361 MI.getOperand(0).isReg() &&
1362 canHardenRegister(MI.getOperand(0).getReg()) &&
1363 !HardenedAddrRegs.count(BaseReg) &&
1364 !HardenedAddrRegs.count(IndexReg)) {
1365 HardenPostLoad.insert(&MI);
1366 HardenedAddrRegs.insert(MI.getOperand(0).getReg());
1367 continue;
1368 }
1369
1370 // Record this instruction for address hardening and record its register
1371 // operands as being address-hardened.
1372 HardenLoadAddr.insert(&MI);
1373 if (BaseReg)
1374 HardenedAddrRegs.insert(BaseReg);
1375 if (IndexReg)
1376 HardenedAddrRegs.insert(IndexReg);
1377
1378 for (MachineOperand &Def : MI.defs())
1379 if (Def.isReg())
1380 LoadDepRegs.set(Def.getReg().id());
1381 }
1382
1383 // Now re-walk the instructions in the basic block, and apply whichever
1384 // hardening strategy we have elected. Note that we do this in a second
1385 // pass specifically so that we have the complete set of instructions for
1386 // which we will do post-load hardening and can defer it in certain
1387 // circumstances.
1388 for (MachineInstr &MI : MBB) {
1389 if (HardenLoads) {
1390 // We cannot both require hardening the def of a load and its address.
1391 assert(!(HardenLoadAddr.count(&MI) && HardenPostLoad.count(&MI)) &&
1392 "Requested to harden both the address and def of a load!");
1393
1394 // Check if this is a load whose address needs to be hardened.
1395 if (HardenLoadAddr.erase(&MI)) {
1396 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(MI);
1397 assert(MemRefBeginIdx >= 0 && "Cannot have an invalid index here!");
1398
1399 MachineOperand &BaseMO =
1400 MI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1401 MachineOperand &IndexMO =
1402 MI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1403 hardenLoadAddr(MI, BaseMO, IndexMO, AddrRegToHardenedReg);
1404 continue;
1405 }
1406
1407 // Test if this instruction is one of our post load instructions (and
1408 // remove it from the set if so).
1409 if (HardenPostLoad.erase(&MI)) {
1410 assert(!MI.isCall() && "Must not try to post-load harden a call!");
1411
1412 // If this is a data-invariant load and there is no EFLAGS
1413 // interference, we want to try and sink any hardening as far as
1414 // possible.
1415 if (X86InstrInfo::isDataInvariantLoad(MI) && !isEFLAGSDefLive(MI)) {
1416 // Sink the instruction we'll need to harden as far as we can down
1417 // the graph.
1418 MachineInstr *SunkMI = sinkPostLoadHardenedInst(MI, HardenPostLoad);
1419
1420 // If we managed to sink this instruction, update everything so we
1421 // harden that instruction when we reach it in the instruction
1422 // sequence.
1423 if (SunkMI != &MI) {
1424 // If in sinking there was no instruction needing to be hardened,
1425 // we're done.
1426 if (!SunkMI)
1427 continue;
1428
1429 // Otherwise, add this to the set of defs we harden.
1430 HardenPostLoad.insert(SunkMI);
1431 continue;
1432 }
1433 }
1434
1435 Register HardenedReg = hardenPostLoad(MI);
1436
1437 // Mark the resulting hardened register as such so we don't re-harden.
1438 AddrRegToHardenedReg[HardenedReg] = HardenedReg;
1439
1440 continue;
1441 }
1442
1443 // Check for an indirect call or branch that may need its input hardened
1444 // even if we couldn't find the specific load used, or were able to
1445 // avoid hardening it for some reason. Note that here we cannot break
1446 // out afterward as we may still need to handle any call aspect of this
1447 // instruction.
1448 if ((MI.isCall() || MI.isBranch()) && HardenIndirectCallsAndJumps)
1449 hardenIndirectCallOrJumpInstr(MI, AddrRegToHardenedReg);
1450 }
1451
1452 // After we finish hardening loads we handle interprocedural hardening if
1453 // enabled and relevant for this instruction.
1454 if (!HardenInterprocedurally)
1455 continue;
1456 if (!MI.isCall() && !MI.isReturn())
1457 continue;
1458
1459 // If this is a direct return (IE, not a tail call) just directly harden
1460 // it.
1461 if (MI.isReturn() && !MI.isCall()) {
1462 hardenReturnInstr(MI);
1463 continue;
1464 }
1465
1466 // Otherwise we have a call. We need to handle transferring the predicate
1467 // state into a call and recovering it after the call returns (unless this
1468 // is a tail call).
1469 assert(MI.isCall() && "Should only reach here for calls!");
1470 tracePredStateThroughCall(MI);
1471 }
1472
1473 HardenPostLoad.clear();
1474 HardenLoadAddr.clear();
1475 HardenedAddrRegs.clear();
1476 AddrRegToHardenedReg.clear();
1477
1478 // Currently, we only track data-dependent loads within a basic block.
1479 // FIXME: We should see if this is necessary or if we could be more
1480 // aggressive here without opening up attack avenues.
1481 LoadDepRegs.clear();
1482 }
1483}
1484
1485/// Save EFLAGS into the returned GPR. This can in turn be restored with
1486/// `restoreEFLAGS`.
1487///
1488/// Note that LLVM can only lower very simple patterns of saved and restored
1489/// EFLAGS registers. The restore should always be within the same basic block
1490/// as the save so that no PHI nodes are inserted.
1491Register X86SpeculativeLoadHardeningPass::saveEFLAGS(
1493 const DebugLoc &Loc) {
1494 // FIXME: Hard coding this to a 32-bit register class seems weird, but matches
1495 // what instruction selection does.
1496 Register Reg = MRI->createVirtualRegister(&X86::GR32RegClass);
1497 // We directly copy the FLAGS register and rely on later lowering to clean
1498 // this up into the appropriate setCC instructions.
1499 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), Reg).addReg(X86::EFLAGS);
1500 ++NumInstsInserted;
1501 return Reg;
1502}
1503
1504/// Restore EFLAGS from the provided GPR. This should be produced by
1505/// `saveEFLAGS`.
1506///
1507/// This must be done within the same basic block as the save in order to
1508/// reliably lower.
1509void X86SpeculativeLoadHardeningPass::restoreEFLAGS(
1511 const DebugLoc &Loc, Register Reg) {
1512 BuildMI(MBB, InsertPt, Loc, TII->get(X86::COPY), X86::EFLAGS).addReg(Reg);
1513 ++NumInstsInserted;
1514}
1515
1516/// Takes the current predicate state (in a register) and merges it into the
1517/// stack pointer. The state is essentially a single bit, but we merge this in
1518/// a way that won't form non-canonical pointers and also will be preserved
1519/// across normal stack adjustments.
1520void X86SpeculativeLoadHardeningPass::mergePredStateIntoSP(
1521 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1522 const DebugLoc &Loc, Register PredStateReg) {
1523 Register TmpReg = MRI->createVirtualRegister(PS->RC);
1524 // FIXME: This hard codes a shift distance based on the number of bits needed
1525 // to stay canonical on 64-bit. We should compute this somehow and support
1526 // 32-bit as part of that.
1527 auto ShiftI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHL64ri), TmpReg)
1528 .addReg(PredStateReg, RegState::Kill)
1529 .addImm(47);
1530 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1531 ++NumInstsInserted;
1532 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), X86::RSP)
1533 .addReg(X86::RSP)
1534 .addReg(TmpReg, RegState::Kill);
1535 OrI->addRegisterDead(X86::EFLAGS, TRI);
1536 ++NumInstsInserted;
1537}
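// As a rough sketch, the two instructions built above lower to something like
// the following (register names are illustrative only):
//
//   shlq $47, %tmp     # move the all-zeros/all-ones predicate state up high
//   orq  %tmp, %rsp    # merge it into the high bits of the stack pointer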
1538
1539/// Extracts the predicate state stored in the high bits of the stack pointer.
1540Register X86SpeculativeLoadHardeningPass::extractPredStateFromSP(
1541 MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1542 const DebugLoc &Loc) {
1543 Register PredStateReg = MRI->createVirtualRegister(PS->RC);
1544 Register TmpReg = MRI->createVirtualRegister(PS->RC);
1545
1546 // We know that the stack pointer will have any preserved predicate state in
1547 // its high bit. We just want to smear this across the other bits. Turns out,
1548 // this is exactly what an arithmetic right shift does.
1549 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), TmpReg)
1550 .addReg(X86::RSP);
1551 auto ShiftI =
1552 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SAR64ri), PredStateReg)
1553 .addReg(TmpReg, RegState::Kill)
1554 .addImm(TRI->getRegSizeInBits(*PS->RC) - 1);
1555 ShiftI->addRegisterDead(X86::EFLAGS, TRI);
1556 ++NumInstsInserted;
1557
1558 return PredStateReg;
1559}
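// Roughly, for the 64-bit predicate state register class this emits
// (illustrative register names):
//
//   movq %rsp, %tmp
//   sarq $63, %tmp     # smear the high (poison) bit across all 64 bits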
1560
1561void X86SpeculativeLoadHardeningPass::hardenLoadAddr(
1562 MachineInstr &MI, MachineOperand &BaseMO, MachineOperand &IndexMO,
1563 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg) {
1564 MachineBasicBlock &MBB = *MI.getParent();
1565 const DebugLoc &Loc = MI.getDebugLoc();
1566
1567 // Check if EFLAGS are alive by seeing if there is a def of them or they
1568 // live-in, and then seeing if that def is in turn used.
1569 bool EFLAGSLive = isEFLAGSLive(MBB, MI.getIterator(), *TRI);
1570
1571 SmallVector<MachineOperand *, 2> HardenOpRegs;
1572
1573 if (BaseMO.isFI()) {
1574 // A frame index is never a dynamically controllable load, so only
1575 // harden it if we're covering fixed address loads as well.
1576 LLVM_DEBUG(
1577 dbgs() << " Skipping hardening base of explicit stack frame load: ";
1578 MI.dump(); dbgs() << "\n");
1579 } else if (BaseMO.getReg() == X86::RSP) {
1580 // Some idempotent atomic operations are lowered directly to a locked
1581 // OR with 0 to the top of the stack (or slightly offset from the top) which uses an
1582 // explicit RSP register as the base.
1583 assert(IndexMO.getReg() == X86::NoRegister &&
1584 "Explicit RSP access with dynamic index!");
1585 LLVM_DEBUG(
1586 dbgs() << " Cannot harden base of explicit RSP offset in a load!");
1587 } else if (BaseMO.getReg() == X86::RIP ||
1588 BaseMO.getReg() == X86::NoRegister) {
1589 // For both RIP-relative addressed loads or absolute loads, we cannot
1590 // meaningfully harden them because the address being loaded has no
1591 // dynamic component.
1592 //
1593 // FIXME: When using a segment base (like TLS does) we end up with the
1594 // dynamic address being the base plus -1 because we can't mutate the
1595 // segment register here. This allows the signed 32-bit offset to point at
1596 // valid segment-relative addresses and load them successfully.
1597 LLVM_DEBUG(
1598 dbgs() << " Cannot harden base of "
1599 << (BaseMO.getReg() == X86::RIP ? "RIP-relative" : "no-base")
1600 << " address in a load!");
1601 } else {
1602 assert(BaseMO.isReg() &&
1603 "Only allowed to have a frame index or register base.");
1604 HardenOpRegs.push_back(&BaseMO);
1605 }
1606
1607 if (IndexMO.getReg() != X86::NoRegister &&
1608 (HardenOpRegs.empty() ||
1609 HardenOpRegs.front()->getReg() != IndexMO.getReg()))
1610 HardenOpRegs.push_back(&IndexMO);
1611
1612 assert((HardenOpRegs.size() == 1 || HardenOpRegs.size() == 2) &&
1613 "Should have exactly one or two registers to harden!");
1614 assert((HardenOpRegs.size() == 1 ||
1615 HardenOpRegs[0]->getReg() != HardenOpRegs[1]->getReg()) &&
1616 "Should not have two of the same registers!");
1617
1618 // Remove any registers that have already been checked.
1619 llvm::erase_if(HardenOpRegs, [&](MachineOperand *Op) {
1620 // See if this operand's register has already been checked.
1621 auto It = AddrRegToHardenedReg.find(Op->getReg());
1622 if (It == AddrRegToHardenedReg.end())
1623 // Not checked, so retain this one.
1624 return false;
1625
1626 // Otherwise, we can directly update this operand and remove it.
1627 Op->setReg(It->second);
1628 return true;
1629 });
1630 // If there are none left, we're done.
1631 if (HardenOpRegs.empty())
1632 return;
1633
1634 // Compute the current predicate state.
1635 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1636
1637 auto InsertPt = MI.getIterator();
1638
1639 // If EFLAGS are live and we don't have access to instructions that avoid
1640 // clobbering EFLAGS we need to save and restore them. This in turn makes
1641 // the EFLAGS no longer live.
1642 Register FlagsReg;
1643 if (EFLAGSLive && !Subtarget->hasBMI2()) {
1644 EFLAGSLive = false;
1645 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1646 }
1647
1648 for (MachineOperand *Op : HardenOpRegs) {
1649 Register OpReg = Op->getReg();
1650 auto *OpRC = MRI->getRegClass(OpReg);
1651 Register TmpReg = MRI->createVirtualRegister(OpRC);
1652
1653 // If this is a vector register, we'll need somewhat custom logic to handle
1654 // hardening it.
1655 if (!Subtarget->hasVLX() && (OpRC->hasSuperClassEq(&X86::VR128RegClass) ||
1656 OpRC->hasSuperClassEq(&X86::VR256RegClass))) {
1657 assert(Subtarget->hasAVX2() && "AVX2-specific register classes!");
1658 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128RegClass);
1659
1660 // Move our state into a vector register.
1661 // FIXME: We could skip this at the cost of longer encodings with AVX-512
1662 // but that doesn't seem likely to be worth it.
1663 Register VStateReg = MRI->createVirtualRegister(&X86::VR128RegClass);
1664 auto MovI =
1665 BuildMI(MBB, InsertPt, Loc, TII->get(X86::VMOV64toPQIrr), VStateReg)
1666 .addReg(StateReg);
1667 (void)MovI;
1668 ++NumInstsInserted;
1669 LLVM_DEBUG(dbgs() << " Inserting mov: "; MovI->dump(); dbgs() << "\n");
1670
1671 // Broadcast it across the vector register.
1672 Register VBStateReg = MRI->createVirtualRegister(OpRC);
1673 auto BroadcastI = BuildMI(MBB, InsertPt, Loc,
1674 TII->get(Is128Bit ? X86::VPBROADCASTQrr
1675 : X86::VPBROADCASTQYrr),
1676 VBStateReg)
1677 .addReg(VStateReg);
1678 (void)BroadcastI;
1679 ++NumInstsInserted;
1680 LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1681 dbgs() << "\n");
1682
1683 // Merge our potential poison state into the value with a vector or.
1684 auto OrI =
1685 BuildMI(MBB, InsertPt, Loc,
1686 TII->get(Is128Bit ? X86::VPORrr : X86::VPORYrr), TmpReg)
1687 .addReg(VBStateReg)
1688 .addReg(OpReg);
1689 (void)OrI;
1690 ++NumInstsInserted;
1691 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1692 } else if (OpRC->hasSuperClassEq(&X86::VR128XRegClass) ||
1693 OpRC->hasSuperClassEq(&X86::VR256XRegClass) ||
1694 OpRC->hasSuperClassEq(&X86::VR512RegClass)) {
1695 assert(Subtarget->hasAVX512() && "AVX512-specific register classes!");
1696 bool Is128Bit = OpRC->hasSuperClassEq(&X86::VR128XRegClass);
1697 bool Is256Bit = OpRC->hasSuperClassEq(&X86::VR256XRegClass);
1698 if (Is128Bit || Is256Bit)
1699 assert(Subtarget->hasVLX() && "AVX512VL-specific register classes!");
1700
1701 // Broadcast our state into a vector register.
1702 Register VStateReg = MRI->createVirtualRegister(OpRC);
1703 unsigned BroadcastOp = Is128Bit ? X86::VPBROADCASTQrZ128rr
1704 : Is256Bit ? X86::VPBROADCASTQrZ256rr
1705 : X86::VPBROADCASTQrZrr;
1706 auto BroadcastI =
1707 BuildMI(MBB, InsertPt, Loc, TII->get(BroadcastOp), VStateReg)
1708 .addReg(StateReg);
1709 (void)BroadcastI;
1710 ++NumInstsInserted;
1711 LLVM_DEBUG(dbgs() << " Inserting broadcast: "; BroadcastI->dump();
1712 dbgs() << "\n");
1713
1714 // Merge our potential poison state into the value with a vector or.
1715 unsigned OrOp = Is128Bit ? X86::VPORQZ128rr
1716 : Is256Bit ? X86::VPORQZ256rr : X86::VPORQZrr;
1717 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOp), TmpReg)
1718 .addReg(VStateReg)
1719 .addReg(OpReg);
1720 (void)OrI;
1721 ++NumInstsInserted;
1722 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1723 } else {
1724 // FIXME: Need to support GR32 here for 32-bit code.
1725 assert(OpRC->hasSuperClassEq(&X86::GR64RegClass) &&
1726 "Not a supported register class for address hardening!");
1727
1728 if (!EFLAGSLive) {
1729 // Merge our potential poison state into the value with an or.
1730 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(X86::OR64rr), TmpReg)
1731 .addReg(StateReg)
1732 .addReg(OpReg);
1733 OrI->addRegisterDead(X86::EFLAGS, TRI);
1734 ++NumInstsInserted;
1735 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1736 } else {
1737 // We need to avoid touching EFLAGS so shift out all but the least
1738 // significant bit using the instruction that doesn't update flags.
1739 auto ShiftI =
1740 BuildMI(MBB, InsertPt, Loc, TII->get(X86::SHRX64rr), TmpReg)
1741 .addReg(OpReg)
1742 .addReg(StateReg);
1743 (void)ShiftI;
1744 ++NumInstsInserted;
1745 LLVM_DEBUG(dbgs() << " Inserting shrx: "; ShiftI->dump();
1746 dbgs() << "\n");
1747 }
1748 }
1749
1750 // Record this register as checked and update the operand.
1751 assert(!AddrRegToHardenedReg.count(Op->getReg()) &&
1752 "Should not have checked this register yet!");
1753 AddrRegToHardenedReg[Op->getReg()] = TmpReg;
1754 Op->setReg(TmpReg);
1755 ++NumAddrRegsHardened;
1756 }
1757
1758 // And restore the flags if needed.
1759 if (FlagsReg)
1760 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1761}
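// For the common scalar case above (EFLAGS dead, GR64 base register), the net
// effect on a load such as `movq (%rbase), %rdx` is roughly (illustrative
// register names):
//
//   orq  %state, %rbase      # the hardened base is all ones under misspeculation
//   movq (%rbase), %rdx      # so the load dereferences a poisoned pointer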
1762
1763MachineInstr *X86SpeculativeLoadHardeningPass::sinkPostLoadHardenedInst(
1764 MachineInstr &InitialMI, SmallPtrSetImpl<MachineInstr *> &HardenedInstrs) {
1766 "Cannot get here with a non-invariant load!");
1767 assert(!isEFLAGSDefLive(InitialMI) &&
1768 "Cannot get here with a data invariant load "
1769 "that interferes with EFLAGS!");
1770
1771 // See if we can sink hardening the loaded value.
1772 auto SinkCheckToSingleUse =
1773 [&](MachineInstr &MI) -> std::optional<MachineInstr *> {
1774 Register DefReg = MI.getOperand(0).getReg();
1775
1776 // We need to find a single use to which we can sink the check. We can
1777 // primarily do this because many uses may already end up checked on their
1778 // own.
1779 MachineInstr *SingleUseMI = nullptr;
1780 for (MachineInstr &UseMI : MRI->use_instructions(DefReg)) {
1781 // If we're already going to harden this use, it is data invariant, it
1782 // does not interfere with EFLAGS, and within our block.
1783 if (HardenedInstrs.count(&UseMI)) {
1784 if (!X86InstrInfo::isDataInvariantLoad(UseMI) || isEFLAGSDefLive(UseMI)) {
1785 // If we've already decided to harden a non-load, we must have sunk
1786 // some other post-load hardened instruction to it and it must itself
1787 // be data-invariant.
1789 "Data variant instruction being hardened!");
1790 continue;
1791 }
1792
1793 // Otherwise, this is a load and the load component can't be data
1794 // invariant so check how this register is being used.
1795 const int MemRefBeginIdx = X86::getFirstAddrOperandIdx(UseMI);
1796 assert(MemRefBeginIdx >= 0 &&
1797 "Should always have mem references here!");
1798
1799 MachineOperand &BaseMO =
1800 UseMI.getOperand(MemRefBeginIdx + X86::AddrBaseReg);
1801 MachineOperand &IndexMO =
1802 UseMI.getOperand(MemRefBeginIdx + X86::AddrIndexReg);
1803 if ((BaseMO.isReg() && BaseMO.getReg() == DefReg) ||
1804 (IndexMO.isReg() && IndexMO.getReg() == DefReg))
1805 // The load uses the register as part of its address making it not
1806 // invariant.
1807 return {};
1808
1809 continue;
1810 }
1811
1812 if (SingleUseMI)
1813 // We already have a single use, this would make two. Bail.
1814 return {};
1815
1816 // If this single use isn't data invariant, isn't in this block, or has
1817 // interfering EFLAGS, we can't sink the hardening to it.
1818 if (!X86InstrInfo::isDataInvariant(UseMI) || UseMI.getParent() != MI.getParent() ||
1819 isEFLAGSDefLive(UseMI))
1820 return {};
1821
1822 // If this instruction defines multiple registers bail as we won't harden
1823 // all of them.
1824 if (UseMI.getDesc().getNumDefs() > 1)
1825 return {};
1826
1827 // If this register isn't a virtual register we can't sanely walk its
1828 // uses, so just bail. Also check that its register class is one of the
1829 // ones we can harden.
1830 Register UseDefReg = UseMI.getOperand(0).getReg();
1831 if (!canHardenRegister(UseDefReg))
1832 return {};
1833
1834 SingleUseMI = &UseMI;
1835 }
1836
1837 // If SingleUseMI is still null, there is no use that needs its own
1838 // checking. Otherwise, it is the single use that needs checking.
1839 return {SingleUseMI};
1840 };
1841
1842 MachineInstr *MI = &InitialMI;
1843 while (std::optional<MachineInstr *> SingleUse = SinkCheckToSingleUse(*MI)) {
1844 // Update which MI we're checking now.
1845 MI = *SingleUse;
1846 if (!MI)
1847 break;
1848 }
1849
1850 return MI;
1851}
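// For example (a sketch of the intended behavior): given `%v = MOV64rm ...`
// whose only in-block use is a data-invariant `%w = ADD64rr %v, %x`, the check
// is sunk from the load's def %v to the ADD's def %w, so the OR with the
// predicate state is emitted after the ADD instead of after the load.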
1852
1853bool X86SpeculativeLoadHardeningPass::canHardenRegister(Register Reg) {
1854 // We only support hardening virtual registers.
1855 if (!Reg.isVirtual())
1856 return false;
1857
1858 auto *RC = MRI->getRegClass(Reg);
1859 int RegBytes = TRI->getRegSizeInBits(*RC) / 8;
1860 if (RegBytes > 8)
1861 // We don't support post-load hardening of vectors.
1862 return false;
1863
1864 unsigned RegIdx = Log2_32(RegBytes);
1865 assert(RegIdx < 4 && "Unsupported register size");
1866
1867 // If this register class is explicitly constrained to a class that doesn't
1868 // require REX prefix, we may not be able to satisfy that constraint when
1869 // emitting the hardening instructions, so bail out here.
1870 // FIXME: This seems like a pretty lame hack. The way this comes up is when we
1871 // end up both with a NOREX and REX-only register as operands to the hardening
1872 // instructions. It would be better to fix that code to handle this situation
1873 // rather than hack around it in this way.
1874 const TargetRegisterClass *NOREXRegClasses[] = {
1875 &X86::GR8_NOREXRegClass, &X86::GR16_NOREXRegClass,
1876 &X86::GR32_NOREXRegClass, &X86::GR64_NOREXRegClass};
1877 if (RC == NOREXRegClasses[RegIdx])
1878 return false;
1879
1880 const TargetRegisterClass *GPRRegClasses[] = {
1881 &X86::GR8RegClass, &X86::GR16RegClass, &X86::GR32RegClass,
1882 &X86::GR64RegClass};
1883 return RC->hasSuperClassEq(GPRRegClasses[RegIdx]);
1884}
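// For example: a virtual GR64 or GR8 register passes this check, while a
// 128-bit vector register (wider than 8 bytes) or a register constrained to
// GR32_NOREX is rejected.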
1885
1886/// Harden a value in a register.
1887///
1888/// This is the low-level logic to fully harden a value sitting in a register
1889/// against leaking during speculative execution.
1890///
1891/// Unlike hardening an address that is used by a load, this routine is required
1892/// to hide *all* incoming bits in the register.
1893///
1894/// `Reg` must be a virtual register. Currently, it is required to be a GPR no
1895/// larger than the predicate state register. FIXME: We should support vector
1896/// registers here by broadcasting the predicate state.
1897///
1898/// The new, hardened virtual register is returned. It will have the same
1899/// register class as `Reg`.
1900Register X86SpeculativeLoadHardeningPass::hardenValueInRegister(
1901 Register Reg, MachineBasicBlock &MBB, MachineBasicBlock::iterator InsertPt,
1902 const DebugLoc &Loc) {
1903 assert(canHardenRegister(Reg) && "Cannot harden this register!");
1904
1905 auto *RC = MRI->getRegClass(Reg);
1906 int Bytes = TRI->getRegSizeInBits(*RC) / 8;
1907 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
1908 assert((Bytes == 1 || Bytes == 2 || Bytes == 4 || Bytes == 8) &&
1909 "Unknown register size");
1910
1911 // FIXME: Need to teach this about 32-bit mode.
1912 if (Bytes != 8) {
1913 unsigned SubRegImms[] = {X86::sub_8bit, X86::sub_16bit, X86::sub_32bit};
1914 unsigned SubRegImm = SubRegImms[Log2_32(Bytes)];
1915 Register NarrowStateReg = MRI->createVirtualRegister(RC);
1916 BuildMI(MBB, InsertPt, Loc, TII->get(TargetOpcode::COPY), NarrowStateReg)
1917 .addReg(StateReg, 0, SubRegImm);
1918 StateReg = NarrowStateReg;
1919 }
1920
1921 Register FlagsReg;
1922 if (isEFLAGSLive(MBB, InsertPt, *TRI))
1923 FlagsReg = saveEFLAGS(MBB, InsertPt, Loc);
1924
1925 Register NewReg = MRI->createVirtualRegister(RC);
1926 unsigned OrOpCodes[] = {X86::OR8rr, X86::OR16rr, X86::OR32rr, X86::OR64rr};
1927 unsigned OrOpCode = OrOpCodes[Log2_32(Bytes)];
1928 auto OrI = BuildMI(MBB, InsertPt, Loc, TII->get(OrOpCode), NewReg)
1929 .addReg(StateReg)
1930 .addReg(Reg);
1931 OrI->addRegisterDead(X86::EFLAGS, TRI);
1932 ++NumInstsInserted;
1933 LLVM_DEBUG(dbgs() << " Inserting or: "; OrI->dump(); dbgs() << "\n");
1934
1935 if (FlagsReg)
1936 restoreEFLAGS(MBB, InsertPt, Loc, FlagsReg);
1937
1938 return NewReg;
1939}
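// A sketch of the narrow (sub-64-bit) path above for a 32-bit value, with
// illustrative register names:
//
//   movl %state32, %tmp32    # COPY of the sub_32bit piece of the state
//   orl  %tmp32, %val32      # flush the value to all ones under misspeculation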
1940
1941/// Harden a load by hardening the loaded value in the defined register.
1942///
1943/// We can harden a non-leaking load into a register without touching the
1944/// address by just hiding all of the loaded bits during misspeculation. We use
1945/// an `or` instruction to do this because we set up our poison value as all
1946/// ones. And the goal is just for the loaded bits to not be exposed to
1947/// execution and coercing them to one is sufficient.
1948///
1949/// Returns the newly hardened register.
1950Register X86SpeculativeLoadHardeningPass::hardenPostLoad(MachineInstr &MI) {
1951 MachineBasicBlock &MBB = *MI.getParent();
1952 const DebugLoc &Loc = MI.getDebugLoc();
1953
1954 auto &DefOp = MI.getOperand(0);
1955 Register OldDefReg = DefOp.getReg();
1956 auto *DefRC = MRI->getRegClass(OldDefReg);
1957
1958 // Because we want to completely replace the uses of this def'ed value with
1959 // the hardened value, create a dedicated new register that will only be used
1960 // to communicate the unhardened value to the hardening.
1961 Register UnhardenedReg = MRI->createVirtualRegister(DefRC);
1962 DefOp.setReg(UnhardenedReg);
1963
1964 // Now harden this register's value, getting a hardened reg that is safe to
1965 // use. Note that we insert the instructions to compute this *after* the
1966 // defining instruction, not before it.
1967 Register HardenedReg = hardenValueInRegister(
1968 UnhardenedReg, MBB, std::next(MI.getIterator()), Loc);
1969
1970 // Finally, replace the old register (which now only has the uses of the
1971 // original def) with the hardened register.
1972 MRI->replaceRegWith(/*FromReg*/ OldDefReg, /*ToReg*/ HardenedReg);
1973
1974 ++NumPostLoadRegsHardened;
1975 return HardenedReg;
1976}
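// Sketch of the rewrite performed here (virtual register names are
// illustrative): a load `%old = MOV64rm ...` becomes
//
//   %unhardened = MOV64rm ...
//   %hardened   = OR64rr %state, %unhardened
//
// and every original use of %old is redirected to %hardened.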
1977
1978/// Harden a return instruction.
1979///
1980/// Returns implicitly perform a load which we need to harden. Without hardening
1981 /// this load, an attacker may speculatively write over the return address to
1982/// steer speculation of the return to an attacker controlled address. This is
1983/// called Spectre v1.1 or Bounds Check Bypass Store (BCBS) and is described in
1984/// this paper:
1985/// https://people.csail.mit.edu/vlk/spectre11.pdf
1986///
1987/// We can harden this by introducing an LFENCE that will delay any load of the
1988/// return address until prior instructions have retired (and thus are not being
1989/// speculated), or we can harden the address used by the implicit load: the
1990/// stack pointer.
1991///
1992/// If we are not using an LFENCE, hardening the stack pointer has an additional
1993/// benefit: it allows us to pass the predicate state accumulated in this
1994/// function back to the caller. In the absence of a BCBS attack on the return,
1995/// the caller will typically be resumed and speculatively executed due to the
1996/// Return Stack Buffer (RSB) prediction which is very accurate and has a high
1997/// priority. It is possible that some code from the caller will be executed
1998/// speculatively even during a BCBS-attacked return until the steering takes
1999/// effect. Whenever this happens, the caller can recover the (poisoned)
2000/// predicate state from the stack pointer and continue to harden loads.
2001void X86SpeculativeLoadHardeningPass::hardenReturnInstr(MachineInstr &MI) {
2002 MachineBasicBlock &MBB = *MI.getParent();
2003 const DebugLoc &Loc = MI.getDebugLoc();
2004 auto InsertPt = MI.getIterator();
2005
2006 if (FenceCallAndRet)
2007 // No need to fence here as we'll fence at the return site itself. That
2008 // handles more cases than we can handle here.
2009 return;
2010
2011 // Take our predicate state, shift it to the high 17 bits (so that we keep
2012 // pointers canonical) and merge it into RSP. This will allow the caller to
2013 // extract it when we return (speculatively).
2014 mergePredStateIntoSP(MBB, InsertPt, Loc, PS->SSA.GetValueAtEndOfBlock(&MBB));
2015}
2016
2017/// Trace the predicate state through a call.
2018///
2019/// There are several layers of this needed to handle the full complexity of
2020/// calls.
2021///
2022/// First, we need to send the predicate state into the called function. We do
2023/// this by merging it into the high bits of the stack pointer.
2024///
2025/// For tail calls, this is all we need to do.
2026///
2027/// For calls where we might return and resume the control flow, we need to
2028/// extract the predicate state from the high bits of the stack pointer after
2029/// control returns from the called function.
2030///
2031/// We also need to verify that we intended to return to this location in the
2032/// code. An attacker might arrange for the processor to mispredict the return
2033/// to this valid but incorrect return address in the program rather than the
2034/// correct one. See the paper on this attack, called "ret2spec" by the
2035/// researchers, here:
2036/// https://christian-rossow.de/publications/ret2spec-ccs2018.pdf
2037///
2038/// The way we verify that we returned to the correct location is by preserving
2039/// the expected return address across the call. One technique involves taking
2040/// advantage of the red-zone to load the return address from `8(%rsp)` where it
2041/// was left by the RET instruction when it popped `%rsp`. Alternatively, we can
2042/// directly save the address into a register that will be preserved across the
2043/// call. We compare this intended return address against the address
2044/// immediately following the call (the observed return address). If these
2045/// mismatch, we have detected misspeculation and can poison our predicate
2046/// state.
2047void X86SpeculativeLoadHardeningPass::tracePredStateThroughCall(
2048 MachineInstr &MI) {
2049 MachineBasicBlock &MBB = *MI.getParent();
2050 MachineFunction &MF = *MBB.getParent();
2051 auto InsertPt = MI.getIterator();
2052 const DebugLoc &Loc = MI.getDebugLoc();
2053
2054 if (FenceCallAndRet) {
2055 if (MI.isReturn())
2056 // Tail call, we don't return to this function.
2057 // FIXME: We should also handle noreturn calls.
2058 return;
2059
2060 // We don't need to fence before the call because the function should fence
2061 // in its entry. However, we do need to fence after the call returns.
2062 // Fencing before the return doesn't correctly handle cases where the return
2063 // itself is mispredicted.
2064 BuildMI(MBB, std::next(InsertPt), Loc, TII->get(X86::LFENCE));
2065 ++NumInstsInserted;
2066 ++NumLFENCEsInserted;
2067 return;
2068 }
2069
2070 // First, we transfer the predicate state into the called function by merging
2071 // it into the stack pointer. This will kill the current def of the state.
2072 Register StateReg = PS->SSA.GetValueAtEndOfBlock(&MBB);
2073 mergePredStateIntoSP(MBB, InsertPt, Loc, StateReg);
2074
2075 // If this call is also a return, it is a tail call and we don't need anything
2076 // else to handle it so just return. Also, if there are no further
2077 // instructions and no successors, this call does not return so we can also
2078 // bail.
2079 if (MI.isReturn() || (std::next(InsertPt) == MBB.end() && MBB.succ_empty()))
2080 return;
2081
2082 // Create a symbol to track the return address and attach it to the call
2083 // machine instruction. We will lower extra symbols attached to call
2084 // instructions as labels immediately following the call.
2085 MCSymbol *RetSymbol =
2086 MF.getContext().createTempSymbol("slh_ret_addr",
2087 /*AlwaysAddSuffix*/ true);
2088 MI.setPostInstrSymbol(MF, RetSymbol);
2089
2090 const TargetRegisterClass *AddrRC = &X86::GR64RegClass;
2091 Register ExpectedRetAddrReg;
2092
2093 // If we have no red zones or if the function returns twice (possibly without
2094 // using the `ret` instruction) like setjmp, we need to save the expected
2095 // return address prior to the call.
2096 if (!Subtarget->getFrameLowering()->has128ByteRedZone(MF) ||
2097 MF.exposesReturnsTwice()) {
2098 // If we don't have red zones, we need to compute the expected return
2099 // address prior to the call and store it in a register that lives across
2100 // the call.
2101 //
2102 // In some ways, this is doubly satisfying as a mitigation because it will
2103 // also successfully detect stack smashing bugs in some cases (typically,
2104 // when a callee-saved register is used and the callee doesn't push it onto
2105 // the stack). But that isn't our primary goal, so we only use it as
2106 // a fallback.
2107 //
2108 // FIXME: It isn't clear that this is reliable in the face of
2109 // rematerialization in the register allocator. We somehow need to force
2110 // that to not occur for this particular instruction, and instead to spill
2111 // or otherwise preserve the value computed *prior* to the call.
2112 //
2113 // FIXME: It is even less clear why MachineCSE can't just fold this when we
2114 // end up having to use identical instructions both before and after the
2115 // call to feed the comparison.
2116 ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2117 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2118 !Subtarget->isPositionIndependent()) {
2119 BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64ri32), ExpectedRetAddrReg)
2120 .addSym(RetSymbol);
2121 } else {
2122 BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ExpectedRetAddrReg)
2123 .addReg(/*Base*/ X86::RIP)
2124 .addImm(/*Scale*/ 1)
2125 .addReg(/*Index*/ 0)
2126 .addSym(RetSymbol)
2127 .addReg(/*Segment*/ 0);
2128 }
2129 }
2130
2131 // Step past the call to handle when it returns.
2132 ++InsertPt;
2133
2134 // If we didn't pre-compute the expected return address into a register, then
2135 // red zones are enabled and the return address is still available on the
2136 // stack immediately after the call. As the very first instruction, we load it
2137 // into a register.
2138 if (!ExpectedRetAddrReg) {
2139 ExpectedRetAddrReg = MRI->createVirtualRegister(AddrRC);
2140 BuildMI(MBB, InsertPt, Loc, TII->get(X86::MOV64rm), ExpectedRetAddrReg)
2141 .addReg(/*Base*/ X86::RSP)
2142 .addImm(/*Scale*/ 1)
2143 .addReg(/*Index*/ 0)
2144 .addImm(/*Displacement*/ -8) // The stack pointer has been popped, so
2145 // the return address is 8-bytes past it.
2146 .addReg(/*Segment*/ 0);
2147 }
2148
2149 // Now we extract the callee's predicate state from the stack pointer.
2150 Register NewStateReg = extractPredStateFromSP(MBB, InsertPt, Loc);
2151
2152 // Test the expected return address against our actual address. If we can
2153 // form this basic block's address as an immediate, this is easy. Otherwise
2154 // we compute it.
2155 if (MF.getTarget().getCodeModel() == CodeModel::Small &&
2156 !Subtarget->isPositionIndependent()) {
2157 // FIXME: Could we fold this with the load? It would require careful EFLAGS
2158 // management.
2159 BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64ri32))
2160 .addReg(ExpectedRetAddrReg, RegState::Kill)
2161 .addSym(RetSymbol);
2162 } else {
2163 Register ActualRetAddrReg = MRI->createVirtualRegister(AddrRC);
2164 BuildMI(MBB, InsertPt, Loc, TII->get(X86::LEA64r), ActualRetAddrReg)
2165 .addReg(/*Base*/ X86::RIP)
2166 .addImm(/*Scale*/ 1)
2167 .addReg(/*Index*/ 0)
2168 .addSym(RetSymbol)
2169 .addReg(/*Segment*/ 0);
2170 BuildMI(MBB, InsertPt, Loc, TII->get(X86::CMP64rr))
2171 .addReg(ExpectedRetAddrReg, RegState::Kill)
2172 .addReg(ActualRetAddrReg, RegState::Kill);
2173 }
2174
2175 // Now conditionally update the predicate state we just extracted if we ended
2176 // up at a different return address than expected.
2177 int PredStateSizeInBytes = TRI->getRegSizeInBits(*PS->RC) / 8;
2178 auto CMovOp = X86::getCMovOpcode(PredStateSizeInBytes);
2179
2180 Register UpdatedStateReg = MRI->createVirtualRegister(PS->RC);
2181 auto CMovI = BuildMI(MBB, InsertPt, Loc, TII->get(CMovOp), UpdatedStateReg)
2182 .addReg(NewStateReg, RegState::Kill)
2183 .addReg(PS->PoisonReg)
2184 .addImm(X86::COND_NE);
2185 CMovI->findRegisterUseOperand(X86::EFLAGS, /*TRI=*/nullptr)->setIsKill(true);
2186 ++NumInstsInserted;
2187 LLVM_DEBUG(dbgs() << " Inserting cmov: "; CMovI->dump(); dbgs() << "\n");
2188
2189 PS->SSA.AddAvailableValue(&MBB, UpdatedStateReg);
2190}
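// Putting the pieces together for the small-code-model, non-PIC, red-zone case,
// the code around a call lowers to roughly the following (register and label
// names are illustrative):
//
//   shlq $47, %state0
//   orq  %state0, %rsp              # pass the state to the callee
//   callq callee
// .Lslh_ret_addr0:
//   movq -8(%rsp), %expected        # return address the ret just popped
//   movq %rsp, %state
//   sarq $63, %state                # recover the callee's predicate state
//   cmpq $.Lslh_ret_addr0, %expected
//   cmovneq %poison, %state         # poison the state on a mispredicted return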
2191
2192/// An attacker may speculatively store over a value that is then speculatively
2193/// loaded and used as the target of an indirect call or jump instruction. This
2194/// is called Spectre v1.2 or Bounds Check Bypass Store (BCBS) and is described
2195/// in this paper:
2196/// https://people.csail.mit.edu/vlk/spectre11.pdf
2197///
2198/// When this happens, the speculative execution of the call or jump will end up
2199/// being steered to this attacker controlled address. While most such loads
2200/// will be adequately hardened already, we want to ensure that they are
2201/// definitively treated as needing post-load hardening. While address hardening
2202/// is sufficient to prevent secret data from leaking to the attacker, it may
2203/// not be sufficient to prevent an attacker from steering speculative
2204/// execution. We forcibly unfolded all relevant loads above and so will always
2205 /// have an opportunity to post-load harden here; we just need to scan for cases
2206/// not already flagged and add them.
2207void X86SpeculativeLoadHardeningPass::hardenIndirectCallOrJumpInstr(
2208 MachineInstr &MI,
2209 SmallDenseMap<Register, Register, 32> &AddrRegToHardenedReg) {
2210 switch (MI.getOpcode()) {
2211 case X86::FARCALL16m:
2212 case X86::FARCALL32m:
2213 case X86::FARCALL64m:
2214 case X86::FARJMP16m:
2215 case X86::FARJMP32m:
2216 case X86::FARJMP64m:
2217 // We don't need to harden either far calls or far jumps as they are
2218 // safe from Spectre.
2219 return;
2220
2221 default:
2222 break;
2223 }
2224
2225 // We should never see a loading instruction at this point, as those should
2226 // have been unfolded.
2227 assert(!MI.mayLoad() && "Found a lingering loading instruction!");
2228
2229 // If the first operand isn't a register, this is a branch or call
2230 // instruction with an immediate operand which doesn't need to be hardened.
2231 if (!MI.getOperand(0).isReg())
2232 return;
2233
2234 // For all of these, the target register is the first operand of the
2235 // instruction.
2236 auto &TargetOp = MI.getOperand(0);
2237 Register OldTargetReg = TargetOp.getReg();
2238
2239 // Try to lookup a hardened version of this register. We retain a reference
2240 // here as we want to update the map to track any newly computed hardened
2241 // register.
2242 Register &HardenedTargetReg = AddrRegToHardenedReg[OldTargetReg];
2243
2244 // If we don't have a hardened register yet, compute one. Otherwise, just use
2245 // the already hardened register.
2246 //
2247 // FIXME: It is a little suspect that we use partially hardened registers that
2248 // only feed addresses. The complexity of partial hardening with SHRX
2249 // continues to pile up. Should definitively measure its value and consider
2250 // eliminating it.
2251 if (!HardenedTargetReg)
2252 HardenedTargetReg = hardenValueInRegister(
2253 OldTargetReg, *MI.getParent(), MI.getIterator(), MI.getDebugLoc());
2254
2255 // Set the target operand to the hardened register.
2256 TargetOp.setReg(HardenedTargetReg);
2257
2258 ++NumCallsOrJumpsHardened;
2259}
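// The net effect on, e.g., an indirect jump `jmpq *%rtarget` is roughly
// (illustrative register names):
//
//   orq  %state, %rtarget    # all ones under misspeculation
//   jmpq *%rtarget           # steering now lands on a poisoned address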
2260
2261INITIALIZE_PASS_BEGIN(X86SpeculativeLoadHardeningPass, PASS_KEY,
2262 "X86 speculative load hardener", false, false)
2263INITIALIZE_PASS_END(X86SpeculativeLoadHardeningPass, PASS_KEY,
2264 "X86 speculative load hardener", false, false)
2265
2266 FunctionPass *llvm::createX86SpeculativeLoadHardeningPass() {
2267 return new X86SpeculativeLoadHardeningPass();
2268}