MachineBasicBlock: add liveout iterator aware of which liveins are defined by the runtime.

Using this in RegAllocFast reduces register pressure and, in some cases, allows
x86 code to compile that previously ran out of registers.
diff --git a/llvm/include/llvm/CodeGen/MachineBasicBlock.h b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
index 2bad64c..9bdbc50 100644
--- a/llvm/include/llvm/CodeGen/MachineBasicBlock.h
+++ b/llvm/include/llvm/CodeGen/MachineBasicBlock.h
@@ -410,6 +410,120 @@
   /// Remove entry from the livein set and return iterator to the next.
   livein_iterator removeLiveIn(livein_iterator I);
 
+  /// Input iterator over the liveins of every successor of a block. For a
+  /// successor that is an EH pad, the exception pointer and exception selector
+  /// registers are skipped: the runtime defines them on entry to the pad, so
+  /// they are not live out of the predecessor.
+  class liveout_iterator {
+  public:
+    using iterator_category = std::input_iterator_tag;
+    using difference_type = std::ptrdiff_t;
+    using value_type = RegisterMaskPair;
+    using pointer = const RegisterMaskPair *;
+    using reference = const RegisterMaskPair &;
+
+    /// Create an iterator over the successors of \p MBB. \p ExceptionPointer
+    /// and \p ExceptionSelector are the registers to skip in EH pad
+    /// successors. If \p End is true the result is the past-the-end iterator;
+    /// otherwise it is positioned on the first reportable livein, if any.
+    liveout_iterator(const MachineBasicBlock &MBB, MCPhysReg ExceptionPointer,
+                     MCPhysReg ExceptionSelector, bool End)
+        : ExceptionPointer(ExceptionPointer),
+          ExceptionSelector(ExceptionSelector), BlockI(MBB.succ_begin()),
+          BlockEnd(MBB.succ_end()) {
+      if (End)
+        BlockI = BlockEnd;
+      else if (BlockI != BlockEnd) {
+        LiveRegI = (*BlockI)->livein_begin();
+        if (!advanceToValidPosition())
+          return;
+        // Apply the same filter as operator++: the exception registers are
+        // only runtime-defined when the successor is an EH pad. In any other
+        // successor they are ordinary liveins and must be reported.
+        if (isRuntimeDefinedLiveIn())
+          ++(*this);
+      }
+    }
+
+    /// Advance to the next reportable livein, moving on to later successors
+    /// as needed and skipping runtime-defined registers of EH pads.
+    liveout_iterator &operator++() {
+      do {
+        ++LiveRegI;
+        if (!advanceToValidPosition())
+          return *this;
+      } while (isRuntimeDefinedLiveIn());
+      return *this;
+    }
+
+    liveout_iterator operator++(int) {
+      liveout_iterator Tmp = *this;
+      ++(*this);
+      return Tmp;
+    }
+
+    reference operator*() const {
+      return *LiveRegI;
+    }
+
+    pointer operator->() const {
+      return &*LiveRegI;
+    }
+
+    /// Two iterators are equal if both are past the end, or if they refer to
+    /// the same livein of the same successor.
+    bool operator==(const liveout_iterator &RHS) const {
+      if (BlockI != BlockEnd)
+        return BlockI == RHS.BlockI && LiveRegI == RHS.LiveRegI;
+      return RHS.BlockI == BlockEnd;
+    }
+
+    bool operator!=(const liveout_iterator &RHS) const {
+      return !(*this == RHS);
+    }
+  private:
+    /// Return true if the current livein is the exception pointer or
+    /// exception selector of an EH pad successor. Requires a valid position.
+    bool isRuntimeDefinedLiveIn() const {
+      return (*BlockI)->isEHPad() &&
+             (LiveRegI->PhysReg == ExceptionPointer ||
+              LiveRegI->PhysReg == ExceptionSelector);
+    }
+
+    /// If LiveRegI is at the end of the current successor's liveins, move to
+    /// the first livein of the next successor that has any. Returns false if
+    /// the successors are exhausted, true if LiveRegI is dereferenceable.
+    bool advanceToValidPosition() {
+      if (LiveRegI != (*BlockI)->livein_end())
+        return true;
+
+      do {
+        ++BlockI;
+      } while (BlockI != BlockEnd && (*BlockI)->livein_empty());
+      if (BlockI == BlockEnd)
+        return false;
+
+      LiveRegI = (*BlockI)->livein_begin();
+      return true;
+    }
+
+    MCPhysReg ExceptionPointer, ExceptionSelector;
+    const_succ_iterator BlockI;
+    const_succ_iterator BlockEnd;
+    livein_iterator LiveRegI;
+  };
+
+  /// Iterator scanning successor basic blocks' liveins to determine the
+  /// registers potentially live at the end of this block. There may be
+  /// duplicates or overlapping registers in the list returned.
+  liveout_iterator liveout_begin() const;
+  liveout_iterator liveout_end() const {
+    return liveout_iterator(*this, 0, 0, true);
+  }
+  iterator_range<liveout_iterator> liveouts() const {
+    return make_range(liveout_begin(), liveout_end());
+  }
+
   /// Get the clobber mask for the start of this basic block. Funclets use this
   /// to prevent register allocation across funclet transitions.
   const uint32_t *getBeginClobberMask(const TargetRegisterInfo *TRI) const;
diff --git a/llvm/lib/CodeGen/MachineBasicBlock.cpp b/llvm/lib/CodeGen/MachineBasicBlock.cpp
index b4187af..798484d 100644
--- a/llvm/lib/CodeGen/MachineBasicBlock.cpp
+++ b/llvm/lib/CodeGen/MachineBasicBlock.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/SlotIndexes.h"
 #include "llvm/CodeGen/TargetInstrInfo.h"
+#include "llvm/CodeGen/TargetLowering.h"
 #include "llvm/CodeGen/TargetRegisterInfo.h"
 #include "llvm/CodeGen/TargetSubtargetInfo.h"
 #include "llvm/Config/llvm-config.h"
@@ -1569,6 +1570,31 @@
   return LiveIns.begin();
 }
 
+/// Return an iterator to the first register potentially live out of this
+/// block. When the function has a personality, the target's exception pointer
+/// and selector registers are handed to the iterator.
+MachineBasicBlock::liveout_iterator MachineBasicBlock::liveout_begin() const {
+  const MachineFunction &Fn = *getParent();
+  assert(Fn.getProperties().hasProperty(
+      MachineFunctionProperties::Property::TracksLiveness) &&
+      "Liveness information is accurate");
+
+  const TargetLowering &Lowering = *Fn.getSubtarget().getTargetLowering();
+  const Function &IRFn = Fn.getFunction();
+
+  // Without a personality there are no exception registers to report; 0 is
+  // passed for both.
+  MCPhysReg EHPointer = 0;
+  MCPhysReg EHSelector = 0;
+  if (IRFn.hasPersonalityFn()) {
+    const Constant *Personality = IRFn.getPersonalityFn();
+    EHPointer = Lowering.getExceptionPointerRegister(Personality);
+    EHSelector = Lowering.getExceptionSelectorRegister(Personality);
+  }
+
+  return liveout_iterator(*this, EHPointer, EHSelector, /*End=*/false);
+}
+
 const MBBSectionID MBBSectionID::ColdSectionID(MBBSectionID::SectionType::Cold);
 const MBBSectionID
     MBBSectionID::ExceptionSectionID(MBBSectionID::SectionType::Exception);
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 6e548d4..1933c3d 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -1425,10 +1425,8 @@
   RegUnitStates.assign(TRI->getNumRegUnits(), regFree);
   assert(LiveVirtRegs.empty() && "Mapping not cleared from last block?");
 
-  for (MachineBasicBlock *Succ : MBB.successors()) {
-    for (const MachineBasicBlock::RegisterMaskPair &LI : Succ->liveins())
-      setPhysRegState(LI.PhysReg, regPreAssigned);
-  }
+  for (const MachineBasicBlock::RegisterMaskPair &LiveOut : MBB.liveouts())
+    setPhysRegState(LiveOut.PhysReg, regPreAssigned);
 
   Coalesced.clear();
 
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index 1e2407c..136b71d 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -26632,7 +26632,9 @@
 Register X86TargetLowering::getExceptionSelectorRegister(
     const Constant *PersonalityFn) const {
   // Funclet personalities don't use selectors (the runtime does the selection).
-  assert(!isFuncletEHPersonality(classifyEHPersonality(PersonalityFn)));
+  const EHPersonality Personality = classifyEHPersonality(PersonalityFn);
+  if (isFuncletEHPersonality(Personality))
+    return X86::NoRegister;
   return Subtarget.isTarget64BitLP64() ? X86::RDX : X86::EDX;
 }
 
diff --git a/llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll b/llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll
index 9a54c87..e262448 100644
--- a/llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll
+++ b/llvm/test/CodeGen/X86/fast-isel-cmp-branch.ll
@@ -19,7 +19,7 @@
 ; different basic block, so its operands aren't necessarily exported
 ; for cross-block usage.
 
-; CHECK: movb    %cl, [[OFS:[0-9]*]](%rsp)
+; CHECK: movb    %al, [[OFS:[0-9]*]](%rsp)
 ; CHECK: callq   {{_?}}bar
 ; CHECK: movb    [[OFS]](%rsp), %al
 
diff --git a/llvm/test/CodeGen/X86/regalloc-tight-invoke.ll b/llvm/test/CodeGen/X86/regalloc-tight-invoke.ll
new file mode 100644
index 0000000..c56ad12
--- /dev/null
+++ b/llvm/test/CodeGen/X86/regalloc-tight-invoke.ll
@@ -0,0 +1,53 @@
+; RUN: llc -O0 -mtriple=x86_64-apple-darwin %s -o - | FileCheck %s
+declare void @foo(i32, ...)
+
+declare i32 @__gxx_personality_v0(...)
+
+; We were running out of registers for this invoke, because:
+
+;     1. The lshr/and pattern gets matched to a no-REX MOV so that ah/bh/... can
+;        be used instead, cutting available registers for %b.arg down to eax, ebx,
+;        ecx, edx, esi, edi.
+;     2. We have a base pointer taking ebx out of contention.
+;     3. The landingpad block convinced us we should be defining rax here.
+;     4. The al fiddling for the varargs call only noted down that al was spillable,
+;        not ah or rax.
+;
+; So by the time we need to allocate a register for the call all registers are
+; tied up and unspillable.
+
+; CHECK-LABEL: bar:
+; CHECK: xorl %edi, %edi
+; CHECK: movb %dil, {{[0-9]+}}(%rbx)
+; CHECK: movb {{[0-9]+}}(%rbx), %al
+
+define i32 @bar(i32 %a, i32 %b, i32 %c, i32 %d, ...) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+  %mem = alloca i32, i32 %a, align 32   ; Force rbx to be used as a base pointer
+  %b.tmp = lshr i32 %b, 8
+  %b.arg = and i32 %b.tmp, 255
+  invoke void(i32, ...) @foo(i32 42, i32* %mem, i32 %c, i32 %d, i32 %b.arg) to label %success unwind label %fail
+
+success:
+  ret i32 0
+fail:
+  %exc = landingpad { i8*, i32 } cleanup
+  %res = extractvalue { i8*, i32 } %exc, 1
+  ret i32 %res
+}
+
+; CHECK-LABEL: live:
+; CHECK: movl {{%.*}}, %eax
+
+define i32 @live(i32 %a, i32 %b, i32 %c, i32 %d, ...) personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) {
+  %mem = alloca i32, i32 %a, align 32   ; Force rbx to be used as a base pointer
+  %b.tmp = lshr i32 %b, 8
+  %b.arg = and i32 %b.tmp, 255
+  invoke void(i32, ...) @foo(i32 42) to label %success unwind label %fail
+
+success:
+  ret i32 0
+fail:
+  %exc = landingpad { i8*, i32 } cleanup
+  %res = extractvalue { i8*, i32 } %exc, 1
+  ret i32 %b.arg
+}