diff --git a/llvm/lib/Target/X86/X86InstrInfo.cpp b/llvm/lib/Target/X86/X86InstrInfo.cpp
index 39bda04b4d13..0d3b54fe8e22 100644
--- a/llvm/lib/Target/X86/X86InstrInfo.cpp
+++ b/llvm/lib/Target/X86/X86InstrInfo.cpp
@@ -1786,6 +1786,7 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
                            const SmallVectorImpl<MachineOperand> &Cond) const {
   // FIXME this should probably have a DebugLoc operand
   DebugLoc dl = DebugLoc::getUnknownLoc();
+  // Shouldn't be a fall-through.
   assert(TBB && "InsertBranch must not be told to insert a fallthrough");
   assert((Cond.size() == 1 || Cond.size() == 0) &&
          "X86 branch conditions have one component!");
@@ -1799,34 +1800,72 @@ X86InstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
   }
 
   // Conditional branch.
+  const MachineBasicBlock *NextBB = next(MachineFunction::iterator(&MBB));
   unsigned Count = 0;
   X86::CondCode CC = (X86::CondCode)Cond[0].getImm();
+
+  // In a two-way conditional branch, if the fall-through block is the "true"
+  // destination of the conditional jumps, we can cut out the unconditional
+  // jump to the "false" block by inverting the second conditional jump. This
+  // saves a few bytes and improves performance. E.g., for COND_NE_OR_P:
+  //
+  //   JNE L1
+  //   JP  L1
+  //   JMP L2
+  // L1:
+  //   ...
+  // L2:
+  //   ...
+  //
+  // to:
+  //
+  //   JP  L1
+  //   JE  L2
+  // L1:
+  //   ...
+  // L2:
+  //   ...
+  //
+  // Similarly for COND_NP_OR_E.
   switch (CC) {
+  default:
+    BuildMI(&MBB, dl, get(GetCondBranchFromCond(CC))).addMBB(TBB);
+    ++Count;
+    break;
   case X86::COND_NP_OR_E:
     // Synthesize NP_OR_E with two branches.
-    BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
-    ++Count;
-    BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB);
-    ++Count;
+    if (FBB && TBB == NextBB) {
+      BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
+      BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(FBB);
+      FBB = 0;
+    } else {
+      BuildMI(&MBB, dl, get(X86::JNP_4)).addMBB(TBB);
+      BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(TBB);
+    }
+
+    Count += 2;
     break;
   case X86::COND_NE_OR_P:
     // Synthesize NE_OR_P with two branches.
-    BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB);
-    ++Count;
-    BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
-    ++Count;
+    if (FBB && TBB == NextBB) {
+      BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
+      BuildMI(&MBB, dl, get(X86::JE_4)).addMBB(FBB);
+      FBB = 0;
+    } else {
+      BuildMI(&MBB, dl, get(X86::JNE_4)).addMBB(TBB);
+      BuildMI(&MBB, dl, get(X86::JP_4)).addMBB(TBB);
+    }
+
+    Count += 2;
     break;
-  default: {
-    unsigned Opc = GetCondBranchFromCond(CC);
-    BuildMI(&MBB, dl, get(Opc)).addMBB(TBB);
-    ++Count;
-  }
   }
+
   if (FBB) {
     // Two-way Conditional branch. Insert the second branch.
     BuildMI(&MBB, dl, get(X86::JMP_4)).addMBB(FBB);
     ++Count;
   }
+
   return Count;
 }
 
diff --git a/llvm/test/CodeGen/X86/jump-opt.ll b/llvm/test/CodeGen/X86/jump-opt.ll
new file mode 100644
index 000000000000..dc32f6665f8e
--- /dev/null
+++ b/llvm/test/CodeGen/X86/jump-opt.ll
@@ -0,0 +1,22 @@
+; RUN: llc < %s -mtriple=i386-apple-darwin10 | FileCheck %s
+
+;
+define float @test1(float %x, float %y) nounwind readnone optsize ssp {
+; CHECK: jp
+; CHECK-NEXT: je
+entry:
+  %0 = fpext float %x to double
+  %1 = fpext float %y to double
+  %2 = fmul double %0, %1
+  %3 = fcmp oeq double %2, 0.000000e+00
+  br i1 %3, label %bb2, label %bb1
+
+bb1:
+  %4 = fadd double %2, -1.000000e+00
+  br label %bb2
+
+bb2:
+  %.0.in = phi double [ %4, %bb1 ], [ %2, %entry ]
+  %.0 = fptrunc double %.0.in to float
+  ret float %.0
+}
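Note on the rearrangement in the comment above: a quick way to convince yourself that the inverted two-jump sequences are equivalent to the original three-jump forms is to enumerate all four ZF/PF flag combinations. The standalone C++ sketch below does that exhaustively. It is a sanity check of the comment's claim only, under the assumption that L1 (the "true" block) is laid out as the fall-through; the type and function names (Target, origNEorP, and so on) are ours for illustration, not LLVM's.

    // Sanity check for the jump rearrangement (illustrative only, not LLVM code).
    // COND_NE_OR_P original:   JNE L1; JP L1; JMP L2
    //       rearranged:        JP  L1; JE L2;  fall through to L1
    // COND_NP_OR_E original:   JNP L1; JE L1; JMP L2
    //       rearranged:        JNP L1; JNE L2; fall through to L1
    #include <cassert>

    enum Target { L1, L2 };

    // COND_NE_OR_P (e.g. fcmp une), original three-jump form.
    static Target origNEorP(bool ZF, bool PF) {
      if (!ZF) return L1;   // JNE L1
      if (PF)  return L1;   // JP  L1
      return L2;            // JMP L2
    }

    // COND_NE_OR_P, rearranged form; L1 is the fall-through block.
    static Target rearrNEorP(bool ZF, bool PF) {
      if (PF) return L1;    // JP L1
      if (ZF) return L2;    // JE L2
      return L1;            // fall through to L1
    }

    // COND_NP_OR_E, original three-jump form.
    static Target origNPorE(bool ZF, bool PF) {
      if (!PF) return L1;   // JNP L1
      if (ZF)  return L1;   // JE  L1
      return L2;            // JMP L2
    }

    // COND_NP_OR_E, rearranged form; L1 is the fall-through block.
    static Target rearrNPorE(bool ZF, bool PF) {
      if (!PF) return L1;   // JNP L1
      if (!ZF) return L2;   // JNE L2
      return L1;            // fall through to L1
    }

    int main() {
      // Check every ZF/PF combination, including the one ucomiss/ucomisd
      // never actually produces (ZF=0, PF=1).
      for (int ZF = 0; ZF < 2; ++ZF)
        for (int PF = 0; PF < 2; ++PF) {
          assert(origNEorP(ZF, PF) == rearrNEorP(ZF, PF));
          assert(origNPorE(ZF, PF) == rearrNPorE(ZF, PF));
        }
      return 0;
    }

Every flag state selects the same successor in both forms, which is why the unconditional JMP can be dropped whenever the "true" block immediately follows the conditional jumps.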