diff --git a/llvm/include/llvm/CodeGen/CallingConvLower.h b/llvm/include/llvm/CodeGen/CallingConvLower.h
index 702ac1427c49..11f6e80fd373 100644
--- a/llvm/include/llvm/CodeGen/CallingConvLower.h
+++ b/llvm/include/llvm/CodeGen/CallingConvLower.h
@@ -33,32 +33,33 @@ public:
     SExt,   // The value is sign extended in the location.
     ZExt,   // The value is zero extended in the location.
     AExt,   // The value is extended with undefined upper bits.
-    BCvt    // The value is bit-converted in the location.
+    BCvt,   // The value is bit-converted in the location.
+    Indirect // The location contains pointer to the value.
     // TODO: a subset of the value is in the location.
   };
 private:
   /// ValNo - This is the value number begin assigned (e.g. an argument number).
   unsigned ValNo;
-  
+
   /// Loc is either a stack offset or a register number.
   unsigned Loc;
-  
+
   /// isMem - True if this is a memory loc, false if it is a register loc.
   bool isMem : 1;
-  
+
   /// isCustom - True if this arg/retval requires special handling.
   bool isCustom : 1;
 
   /// Information about how the value is assigned.
   LocInfo HTP : 6;
-  
+
   /// ValVT - The type of the value being assigned.
   MVT ValVT;
 
   /// LocVT - The type of the location being assigned to.
   MVT LocVT;
 public:
-    
+
   static CCValAssign getReg(unsigned ValNo, MVT ValVT,
                             unsigned RegNo, MVT LocVT,
                             LocInfo HTP) {
@@ -95,7 +96,7 @@ public:
     Ret.LocVT = LocVT;
     return Ret;
   }
-  
+
   static CCValAssign getCustomMem(unsigned ValNo, MVT ValVT,
                                   unsigned Offset, MVT LocVT,
                                   LocInfo HTP) {
@@ -110,14 +111,18 @@ public:
 
   bool isRegLoc() const { return !isMem; }
   bool isMemLoc() const { return isMem; }
-  
+
   bool needsCustom() const { return isCustom; }
 
   unsigned getLocReg() const { assert(isRegLoc()); return Loc; }
   unsigned getLocMemOffset() const { assert(isMemLoc()); return Loc; }
   MVT getLocVT() const { return LocVT; }
-  
+
   LocInfo getLocInfo() const { return HTP; }
+  bool isExtInLoc() const {
+    return (HTP == AExt || HTP == SExt || HTP == ZExt);
+  }
+
 };
 
 /// CCAssignFn - This function assigns a location for Val, updating State to
@@ -143,22 +148,22 @@ class CCState {
   const TargetRegisterInfo &TRI;
   SmallVector<CCValAssign, 16> &Locs;
   LLVMContext &Context;
-  
+
   unsigned StackOffset;
   SmallVector<uint32_t, 16> UsedRegs;
 public:
   CCState(unsigned CC, bool isVarArg, const TargetMachine &TM,
           SmallVector<CCValAssign, 16> &locs, LLVMContext &C);
-  
+
   void addLoc(const CCValAssign &V) {
     Locs.push_back(V);
   }
-  
+
   LLVMContext &getContext() const { return Context; }
   const TargetMachine &getTarget() const { return TM; }
   unsigned getCallingConv() const { return CallingConv; }
   bool isVarArg() const { return IsVarArg; }
-  
+
   unsigned getNextStackOffset() const { return StackOffset; }
 
   /// isAllocated - Return true if the specified register (or an alias) is
@@ -166,15 +171,15 @@ public:
   bool isAllocated(unsigned Reg) const {
     return UsedRegs[Reg/32] & (1 << (Reg&31));
   }
-  
+
   /// AnalyzeFormalArguments - Analyze an ISD::FORMAL_ARGUMENTS node,
   /// incorporating info about the formals into this state.
   void AnalyzeFormalArguments(SDNode *TheArgs, CCAssignFn Fn);
-  
+
   /// AnalyzeReturn - Analyze the returned values of an ISD::RET node,
   /// incorporating info about the result values into this state.
   void AnalyzeReturn(SDNode *TheRet, CCAssignFn Fn);
-  
+
   /// AnalyzeCallOperands - Analyze an ISD::CALL node, incorporating info
   /// about the passed values into this state.
   void AnalyzeCallOperands(CallSDNode *TheCall, CCAssignFn Fn);
@@ -188,7 +193,7 @@ public:
   /// AnalyzeCallResult - Analyze the return values of an ISD::CALL node,
   /// incorporating info about the passed values into this state.
   void AnalyzeCallResult(CallSDNode *TheCall, CCAssignFn Fn);
-  
+
   /// AnalyzeCallResult - Same as above except it's specialized for calls which
   /// produce a single value.
   void AnalyzeCallResult(MVT VT, CCAssignFn Fn);
@@ -201,7 +206,7 @@ public:
         return i;
     return NumRegs;
   }
-  
+
   /// AllocateReg - Attempt to allocate one register.  If it is not available,
   /// return zero.  Otherwise, return the register, marking it and any aliases
   /// as allocated.
diff --git a/llvm/include/llvm/Target/TargetCallingConv.td b/llvm/include/llvm/Target/TargetCallingConv.td
index 777aee84eadf..ceaeb0b5038b 100644
--- a/llvm/include/llvm/Target/TargetCallingConv.td
+++ b/llvm/include/llvm/Target/TargetCallingConv.td
@@ -109,6 +109,12 @@ class CCBitConvertToType<ValueType destTy> : CCAction {
   ValueType DestTy = destTy;
 }
 
+/// CCPassIndirect - If applied, this stores the value to stack and passes the pointer
+/// as normal argument.
+class CCPassIndirect<ValueType destTy> : CCAction {
+  ValueType DestTy = destTy;
+}
+
 /// CCDelegateTo - This action invokes the specified sub-calling-convention.  It
 /// is successful if the specified CC matches.
 class CCDelegateTo<CallingConv cc> : CCAction {
diff --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 5239dd1f5b47..d77f0390b10c 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -89,7 +89,7 @@ def RetCC_X86_64_C : CallingConv<[
 // X86-Win64 C return-value convention.
 def RetCC_X86_Win64_C : CallingConv<[
   // The X86-Win64 calling convention always returns __m64 values in RAX.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToReg<[RAX]>>,
+  CCIfType<[v8i8, v4i16, v2i32, v1i64], CCBitConvertToType<i64>>,
 
   // And FP in XMM0 only.
   CCIfType<[f32], CCAssignToReg<[XMM0]>>,
@@ -184,6 +184,13 @@ def CC_X86_Win64_C : CallingConv<[
   // The 'nest' parameter, if any, is passed in R10.
   CCIfNest<CCAssignToReg<[R10]>>,
 
+  // 128 bit vectors are passed by pointer
+  CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCPassIndirect<i64>>,
+
+  // The first 4 MMX vector arguments are passed in GPRs.
+  CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32],
+           CCBitConvertToType<i64>>,
+
   // The first 4 integer arguments are passed in integer registers.
   CCIfType<[i32], CCAssignToRegWithShadow<[ECX , EDX , R8D , R9D ],
                                           [XMM0, XMM1, XMM2, XMM3]>>,
@@ -195,11 +202,6 @@ def CC_X86_Win64_C : CallingConv<[
            CCAssignToRegWithShadow<[XMM0, XMM1, XMM2, XMM3],
                                    [RCX , RDX , R8  , R9  ]>>,
 
-  // The first 4 MMX vector arguments are passed in GPRs.
-  CCIfType<[v8i8, v4i16, v2i32, v1i64, v2f32],
-           CCAssignToRegWithShadow<[RCX , RDX , R8  , R9  ],
-                                   [XMM0, XMM1, XMM2, XMM3]>>,
-
   // Integer/FP values get stored in stack slots that are 8 bytes in size and
   // 8-byte aligned if there are no more registers to hold them.
   CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
@@ -208,9 +210,6 @@ def CC_X86_Win64_C : CallingConv<[
   // subtarget.
   CCIfType<[f80], CCAssignToStack<0, 0>>,
 
-  // Vectors get 16-byte stack slots that are 16-byte aligned.
-  CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCAssignToStack<16, 16>>,
-
   // __m64 vectors get 8-byte stack slots that are 8-byte aligned.
   CCIfType<[v8i8, v4i16, v2i32, v1i64], CCAssignToStack<8, 8>>
 ]>;
diff --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index ef387bb6db11..a46ec1cf62f4 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -1408,6 +1408,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
 
   SmallVector<SDValue, 8> ArgValues;
   unsigned LastVal = ~0U;
+  SDValue ArgValue;
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
     // TODO: If an arg is passed in two places (e.g. reg and stack), skip later
@@ -1435,7 +1436,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
         llvm_unreachable("Unknown argument type!");
 
       unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
-      SDValue ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
+      ArgValue = DAG.getCopyFromReg(Root, dl, Reg, RegVT);
 
       // If this is an 8 or 16-bit value, it is really passed promoted to 32
       // bits.  Insert an assert[sz]ext to capture this, then truncate to the
@@ -1450,8 +1451,7 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
         ArgValue = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, ArgValue,
                                DAG.getValueType(VA.getValVT()));
 
-      if (VA.getLocInfo() != CCValAssign::Full &&
-          VA.getLocInfo() != CCValAssign::BCvt) {
+      if (VA.isExtInLoc()) {
         // Handle MMX values passed in XMM regs.
         if (RegVT.isVector()) {
           ArgValue = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::i64,
@@ -1460,12 +1460,16 @@ X86TargetLowering::LowerFORMAL_ARGUMENTS(SDValue Op, SelectionDAG &DAG) {
         } else
           ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
       }
-
-      ArgValues.push_back(ArgValue);
     } else {
       assert(VA.isMemLoc());
-      ArgValues.push_back(LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i));
+      ArgValue = LowerMemArgument(Op, DAG, VA, MFI, CC, Root, i);
     }
+
+    // If value is passed via pointer - do a load.
+    if (VA.getLocInfo() == CCValAssign::Indirect)
+      ArgValue = DAG.getLoad(VA.getValVT(), dl, Root, ArgValue, NULL, 0);
+
+    ArgValues.push_back(ArgValue);
   }
 
   // The x86-64 ABI for returning structs by value requires that we copy
@@ -1747,6 +1751,15 @@ SDValue X86TargetLowering::LowerCALL(SDValue Op, SelectionDAG &DAG) {
     case CCValAssign::BCvt:
       Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg);
       break;
+    case CCValAssign::Indirect: {
+      // Store the argument.
+      SDValue SpillSlot = DAG.CreateStackTemporary(VA.getValVT());
+      int FI = cast<FrameIndexSDNode>(SpillSlot)->getIndex();
+      Chain = DAG.getStore(Chain, dl, Arg, SpillSlot,
+                           PseudoSourceValue::getFixedStack(FI), 0);
+      Arg = SpillSlot;
+      break;
+    }
     }
 
     if (VA.isRegLoc()) {
diff --git a/llvm/utils/TableGen/CallingConvEmitter.cpp b/llvm/utils/TableGen/CallingConvEmitter.cpp
index a14be0b76fd6..5879c417e4a7 100644
--- a/llvm/utils/TableGen/CallingConvEmitter.cpp
+++ b/llvm/utils/TableGen/CallingConvEmitter.cpp
@@ -186,6 +186,10 @@ void CallingConvEmitter::EmitAction(Record *Action,
       Record *DestTy = Action->getValueAsDef("DestTy");
       O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n";
       O << IndentStr << "LocInfo = CCValAssign::BCvt;\n";
+    } else if (Action->isSubClassOf("CCPassIndirect")) {
+      Record *DestTy = Action->getValueAsDef("DestTy");
+      O << IndentStr << "LocVT = " << getEnumName(getValueType(DestTy)) <<";\n";
+      O << IndentStr << "LocInfo = CCValAssign::Indirect;\n";
     } else if (Action->isSubClassOf("CCPassByVal")) {
       int Size = Action->getValueAsInt("Size");
       int Align = Action->getValueAsInt("Align");