You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@harmony.apache.org by va...@apache.org on 2006/12/11 08:45:03 UTC

svn commit: r485540 - in /harmony/enhanced/drlvm/trunk: src/test/microbenchmark/harmony-2345/ vm/jitrino/config/ia32/ vm/jitrino/src/codegenerator/ia32/

Author: varlax
Date: Sun Dec 10 23:45:01 2006
New Revision: 485540

URL: http://svn.apache.org/viewvc?view=rev&rev=485540
Log:
Applied harmony-2345 [drlvm][performance][jit,ia32]Float-to-int conversion optimized + minor peephole things.
Tested on SUSE9@ia32, SUSE9@x64, Win2003@ia32.
More than 2x boost on the microbenchmark on ia32 (beating RI 2x, yay!) and no effect on x64.

Added:
    harmony/enhanced/drlvm/trunk/src/test/microbenchmark/harmony-2345/
    harmony/enhanced/drlvm/trunk/src/test/microbenchmark/harmony-2345/test_f2i_speed.java
    harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32PeepHole.cpp   (with props)
Modified:
    harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/client.emconf
    harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/opt.emconf
    harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/server.emconf
    harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/server_static.emconf
    harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32CgUtils.cpp
    harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32CgUtils.h

Added: harmony/enhanced/drlvm/trunk/src/test/microbenchmark/harmony-2345/test_f2i_speed.java
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/src/test/microbenchmark/harmony-2345/test_f2i_speed.java?view=auto&rev=485540
==============================================================================
--- harmony/enhanced/drlvm/trunk/src/test/microbenchmark/harmony-2345/test_f2i_speed.java (added)
+++ harmony/enhanced/drlvm/trunk/src/test/microbenchmark/harmony-2345/test_f2i_speed.java Sun Dec 10 23:45:01 2006
@@ -0,0 +1,41 @@
+import java.util.*;
+
+/**
+ * Microbenchmark that times a loop dominated by float-to-int conversions.
+ */
+public class test_f2i_speed {
+    public static void main(String[] args) {
+        // Warm-up phase: invoke the kernel often enough that the JIT
+        // recompiles it before anything is timed.
+        System.out.println("Warming up ...");
+        int warmUpsLeft = 20000;
+        while (warmUpsLeft > 0) {
+            test(false);
+            warmUpsLeft--;
+        }
+
+        // Timed phase: a single full-size run of the kernel.
+        System.out.println("Measuring ...");
+        final long begin = System.currentTimeMillis();
+        test(true);
+        final long finish = System.currentTimeMillis();
+
+        final long elapsed = finish - begin;
+        System.out.println("... done.");
+        System.out.println("The test took: " + elapsed + "ms");
+    }
+
+    /**
+     * The benchmark kernel: converts pseudo-random floats to ints and stores
+     * them into an array.
+     *
+     * @param do_test {@code true} for the full-size measured run,
+     *                {@code false} for a tiny warm-up run
+     */
+    static void test(boolean do_test) {
+        final int iterations = do_test ? 10000000 : 5;
+        final int slots = 300000;
+        final int[] sink = new int[slots];
+        // Fixed seed, so every run converts the same sequence of floats.
+        final Random generator = new Random(0);
+
+        for (int step = 0; step < iterations; step++) {
+            int slot = step % sink.length;
+            float sample = generator.nextFloat();
+            sink[slot] = (int)sample;
+        }
+    }
+}
\ No newline at end of file

Modified: harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/client.emconf
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/client.emconf?view=diff&rev=485540&r1=485539&r2=485540
==============================================================================
--- harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/client.emconf (original)
+++ harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/client.emconf Sun Dec 10 23:45:01 2006
@@ -54,7 +54,7 @@
 -XDjit.CD_OPT.path=opt_init,translator,optimizer,hir2lir,codegen
 
 -XDjit.CD_OPT.path.optimizer=ssa,devirt,inline,uce,purge,simplify,dce,uce,lazyexc,memopt,simplify,dce,uce,lower,dessa,statprof,markglobals
--XDjit.CD_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l,early_prop,itrace-,native,constraints,dce2,regalloc,spillgen,layout,copy,rce+,stack,break-,iprof-,emitter!,si_insts,gcmap,info,unlock_method
+-XDjit.CD_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l,early_prop,peephole,itrace-,native,constraints,dce2,regalloc,spillgen,layout,copy,rce+,stack,break-,iprof-,peephole,emitter!,si_insts,gcmap,info,unlock_method
 -XDjit.CD_OPT.path.dce1=cg_dce
 -XDjit.CD_OPT.path.dce2=cg_dce
 -XDjit.CD_OPT.path.regalloc=bp_regalloc1,bp_regalloc2

Modified: harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/opt.emconf
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/opt.emconf?view=diff&rev=485540&r1=485539&r2=485540
==============================================================================
--- harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/opt.emconf (original)
+++ harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/opt.emconf Sun Dec 10 23:45:01 2006
@@ -25,7 +25,7 @@
 -XDjit.CS_OPT.path=opt_init,translator,optimizer,hir2lir,codegen
 
 -XDjit.CS_OPT.path.optimizer=ssa,devirt,inline,uce,purge,simplify,dce,uce,lazyexc,memopt,simplify,dce,uce,lower,dessa,statprof,markglobals
--XDjit.CS_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l,early_prop,itrace-,native,constraints,dce2,regalloc,spillgen,layout,copy,rce+,stack,break-,iprof-,emitter!,si_insts,gcmap,info,unlock_method
+-XDjit.CS_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l,early_prop,peephole,itrace-,native,constraints,dce2,regalloc,spillgen,layout,copy,rce+,stack,break-,iprof-,peephole,emitter!,si_insts,gcmap,info,unlock_method
 -XDjit.CS_OPT.path.dce1=cg_dce
 -XDjit.CS_OPT.path.dce2=cg_dce
 -XDjit.CS_OPT.path.regalloc=bp_regalloc1,bp_regalloc2

Modified: harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/server.emconf
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/server.emconf?view=diff&rev=485540&r1=485539&r2=485540
==============================================================================
--- harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/server.emconf (original)
+++ harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/server.emconf Sun Dec 10 23:45:01 2006
@@ -43,7 +43,7 @@
 -XDjit.SD1_OPT.path=opt_init,translator,optimizer,hir2lir,codegen
 
 -XDjit.SD1_OPT.path.optimizer=ssa,simplify,dce,uce,edge_instrument,dessa,statprof,markglobals
--XDjit.SD1_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l,early_prop,itrace-,native,constraints,dce2,regalloc,spillgen,layout,copy,rce+,stack,break-,iprof-,emitter!,si_insts,gcmap,info,unlock_method
+-XDjit.SD1_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l,early_prop,peephole,itrace-,native,constraints,dce2,regalloc,spillgen,layout,copy,rce+,stack,break-,iprof-,peephole,emitter!,si_insts,gcmap,info,unlock_method
 -XDjit.SD1_OPT.path.dce1=cg_dce
 -XDjit.SD1_OPT.path.dce2=cg_dce
 -XDjit.SD1_OPT.path.regalloc=bp_regalloc1,bp_regalloc2
@@ -57,7 +57,7 @@
 -XDjit.SD2_OPT.path=opt_init,translator,optimizer,hir2lir,codegen
 
 -XDjit.SD2_OPT.path.optimizer=ssa,simplify,dce,uce,edge_annotate,devirt,inline,uce,purge,simplify,dce,uce,lazyexc,inline_helpers,purge,simplify,uce,dce,dessa,statprof,peel,ssa,hvn,simplify,dce,uce,lower,dce,uce,memopt,reassoc,dce,uce,hvn,dce,uce,abcd,dce,uce,gcm,dessa,statprof,markglobals
--XDjit.SD2_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l,early_prop,itrace-,native,constraints,dce2,regalloc,spillgen,layout,copy,rce+,stack,break-,iprof-,emitter!,si_insts,gcmap,info,unlock_method
+-XDjit.SD2_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l,early_prop,peephole,itrace-,native,constraints,dce2,regalloc,spillgen,layout,copy,rce+,stack,break-,iprof-,peephole,emitter!,si_insts,gcmap,info,unlock_method
 -XDjit.SD2_OPT.path.dce1=cg_dce
 -XDjit.SD2_OPT.path.dce2=cg_dce
 -XDjit.SD2_OPT.path.regalloc=bp_regalloc1,bp_regalloc2

Modified: harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/server_static.emconf
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/server_static.emconf?view=diff&rev=485540&r1=485539&r2=485540
==============================================================================
--- harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/server_static.emconf (original)
+++ harmony/enhanced/drlvm/trunk/vm/jitrino/config/ia32/server_static.emconf Sun Dec 10 23:45:01 2006
@@ -25,7 +25,7 @@
 -XDjit.SS_OPT.path=opt_init,translator,optimizer,hir2lir,codegen
 
 -XDjit.SS_OPT.path.optimizer=ssa,simplify,dce,uce,statprof,devirt,inline,uce,purge,simplify,dce,uce,lazyexc,hvn,dce,uce,dessa,statprof,peel,ssa,hvn,simplify,dce,uce,lower,dce,uce,memopt,reassoc,dce,uce,hvn,dce,uce,abcd,dce,uce,gcm,dessa,statprof,markglobals
--XDjit.SS_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l,early_prop,itrace-,native,constraints,dce2,regalloc,spillgen,layout,copy,rce+,stack,break-,iprof-,emitter!,si_insts,gcmap,info,unlock_method
+-XDjit.SS_OPT.path.codegen=lock_method,bbp,gcpoints,cafl,dce1,i8l,early_prop,peephole,itrace-,native,constraints,dce2,regalloc,spillgen,layout,copy,rce+,stack,break-,iprof-,peephole,emitter!,si_insts,gcmap,info,unlock_method
 -XDjit.SS_OPT.path.dce1=cg_dce
 -XDjit.SS_OPT.path.dce2=cg_dce
 -XDjit.SS_OPT.path.regalloc=bp_regalloc1,bp_regalloc2

Modified: harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32CgUtils.cpp
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32CgUtils.cpp?view=diff&rev=485540&r1=485539&r2=485540
==============================================================================
--- harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32CgUtils.cpp (original)
+++ harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32CgUtils.cpp Sun Dec 10 23:45:01 2006
@@ -26,6 +26,9 @@
 
 bool OpndUtils::isReg(const Opnd* op, RegName what)
 {
+    if (!op->hasAssignedPhysicalLocation()) {
+        return false;
+    }
     if (!op->isPlacedIn(OpndKind_Reg)) {
         return false;
     }
@@ -243,6 +246,16 @@
     BasicBlock* bb = toBeReplaced->getBasicBlock();
     bb->appendInst(brandNewInst, toBeReplaced);
     removeInst(toBeReplaced);
+}
+
+void InstUtils::replaceOpnd(Inst* inst, unsigned index, Opnd* newOpnd)
+{
+    // Puts newOpnd in place of the operand currently found at 'index';
+    // does nothing when that operand already is newOpnd.
+    Opnd* const current = inst->getOpnd(index);
+    // there must be something at 'index' for it to be replaced
+    assert(current != NULL);
+    if (current == newOpnd) {
+        return;
+    }
+    inst->replaceOpnd(current, newOpnd);
 }
 
 

Modified: harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32CgUtils.h
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32CgUtils.h?view=diff&rev=485540&r1=485539&r2=485540
==============================================================================
--- harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32CgUtils.h (original)
+++ harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32CgUtils.h Sun Dec 10 23:45:01 2006
@@ -199,6 +199,7 @@
     static bool isPseudoInst(const Inst*);
     static void removeInst(Inst* toBeRemoved);
     static void replaceInst(Inst* old, Inst* brandNewInst);
+    static void replaceOpnd(Inst* inst, unsigned index, Opnd* newOpnd);
 };
 
 class SubCfgBuilderUtils : virtual protected IRManagerHolder {

Added: harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32PeepHole.cpp
URL: http://svn.apache.org/viewvc/harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32PeepHole.cpp?view=auto&rev=485540
==============================================================================
--- harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32PeepHole.cpp (added)
+++ harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32PeepHole.cpp Sun Dec 10 23:45:01 2006
@@ -0,0 +1,531 @@
+/*
+ *  Licensed to the Apache Software Foundation (ASF) under one or more
+ *  contributor license agreements.  See the NOTICE file distributed with
+ *  this work for additional information regarding copyright ownership.
+ *  The ASF licenses this file to You under the Apache License, Version 2.0
+ *  (the "License"); you may not use this file except in compliance with
+ *  the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ *  Unless required by applicable law or agreed to in writing, software
+ *  distributed under the License is distributed on an "AS IS" BASIS,
+ *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ *  See the License for the specific language governing permissions and
+ *  limitations under the License.
+ */
+/**
+ * @author Alexander Astapchuk
+ */
+
+#include "Ia32CgUtils.h"
+
+namespace Jitrino {
+namespace Ia32 {
+
+
+class PeepHoleOpt; // forward declaration: the class body follows the registration below
+static const char* help = // help text handed to the ActionFactory below
+"Performs simple local (per-BB) or per-Inst optimizations.\n"
+"Some of them include:\n"
+"\t Inlined F2I conversion\n"
+"A better instructions selection:\n"
+"\t Change 32bit immediate values to 8bit in ALU instructions\n"
+"\t MOVSS/MOVSD replaced with MOVQ\n"
+"\t MOVSS/MOVSD xmm, [memconst=0.] => PXOR xmm, xmm\n"
+"It's recommended to have 2 passes of peephole: the first one before\n"
+"a register allocator - to inline the conversions and provide more\n"
+"opportunities for further optimization. And the second one - after\n"
+"the register allocator to improve the instructions selection."
+;
+
+static ActionFactory<PeepHoleOpt> _staticAutoRegister("peephole", help); // "peephole" is the name the .emconf codegen paths refer to; presumably this object registers the action at static-init time - confirm against ActionFactory
+
+class PeepHoleOpt : 
+    public SessionAction, 
+    protected OpndUtils, 
+    protected InstUtils, 
+    protected SubCfgBuilderUtils 
+{
+private:
+    // Virtuals (presumably a SessionAction override - confirm): all bits set if any change was made, 0 otherwise.
+    uint32  getSideEffects(void) const
+    {
+        return m_bHadAnyChange ? (uint32)-1 : 0;
+    }
+private:
+    enum Changed { 
+    /// Nothing was changed
+    Changed_Nothing, 
+    /**
+     * One or more Opnds were changed/added/removed - might need to 
+     * update liveness info.
+     */
+    Changed_Opnd, 
+    /**
+     * One or more Insts were changed/added/removed - might need to 
+     * update Insts list.
+     */
+    Changed_Inst, 
+    /**
+     * One or more Nodes were changed/added/removed - might need to 
+     * update Nodes list. handleBasicBlock() and runImpl() abandon the
+     * current scan and start over when a handler reports this.
+     */
+    Changed_Node
+    };
+    //
+    // General machinery: runImpl() walks the CFG, handleBasicBlock() walks
+    // the instructions of one node, handleInst() dispatches on the mnemonic.
+    // TODO: It's better to separate the general CFG-walking machinery into
+    // a separate class.
+    //
+    void runImpl(void);
+    Changed handleBasicBlock(Node* node);
+    Changed handleInst(Inst* inst);
+    //
+    // Per-instruction handlers. Each one returns the kind of change it made.
+    // Call chain: handleInst_Call -> handleInst_HelperCall -> handleInst_Convert_F2I_D2I.
+    //
+    Changed handleInst_Call(Inst* inst);
+    Changed handleInst_HelperCall(Inst* inst, const  Opnd::RuntimeInfo* ri);
+    Changed handleInst_Convert_F2I_D2I(Inst* inst);
+    Changed handleInst_ALU(Inst* inst);
+    Changed handleInst_SSEMov(Inst* inst);
+    Changed handleInst_SSEXor(Inst* inst);
+    //
+    // State
+    // Set by runImpl() as soon as any handler reports a change;
+    // read by getSideEffects().
+    bool m_bHadAnyChange;
+}; // ~PeepHoleOpt
+
+void PeepHoleOpt::runImpl(void)
+{
+    setIRManager(irManager);
+    m_bHadAnyChange = false;
+
+    // Sweep the CFG repeatedly until a whole sweep changes nothing.
+    // The sweep limit is a safety net only: no real method is expected to
+    // offer more than 100K peephole opportunities, so hitting the limit most
+    // probably means a bug here - assert() in debug mode, give up in release.
+    const unsigned maxSweeps = 100000;
+    unsigned sweepsDone = 0;
+    bool rescan = true;
+    while (rescan) {
+        rescan = false;
+        const Nodes& cfgNodes = irManager->getFlowGraph()->getNodes();
+        for (Nodes::const_iterator it = cfgNodes.begin(); it != cfgNodes.end(); ++it) {
+            Node* candidate = *it;
+            if (candidate->isBlockNode()) {
+                const Changed outcome = handleBasicBlock(candidate);
+                if (outcome != Changed_Nothing) {
+                    m_bHadAnyChange = true;
+                    rescan = true;
+                }
+                if (outcome == Changed_Node) {
+                    // The set of nodes was modified - abandon this sweep
+                    // and start a new one.
+                    break;
+                }
+            }
+        }
+        ++sweepsDone;
+        if (sweepsDone > maxSweeps) {
+            assert(false);
+            rescan = false;
+        }
+    }
+}
+
+
+PeepHoleOpt::Changed PeepHoleOpt::handleBasicBlock(Node* node)
+{
+    // Runs handleInst() over every instruction of 'node'.
+    //
+    // Returns:
+    //   Changed_Node    - a handler modified the set of nodes; the walk is
+    //                     abandoned at once and the caller has to rescan
+    //   Changed_Inst    - at least one instruction was added/replaced/removed
+    //   Changed_Opnd    - only operands were changed
+    //   Changed_Nothing - the block was left untouched
+    //
+    // The result is the most significant change seen in the block. A later
+    // untouched instruction must not erase an earlier change, and an
+    // operand-only change must be reported as well.
+    Inst* inst = (Inst*)node->getFirstInst();
+    Changed changedInBB = Changed_Nothing;
+    while (inst != NULL) {
+        // Remember the predecessor: 'inst' itself may be gone after the call.
+        Inst* savePrev = inst->getPrevInst();
+        Changed whatChanged = handleInst(inst);
+        if (whatChanged == Changed_Node) {
+            // Need to scan the CFG again.
+            return Changed_Node;
+        }
+        Inst* next = NULL;
+        if (whatChanged == Changed_Inst) {
+            changedInBB = Changed_Inst;
+            // Inst was replaced, or deleted, or new Inst was added - 
+            // proceed with this new or updated instruction(s) again
+            if (savePrev == NULL) {
+                next = (Inst*)node->getFirstInst();
+            }
+            else {
+                next = savePrev->getNextInst();
+            }
+        }
+        else {
+            assert(whatChanged == Changed_Nothing || whatChanged == Changed_Opnd);
+            // Record an operand change unless something more significant
+            // (Changed_Inst) has already been recorded for this block.
+            if (whatChanged == Changed_Opnd && changedInBB == Changed_Nothing) {
+                changedInBB = Changed_Opnd;
+            }
+            next = inst->getNextInst();
+        }
+        inst = next;
+    }
+    return changedInBB;
+}
+
+
+PeepHoleOpt::Changed PeepHoleOpt::handleInst(Inst* inst)
+{
+    // Dispatches a single instruction to the handler for its mnemonic.
+    // Pseudo instructions and mnemonics without a handler are left alone.
+    Changed outcome = Changed_Nothing;
+    if (!isPseudoInst(inst)) {
+        switch (inst->getMnemonic()) {
+        case Mnemonic_CALL:
+            outcome = handleInst_Call(inst);
+            break;
+        case Mnemonic_ADD:
+        case Mnemonic_SUB:
+        case Mnemonic_NOT:
+        case Mnemonic_AND:
+        case Mnemonic_OR:
+        case Mnemonic_XOR:
+        case Mnemonic_CMP:
+        case Mnemonic_TEST:
+            outcome = handleInst_ALU(inst);
+            break;
+        case Mnemonic_MOVSS:
+        case Mnemonic_MOVSD:
+            outcome = handleInst_SSEMov(inst);
+            break;
+        case Mnemonic_XORPS:
+        case Mnemonic_XORPD:
+            outcome = handleInst_SSEXor(inst);
+            break;
+        default:
+            break;
+        }
+    }
+    return outcome;
+}
+
+PeepHoleOpt::Changed PeepHoleOpt::handleInst_Call(Inst* inst)
+{
+    // Only calls whose target operand carries runtime info of the
+    // Kind_HelperAddress kind are of interest; any other call is left alone.
+    assert(inst->getMnemonic() == Mnemonic_CALL);
+    CallInst* call = (CallInst*)inst;
+    Opnd* target = call->getOpnd(call->getTargetOpndIndex());
+    Opnd::RuntimeInfo* info = target->getRuntimeInfo();
+    if (info == NULL) {
+        return Changed_Nothing;
+    }
+    if (info->getKind() != Opnd::RuntimeInfo::Kind_HelperAddress) {
+        return Changed_Nothing;
+    }
+    return handleInst_HelperCall(inst, info);
+}
+
+PeepHoleOpt::Changed PeepHoleOpt::handleInst_HelperCall(
+    Inst* inst, 
+    const Opnd::RuntimeInfo* ri)
+{
+    // Value #0 of the runtime info is read as the helper id; only the
+    // float->int32 and double->int32 conversion helpers get special handling.
+    assert(Opnd::RuntimeInfo::Kind_HelperAddress == ri->getKind());
+    const POINTER_SIZE_INT id = (POINTER_SIZE_INT)ri->getValue(0);
+    const bool isFpToInt32 =
+        id == (POINTER_SIZE_INT)CompilationInterface::Helper_ConvStoI32 ||
+        id == (POINTER_SIZE_INT)CompilationInterface::Helper_ConvDtoI32;
+    if (isFpToInt32) {
+        return handleInst_Convert_F2I_D2I(inst);
+    }
+    return Changed_Nothing;
+}
+
+PeepHoleOpt::Changed PeepHoleOpt::handleInst_Convert_F2I_D2I(Inst* inst)
+{
+    // Replaces a call to the float/double -> int32 conversion helper with an
+    // inlined sequence: 'int_value = (int)(float_value or double_value)'.
+    // Returns Changed_Node once the sub-CFG is spliced in, Changed_Nothing if the call is kept.
+    Opnd* dst = inst->getOpnd(0); // NOTE(review): assumes operand 0 is the call's int32 result - confirm
+    Opnd* src = inst->getOpnd(2); // NOTE(review): assumes operand 2 is the helper's FP argument - confirm
+    Type* srcType = src->getType();
+    assert(srcType->isSingle() || srcType->isDouble());
+    assert(dst->getType()->isInt4());
+    const bool is_dbl = srcType->isDouble();
+    // Here, we might have to deal with 3 cases with src (_value):
+    // 1. Unassigned operand - act as if we were operating with XMM
+    // 2. Assigned to FPU - convert to FPU operations, to
+    //    avoid a long FPU->mem->XMM chain
+    // 3. Assigned to XMM - see #1
+    const bool xmm_way = 
+        !(src->hasAssignedPhysicalLocation() && src->isPlacedIn(OpndKind_FPReg));
+
+    if (!xmm_way) {
+        //TODO: will add FPU later if measurements show it is worth trying
+        return Changed_Nothing;
+    }
+    // XMM path: build the sub-CFG sketched in the pseudo-assembly below and
+    // substitute it for the helper call.
+    /*
+        movss xmm0, val
+        // presuming the corner cases (NaN, overflow) 
+        // are rare, do the conversion first, 
+        // and check for them later
+    -- convertNode
+        cvttss2si eax, xmm0
+    -- ovfTestNode
+        // did overflow happen ?
+        cmp eax, 0x80000000 
+        jne _done               // no - go return result
+    -- testAgainstZeroNode
+        // test SRC against zero
+        comiss xmm0, [fp_zero]
+        // isNaN ? 
+        jp _nan     // yes - go load 0
+    -- testIfBelowNode
+        // xmm < 0 ?
+        jb _done    // yes - go load MIN_INT. EAX already has it - simply return.
+    -- loadMaxIntNode 
+        // ok. at this point, XMM is positive and > MAX_INT
+        // must load MAX_INT which is 0x7fffffff.
+        // As EAX has 0x80000000, then simply subtract 1
+        sub eax, 1
+        jmp _done
+    -- loadZeroNode
+    _nan:
+        xor eax, eax
+    -- doneNode
+    _done:
+        mov result, eax
+
+    */
+    Opnd* fpZeroOpnd = getZeroConst(srcType);
+    Type* int32type = irManager->getTypeManager().getInt32Type();
+    Opnd* oneOpnd = irManager->newImmOpnd(int32type, 1);
+    Opnd* intZeroOpnd = getIntZeroConst();
+
+    // 0x80000000 is not used as INT_MIN here: it is the 'integer indefinite' value
+    // that CVTTSS2SI/CVTTSD2SI produce on overflow or NaN (see the opcode description).
+    Opnd* minIntOpnd = irManager->newImmOpnd(int32type, 0x80000000);
+
+    newSubGFG();
+    Node* entryNode = getSubCfgEntryNode();
+
+    Node* convertNode = newBB();
+    Node* ovfTestNode = newBB();
+    Node* testAgainstZeroNode = newBB();
+    Node* testIfBelowNode = newBB();
+    Node* loadMaxIntNode = newBB();
+    Node* loadZeroNode = newBB();
+    Node* doneNode = newBB();
+    //
+    // presuming the corner cases (NaN, overflow) 
+    // are rare, do the conversion first, 
+    // and check for them later
+    //
+    connectNodes(entryNode, convertNode);
+    //
+    // convertNode: the truncating conversion itself
+    //
+    setCurrentNode(convertNode)    ;
+    Mnemonic mn_cvt = is_dbl ? Mnemonic_CVTTSD2SI : Mnemonic_CVTTSS2SI;
+    /*cvttss2si r32, xmm*/ newInst(mn_cvt, 1, dst, src);
+    connectNodeTo(ovfTestNode);
+    setCurrentNode(NULL);
+
+    //
+    // ovfTestNode: check whether overflow happened
+    //
+    setCurrentNode(ovfTestNode);
+    /*cmp r32, MIN_INT*/ newInst(Mnemonic_CMP, dst, minIntOpnd);
+    /*jne _done       */ newBranch(Mnemonic_JNE, doneNode, testAgainstZeroNode, 0.9, 0.1);
+    // (slow path below is entered only when dst == 0x80000000)
+    setCurrentNode(NULL);
+
+    // testAgainstZeroNode: test SRC against zero
+    // (the parity flag signals an unordered compare, i.e. NaN)
+    setCurrentNode(testAgainstZeroNode);
+    Mnemonic mn_cmp = is_dbl ? Mnemonic_COMISD : Mnemonic_COMISS;
+    /*comiss src, 0.  */ newInst(mn_cmp, src, fpZeroOpnd);
+    /*jp _nan:result=0*/ newBranch(Mnemonic_JP, loadZeroNode, testIfBelowNode);
+    setCurrentNode(NULL);
+
+    //
+    // loadZeroNode: NaN input - the result is 0
+    //
+    setCurrentNode(loadZeroNode);
+    /*mov r32, 0*/      newInst(Mnemonic_MOV, dst, intZeroOpnd);
+    /*jmp _done*/       connectNodeTo(doneNode);
+    setCurrentNode(NULL);
+
+    //
+    // testIfBelowNode: test if we have a huge negative in SRC
+    // (branches on the flags left by the COMISS/COMISD in testAgainstZeroNode)
+    setCurrentNode(testIfBelowNode);
+    /*jb _done:*/       newBranch(Mnemonic_JB, doneNode, loadMaxIntNode);
+    setCurrentNode(NULL);
+    //
+    // loadMaxIntNode: positive overflow - 0x80000000 - 1 gives 0x7fffffff (MAX_INT)
+    //
+    setCurrentNode(loadMaxIntNode);
+    /* sub dst, 1*/     newInst(Mnemonic_SUB, dst, oneOpnd);
+    connectNodeTo(doneNode);
+    setCurrentNode(NULL);
+    // every path joins in doneNode, which leads to the sub-CFG's return node
+    connectNodes(doneNode, getSubCfgReturnNode());
+    // substitute the sub-CFG for the helper call; nodes were added, hence Changed_Node
+    propagateSubCFG(inst);
+    return Changed_Node;
+}
+
+
+PeepHoleOpt::Changed PeepHoleOpt::handleInst_ALU(Inst* inst)
+{
+    // Shrinks the immediate of 'OPERATION reg, imm32' to imm8 when the value
+    // fits. NOT has a single operand and gets no optimization for now.
+    const Mnemonic mn = inst->getMnemonic();
+    if (mn == Mnemonic_NOT) {
+        return Changed_Nothing;
+    }
+
+    // Besides NOT, these are the only mnemonics routed here.
+    assert(mn == Mnemonic_ADD || mn == Mnemonic_SUB ||
+           mn == Mnemonic_OR || mn == Mnemonic_XOR ||
+           mn == Mnemonic_AND || 
+           mn == Mnemonic_CMP || mn == Mnemonic_TEST);
+
+    // Only the simple variants are processed: 2 operands, optionally
+    // preceded by an EFLAGS operand which has to be skipped.
+    const unsigned leftIndex = isReg(inst->getOpnd(0), RegName_EFLAGS) ? 1 : 0;
+    const unsigned rightIndex = leftIndex + 1;
+
+    Opnd* lhs = inst->getOpnd(leftIndex);
+    Opnd* rhs = inst->getOpnd(rightIndex);
+
+    /* what: OPERATION reg, imm32 => OPERATION reg, imm8
+       why: shorter instruction
+       nb: applicable for all ALUs, but TEST
+    */
+    const bool canShrinkImm =
+        mn != Mnemonic_TEST && isReg(lhs) && isImm32(rhs) && fitsImm8(rhs);
+    if (!canShrinkImm) {
+        return Changed_Nothing;
+    }
+    replaceOpnd(inst, rightIndex, convertImmToImm8(rhs));
+    return Changed_Opnd;
+}
+
+PeepHoleOpt::Changed PeepHoleOpt::handleInst_SSEMov(Inst* inst)
+{
+    // Improves MOVSS/MOVSD: drops self-moves, turns a load of the FP zero
+    // constant into PXOR, and turns register-to-register moves into MOVQ.
+    const Mnemonic mn = inst->getMnemonic();
+    assert(mn == Mnemonic_MOVSS || mn == Mnemonic_MOVSD);
+    const bool isDouble = (mn == Mnemonic_MOVSD);
+
+    if (inst->getOpndCount() != 2) {
+        // Expected only MOVSS/SD a, b
+        assert(false);
+        return Changed_Nothing;
+    }
+    Opnd* dst = inst->getOpnd(0);
+    Opnd* src = inst->getOpnd(1);
+    const bool dstInReg = isReg(dst);
+
+    if (dstInReg && equals(src, dst)) {
+        // what: same register moved around
+        // why:  useless thing
+        removeInst(inst);
+        return Changed_Inst;
+    }
+
+    if (dstInReg && isMem(src)) {
+        /* what: MOVSS/MOVSD xmmreg, [zero constant from memory] => PXOR xmmreg, xmmreg
+           why:  shorter instruction; no memory access => faster
+           nb:   only works with 64 XMMs
+        */
+        const bool loadsZero = 
+            isDouble ? isFPConst(src, (double)0) : isFPConst(src, (float)0);
+        if (loadsZero) {
+            // PXOR only accepts double registers, convert dst
+            Opnd* dst64 = convertToXmmReg64(dst);
+            Inst* pxorInst = irManager->newInst(Mnemonic_PXOR, dst64, dst64);
+            replaceInst(inst, pxorInst);
+            return Changed_Inst;
+        }
+        // not a zero constant - keep looking at the other patterns below
+    }
+
+    if (dstInReg && isReg(src)) {
+        /*what: MOVSS/MOVSD reg, reg => MOVQ reg, reg
+          why: MOVSD has latency=6, MOVSS has latency=4, MOVQ's latency=2
+          nb: MOVQ only works with 64 xmms
+        */
+        Opnd* dst64 = convertToXmmReg64(dst);
+        Opnd* src64 = convertToXmmReg64(src);
+        Inst* movqInst = irManager->newInst(Mnemonic_MOVQ, dst64, src64);
+        replaceInst(inst, movqInst);
+        return Changed_Inst;
+    }
+
+    // The 'both regs' case was handled above, so one side must be memory:
+    assert((isReg(dst)&&isMem(src)) || (isReg(src)&&isMem(dst)));
+    if (false && isDouble) {
+        //FIXME: MOVQ with memory gets encoded badly - need to fix in encoder
+        /*
+        what: MOVSD => MOVQ
+        why:  faster (? actually, I hope so. Need to double check)
+        nb:   only for xmm64
+        */
+        Inst* movqMemInst = irManager->newInst(Mnemonic_MOVQ, dst, src);
+        replaceInst(inst, movqMemInst);
+        return Changed_Inst;
+    }
+
+    return Changed_Nothing;
+}
+
+PeepHoleOpt::Changed PeepHoleOpt::handleInst_SSEXor(Inst* inst)
+{
+    // Rewrites 'XORPS/XORPD regN, regN' (register zeroing) into PXOR.
+    assert(inst->getMnemonic() == Mnemonic_XORPS || 
+           inst->getMnemonic() == Mnemonic_XORPD);
+
+    if (inst->getOpndCount() != 2) {
+        // Expected only XORPS/PD a, b
+        assert(false);
+        return Changed_Nothing;
+    }
+
+    Opnd* dst = inst->getOpnd(0);
+    Opnd* src = inst->getOpnd(1);
+
+    const bool sameRegister = isReg(dst) && isReg(src, dst->getRegName());
+    if (!sameRegister) {
+        return Changed_Nothing;
+    }
+
+    /*what: XORPS/XORPD regN, regN => PXOR regN, regN
+      why: XORPS/PD used for zero-ing register, but PXOR is faster 
+           (2 ticks on PXOR vs 4 ticks for XORPS/XORPD)
+    */
+    Opnd* dst64 = convertToXmmReg64(dst);
+    Opnd* src64 = convertToXmmReg64(src);
+    Inst* pxorInst = irManager->newInst(Mnemonic_PXOR, dst64, src64);
+    replaceInst(inst, pxorInst);
+    return Changed_Inst;
+}
+
+}}; // ~namespace Jitrino::Ia32

Propchange: harmony/enhanced/drlvm/trunk/vm/jitrino/src/codegenerator/ia32/Ia32PeepHole.cpp
------------------------------------------------------------------------------
    svn:eol-style = native