@@ -67,6 +67,8 @@ public class MatrixBlockDictionary extends ADictionary {
6767
6868 final private MatrixBlock _data ;
6969
70+ static final VectorSpecies <Double > SPECIES = DoubleVector .SPECIES_PREFERRED ;
71+
7072 /**
7173 * Unsafe private constructor that does not check the data validity. USE WITH CAUTION.
7274 *
@@ -2125,6 +2127,9 @@ private void preaggValuesFromDenseDictDenseAggRangeRange(final int numVals, fina
21252127
21262128 private static void preaggValuesFromDenseDictBlockedIKJ (double [] a , double [] b , double [] ret , int bi , int bk , int bj ,
21272129 int bie , int bke , int cz , int az , int ls , int cut , int sOffT , int eOffT ) {
2130+ final int vLen = SPECIES .length ();
2131+ final DoubleVector vVec = DoubleVector .zero (SPECIES );
2132+ final int leftover = (eOffT - sOffT ) % vLen ; // leftover not vectorized
21282133 for (int i = bi ; i < bie ; i ++) {
21292134 final int offI = i * cz ;
21302135 final int offOutT = i * az + bj ;
@@ -2133,14 +2138,27 @@ private static void preaggValuesFromDenseDictBlockedIKJ(double[] a, double[] b,
21332138 final int sOff = sOffT + idb ;
21342139 final int eOff = eOffT + idb ;
21352140 final double v = a [offI + k ];
2136- int offOut = offOutT ;
2137- for (int j = sOff ; j < eOff ; j ++, offOut ++) {
2138- ret [offOut ] += v * b [j ];
2139- }
2141+ vecInnerLoop (v , b , ret , offOutT , eOff , sOff , leftover , vLen , vVec );
21402142 }
21412143 }
21422144 }
21432145
2146+ private static void vecInnerLoop (final double v , final double [] b , final double [] ret , final int offOutT ,
2147+ final int eOff , final int sOff , final int leftover , final int vLen , DoubleVector vVec ) {
2148+ int offOut = offOutT ;
2149+ vVec = vVec .broadcast (v );
2150+ final int end = eOff - leftover ;
2151+ for (int j = sOff ; j < end ; j += vLen , offOut += vLen ) {
2152+ DoubleVector res = DoubleVector .fromArray (SPECIES , ret , offOut );
2153+ DoubleVector bVec = DoubleVector .fromArray (SPECIES , b , j );
2154+ vVec .fma (bVec , res ).intoArray (ret , offOut );
2155+ }
2156+ for (int j = end ; j < eOff ; j ++, offOut ++) {
2157+ ret [offOut ] += v * b [j ];
2158+ }
2159+
2160+ }
2161+
21442162 private void preaggValuesFromDenseDictDenseAggRangeGeneric (final int numVals , final IColIndex colIndexes ,
21452163 final int s , final int e , final double [] b , final int cut , final double [] ret ) {
21462164 final int cz = colIndexes .size ();
0 commit comments