/* DoReMiFaSort.cpp */ #include "stdafx.h" #include "DoReMiFaSort.h" #include #pragma optimize( "t", on ) static void __stdcall SwapUU(void* p1, void* p2) { char *c1 = ( char* ) p1, *c2 = ( char* ) p2, cdata; size_t sztRecLength = sizeof( SortRecStruct ); __m128i XmmTemp; DWORD dwTemp; while( sztRecLength >= 16 ) { XmmTemp = _mm_loadu_si128( ( __m128i* ) c1 ); _mm_storeu_si128( ( __m128i* ) c1, _mm_loadu_si128( ( __m128i* ) c2 ) ); _mm_storeu_si128( ( __m128i* ) c2, XmmTemp ); c1 += 16; c2 += 16; sztRecLength -= 16; } if( sztRecLength >= 8 ) { XmmTemp = _mm_loadl_epi64( ( __m128i* ) c1 ); _mm_storel_epi64( ( __m128i* ) c1, _mm_loadl_epi64( ( __m128i* ) c2 ) ); _mm_storel_epi64( ( __m128i* ) c2, XmmTemp ); c1 += 8; c2 += 8; sztRecLength -= 8; } if( sztRecLength >= 4 ) { dwTemp = *( ( DWORD* ) c1 ); *( ( int* ) c1 ) = *( ( DWORD* ) c2 ); *( ( DWORD* ) c2 ) = dwTemp; c1 += 4; c2 += 4; sztRecLength -=4; } while( sztRecLength-- ) { cdata = *c1; *c1++ = *c2; *c2++ = cdata; } } static void __stdcall CopyRec(void* pDest, void* pSrc) { char *cDest = ( char* ) pDest, *cSrc = ( char* ) pSrc, cdata; size_t sztRecLength = sizeof( SortRecStruct ); while( sztRecLength >= 16 ) { _mm_storeu_si128( ( __m128i* ) cDest, _mm_loadu_si128( ( __m128i* ) cSrc ) ); cDest += 16; cSrc += 16; sztRecLength -= 16; } if( sztRecLength >= 8 ) { _mm_storel_epi64( ( __m128i* ) cDest, _mm_loadl_epi64( ( __m128i* ) cSrc ) ); cDest += 8; cSrc += 8; sztRecLength -= 8; } if( sztRecLength >= 4 ) { *( ( int* ) cDest ) = *( ( DWORD* ) cSrc ); cDest += 4; cSrc += 4; sztRecLength -= 4; } while( sztRecLength-- ) { cdata = *cDest; *cDest++ = *cSrc; *cSrc++ = cdata; } } static void __stdcall DualSort(SortRecStruct* p1, SortRecStruct* p2) { if( p1->Key > p2->Key ) SwapUU( p1, p2 ); } static void __stdcall TriSort(SortRecStruct* p1, SortRecStruct* p2, SortRecStruct* p3) { DualSort( p1, p2 ); DualSort( p2, p3 ); DualSort( p1, p2 ); } static void __stdcall InsertionP(SortRecStruct* pTop, SortRecStruct* pBtm) { SortRecStruct TempData; SortRecStruct *ScanPtr, *CopyPtr; ScanPtr = pBtm - 1; while( ScanPtr >= pTop ) { if( ( ScanPtr + 1 )->Key < ScanPtr->Key ) { CopyRec( &TempData, ScanPtr ); CopyPtr = ScanPtr; do { CopyRec( CopyPtr, CopyPtr + 1 ); CopyPtr++; } while( ( CopyPtr < pBtm ) && ( ( CopyPtr + 1 )->Key < TempData.Key ) ); CopyRec( CopyPtr, &TempData ); } ScanPtr--; } } #ifdef _M_X64 static DWORD __stdcall DoReMiFaSortP(SortRecStruct* pTop, SortRecStruct* pBtm, bool* pAbort) { SortRecStruct *nPtrL, *nPtrR, *nPivotL, *nPivotR, *nTemp; SortRecStruct *RecRe, *RecMe, *RecFa; SortRecStruct Pivot; if( pBtm > pTop ) { if( ( pBtm - pTop ) > 48 ) // 48 is best much value at win32 and x64 { // Do-Re-Mi-Fa-Sort RecMe = ( pBtm - pTop ) / 2 + pTop; RecRe = ( RecMe - pTop ) / 2 + pTop; RecFa = ( pBtm - RecMe ) / 2 + RecMe; TriSort( pTop, RecRe, RecMe ); TriSort( RecRe, RecMe, RecFa ); TriSort( RecMe, RecFa, pBtm ); DualSort( pTop, RecRe ); DualSort( RecRe, RecMe ); SwapUU( RecMe, pBtm ); //Pivot = *pBtm; CopyRec( &Pivot, pBtm ); // adjust pivot range nPivotR = pBtm; while( ( nPivotR > pTop ) && ( nPivotR[ -1 ].Key == Pivot.Key ) ) { nPivotR--; } if( nPivotR == pTop ) return 0; // All records same data nPtrR = nPivotR; nPivotL = pTop - 1; nPtrL = pTop - 1; for( ; ; ) { while( ( ++nPtrL )->Key < Pivot.Key ); while( Pivot.Key < ( --nPtrR )->Key ) if( nPtrR == pTop ) break; if( nPtrL >= nPtrR ) break; SwapUU( nPtrL, nPtrR ); if( nPtrL->Key == Pivot.Key ) { nPivotL++; SwapUU( nPivotL, nPtrL ); } if( Pivot.Key == nPtrR->Key ) { nPivotR--; SwapUU( nPtrR, nPivotR ); } } SwapUU( nPtrL, pBtm ); nPtrR = nPtrL - 1; nPtrL = nPtrL + 1; for( nTemp = pTop; nTemp <= nPivotL; nTemp++, nPtrR-- ) { SwapUU( nTemp, nPtrR ); } for( nTemp = pBtm - 1; nTemp >= nPivotR; nTemp--, nPtrL++ ) { SwapUU( nPtrL, nTemp ); } if( *pAbort ) return 1223; if( nPtrR > pTop ) DoReMiFaSortP( pTop, nPtrR, pAbort ); if( nPtrL < pBtm ) DoReMiFaSortP( nPtrL, pBtm, pAbort ); } else { InsertionP( pTop, pBtm ); } } return 0; } DWORD __stdcall DoReMiFaSort(SortRecStruct* pArray, size_t pRecCount, bool* pAbort) { DWORD dwResult = 0; if( pRecCount > 1 ) dwResult = DoReMiFaSortP( pArray, &pArray[ pRecCount - 1 ], pAbort ); return dwResult; } #else DWORD __stdcall DoReMiFaSort(SortRecStruct* pArray, size_t pRecCount, bool* pAbort) { struct QuickStackStruct { SortRecStruct *Top, *Btm; }; QuickStackStruct *Stack; HANDLE hStack; size_t StackPtr = 0; SortRecStruct *nPtrL, *nPtrR, *nPivotL, *nPivotR, *nTemp, *pTop, *pBtm; SortRecStruct *RecRe, *RecMe, *RecFa; SortRecStruct Pivot; DWORD dwResult = 0; if( pRecCount > 1 ) { hStack = HeapCreate( 0, 0, 0 ); Stack = ( QuickStackStruct* ) HeapAlloc( hStack, 0, sizeof( QuickStackStruct ) * sizeof( size_t ) * 512 ); if( Stack ) { Stack[ StackPtr ].Top = pArray; Stack[ StackPtr ].Btm = &pArray[ pRecCount - 1 ]; // do { pTop = Stack[ StackPtr ].Top; pBtm = Stack[ StackPtr ].Btm; if( ( pBtm - pTop ) > 48 ) // 48 is best much value at win32 and x64 { // Do-Re-Mi-Fa-Sort RecMe = ( pBtm - pTop ) / 2 + pTop; RecRe = ( RecMe - pTop ) / 2 + pTop; RecFa = ( pBtm - RecMe ) / 2 + RecMe; TriSort( pTop, RecRe, RecMe ); TriSort( RecRe, RecMe, RecFa ); TriSort( RecMe, RecFa, pBtm ); DualSort( pTop, RecRe ); DualSort( RecRe, RecMe ); SwapUU( RecMe, pBtm ); //Pivot = *pBtm; CopyRec( &Pivot, pBtm ); // adjust pivot range nPivotR = pBtm; while( ( nPivotR > pTop ) && ( nPivotR[ -1 ].Key == Pivot.Key ) ) { nPivotR--; } if( nPivotR == pTop ) { goto IncStack; } nPtrR = nPivotR; nPivotL = pTop - 1; nPtrL = pTop - 1; for( ; ; ) { while( ( ++nPtrL )->Key < Pivot.Key ); while( Pivot.Key < ( --nPtrR )->Key ) if( nPtrR == pTop ) break; if( nPtrL >= nPtrR ) break; SwapUU( nPtrL, nPtrR ); if( nPtrL->Key == Pivot.Key ) { nPivotL++; SwapUU( nPivotL, nPtrL ); } if( Pivot.Key == nPtrR->Key ) { nPivotR--; SwapUU( nPtrR, nPivotR ); } } SwapUU( nPtrL, pBtm ); nPtrR = nPtrL - 1; nPtrL = nPtrL + 1; for( nTemp = pTop; nTemp <= nPivotL; nTemp++, nPtrR-- ) { SwapUU( nTemp, nPtrR ); } for( nTemp = pBtm - 1; nTemp >= nPivotR; nTemp--, nPtrL++ ) { SwapUU( nPtrL, nTemp ); } StackPtr--; if( nPtrL < pBtm ) { //DoReMiFaSortP( nPtrL, pBtm, pAbort ); StackPtr++; Stack[ StackPtr ].Top = nPtrL; Stack[ StackPtr ].Btm = pBtm; } if( nPtrR > pTop ) { //DoReMiFaSortP( pTop, nPtrR, pAbort ); StackPtr++; Stack[ StackPtr ].Top = pTop; Stack[ StackPtr ].Btm = nPtrR; } } else { InsertionP( pTop, pBtm ); IncStack: StackPtr--; } CheckStack: if( *pAbort ) { dwResult = 1223; break; } } while( StackPtr != ( size_t ) -1 ); HeapFree( hStack, 0, Stack ); HeapDestroy( hStack ); } else { dwResult = 8; } } return dwResult; } #endif int QsortComp(const void* arg1, const void* arg2) { SortRecStruct *r1 = ( SortRecStruct* ) arg1; SortRecStruct *r2 = ( SortRecStruct* ) arg2; if( r1->Key > r2->Key ) return 1; if( r1->Key < r2->Key ) return -1; return 0; } DWORD __stdcall qsortVc(SortRecStruct* pArray, size_t pRecCount, bool* pAbort) { qsort( pArray, pRecCount, sizeof( SortRecStruct ), QsortComp ); if( *pAbort ) return 1223; return 0; } inline static DWORD __stdcall med3(DWORD x, DWORD y, DWORD z) { if( x < y ) if( y < z ) return y; else if( z < x ) return x; else return z; else if( z < y ) return y; else if( x < z ) return x; else return z; } static DWORD __stdcall QuickSortR(SortRecStruct* pArray, int pTop, int pBtm, bool* pAbort) { if( pTop < pBtm ) // if pBtm < 0 ---> need "int" type { int i = pTop, j = pBtm; DWORD pivot = med3( pArray[ i ].Key, pArray[ i + ( j - i ) / 2 ].Key, pArray[ j ].Key ); for( ; ; ) { while( pArray[ i ].Key < pivot ) i++; while( pivot < pArray[ j ].Key ) j--; if( i >= j ) break; SwapUU( &pArray[ i ], &pArray[ j ] ); i++; j--; } if( *pAbort ) return 1223; QuickSortR( pArray, pTop, i - 1, pAbort ); QuickSortR( pArray, j + 1, pBtm, pAbort ); } return 0; } DWORD __stdcall QuickSort(SortRecStruct* pArray, size_t pRecCount, bool* pAbort) { return QuickSortR( pArray, 0, ( int ) ( pRecCount - 1 ), pAbort ); } static DWORD __stdcall QuickSort2R(SortRecStruct* pArray, size_t pTop, size_t pBtm, bool* pAbort) { DWORD Pivot; size_t sztScanL, sztScanR, sztMid; if( pTop < pBtm ) { sztScanL = pTop; sztScanR = pBtm; sztMid = ( pBtm - pTop ) / 2 + pTop; Pivot = med3( pArray[ pTop ].Key, pArray[ sztMid ].Key, pArray[ pBtm ].Key ); for( ; ; ) { while( pArray[ sztScanL ].Key < Pivot ) sztScanL++; while( Pivot < pArray[ sztScanR ].Key ) sztScanR--; if( sztScanL >= sztScanR ) break; SwapUU( &pArray[ sztScanL ], &pArray[ sztScanR ] ); sztScanL++; sztScanR--; } if( *pAbort ) return 1223; QuickSort2R( pArray, pTop, sztScanL - 1, pAbort ); QuickSort2R( pArray, sztScanR + 1, pBtm, pAbort ); } return 0; } DWORD __stdcall QuickSort2(SortRecStruct* pArray, size_t pRecCount, bool* pAbort) { return QuickSort2R( pArray, 0, ( int ) ( pRecCount - 1 ), pAbort ); } static DWORD __stdcall QuickSortPR(SortRecStruct* pTop, SortRecStruct* pBtm, bool* pAbort) { DWORD Pivot; SortRecStruct *RecScanL, *RecScanR, *RecMid; if( pTop < pBtm ) { RecScanL = pTop; RecScanR = pBtm; RecMid = ( pBtm - pTop ) / 2 + pTop; Pivot = med3( pTop->Key, RecMid->Key, pBtm->Key ); for( ; ; ) { while( RecScanL->Key < Pivot ) RecScanL++; while( Pivot < RecScanR->Key ) RecScanR--; if( RecScanL >= RecScanR ) break; SwapUU( RecScanL, RecScanR ); RecScanL++; RecScanR--; } if( *pAbort ) return 1223; QuickSortPR( pTop, RecScanL - 1, pAbort ); QuickSortPR( RecScanR + 1, pBtm, pAbort ); } return 0; } DWORD __stdcall QuickSortP(SortRecStruct* pArray, size_t pRecCount, bool* pAbort) { return QuickSortPR( pArray, &pArray[ pRecCount - 1 ], pAbort ); }