/* MergeSort.cpp */ #include "stdafx.h" #include "MergeSort.h" #include #pragma optimize( "t", on ) static void __stdcall SwapUU(void* p1, void* p2) { char *c1 = ( char* ) p1, *c2 = ( char* ) p2, cdata; size_t sztRecLength = sizeof( SortRecStruct ); __m128i XmmTemp; DWORD dwTemp; while( sztRecLength >= 16 ) { XmmTemp = _mm_loadu_si128( ( __m128i* ) c1 ); _mm_storeu_si128( ( __m128i* ) c1, _mm_loadu_si128( ( __m128i* ) c2 ) ); _mm_storeu_si128( ( __m128i* ) c2, XmmTemp ); c1 += 16; c2 += 16; sztRecLength -= 16; } if( sztRecLength >= 8 ) { XmmTemp = _mm_loadl_epi64( ( __m128i* ) c1 ); _mm_storel_epi64( ( __m128i* ) c1, _mm_loadl_epi64( ( __m128i* ) c2 ) ); _mm_storel_epi64( ( __m128i* ) c2, XmmTemp ); c1 += 8; c2 += 8; sztRecLength -= 8; } if( sztRecLength >= 4 ) { dwTemp = *( ( DWORD* ) c1 ); *( ( int* ) c1 ) = *( ( DWORD* ) c2 ); *( ( DWORD* ) c2 ) = dwTemp; c1 += 4; c2 += 4; sztRecLength -=4; } while( sztRecLength-- ) { cdata = *c1; *c1++ = *c2; *c2++ = cdata; } } static void __stdcall CopyRecAU(void* pDest, void* pSrc) { char *cDest = ( char* ) pDest, *cSrc = ( char* ) pSrc, cdata; size_t sztRecLength = sizeof( SortRecStruct ); while( sztRecLength >= 16 ) { _mm_store_si128( ( __m128i* ) cDest, _mm_loadu_si128( ( __m128i* ) cSrc ) ); cDest += 16; cSrc += 16; sztRecLength -= 16; } if( sztRecLength >= 8 ) { _mm_storel_epi64( ( __m128i* ) cDest, _mm_loadl_epi64( ( __m128i* ) cSrc ) ); cDest += 8; cSrc += 8; sztRecLength -= 8; } if( sztRecLength >= 4 ) { *( ( int* ) cDest ) = *( ( DWORD* ) cSrc ); cDest += 4; cSrc += 4; sztRecLength -= 4; } while( sztRecLength-- ) { cdata = *cDest; *cDest++ = *cSrc; *cSrc++ = cdata; } } static void __stdcall CopyRecUA(void* pDest, void* pSrc) { char *cDest = ( char* ) pDest, *cSrc = ( char* ) pSrc, cdata; size_t sztRecLength = sizeof( SortRecStruct ); while( sztRecLength >= 16 ) { _mm_storeu_si128( ( __m128i* ) cDest, _mm_load_si128( ( __m128i* ) cSrc ) ); cDest += 16; cSrc += 16; sztRecLength -= 16; } if( sztRecLength >= 8 ) { _mm_storel_epi64( ( __m128i* ) cDest, _mm_loadl_epi64( ( __m128i* ) cSrc ) ); cDest += 8; cSrc += 8; sztRecLength -= 8; } if( sztRecLength >= 4 ) { *( ( int* ) cDest ) = *( ( DWORD* ) cSrc ); cDest += 4; cSrc += 4; sztRecLength -= 4; } while( sztRecLength-- ) { cdata = *cDest; *cDest++ = *cSrc; *cSrc++ = cdata; } } DWORD __stdcall MergeSortA(SortRecStruct* pArray, size_t pRecCount, bool* pAbort) { SortRecStruct *Buffer; size_t sztAllocSize, sztSpan, sztPtr, sztMid, sztBtm, sztDest, sztPtrL, sztPtrR; int nResult = 0; if( pRecCount >= 2 ) { sztAllocSize = sizeof( SortRecStruct ) * pRecCount; Buffer = ( SortRecStruct* ) VirtualAlloc( NULL, sztAllocSize, MEM_COMMIT, PAGE_READWRITE ); if( Buffer ) { sztSpan = 1; do { sztPtr = 0; do { sztMid = min( sztPtr + sztSpan, pRecCount ); sztBtm = min( sztPtr + 2 * sztSpan, pRecCount ); sztDest = sztPtrL = sztPtr; sztPtrR = sztMid; do { if( sztPtrL < sztMid && ( sztPtrR >= sztBtm || pArray[ sztPtrL ].Key <= pArray[ sztPtrR ].Key ) ) CopyRecAU( &Buffer[ sztDest ], &pArray[ sztPtrL++ ] ); else CopyRecAU( &Buffer[ sztDest ], &pArray[ sztPtrR++ ] ); sztDest++; } while( sztDest < sztBtm ); sztPtr = sztSpan * 2 + sztPtr; } while( sztPtr < pRecCount ); memcpy( pArray, Buffer, sztAllocSize ); sztSpan *= 2; if( *pAbort ) return 1223; } while( sztSpan < pRecCount ); VirtualFree( Buffer, sztAllocSize, MEM_DECOMMIT ); VirtualFree( Buffer, 0, MEM_RELEASE ); } else nResult = GetLastError(); } return nResult; } static void __stdcall MergeProcB(SortRecStruct* pArray, SortRecStruct* pTemp, size_t pTop, size_t pMid, size_t pBtm) { size_t sztPtr, sztLeftPtr, sztCount, sztTempDest; sztLeftPtr = pMid - 1; sztTempDest = pTop; sztCount = pBtm - pTop + 1; while( ( pTop <= sztLeftPtr ) && ( pMid <= pBtm ) ) { if( pArray[ pTop ].Key <= pArray[ pMid ].Key ) { CopyRecAU( &pTemp[ sztTempDest ], &pArray[ pTop++ ] ); } else { CopyRecAU( &pTemp[ sztTempDest ], &pArray[ pMid++ ] ); } sztTempDest++; } // while( pTop <= sztLeftPtr ) { CopyRecAU( &pTemp[ sztTempDest++ ], &pArray[ pTop++ ] ); } // while( pMid <= pBtm ) { CopyRecAU( &pTemp[ sztTempDest++ ], &pArray[ pMid++ ] ); } // for( sztPtr = 0; sztPtr < sztCount; pBtm--, sztPtr++ ) { CopyRecUA( &pArray[ pBtm ], &pTemp[ pBtm ] ); } } static void __stdcall DualSort(SortRecStruct* p1, SortRecStruct* p2) { if( p1->Key > p2->Key ) SwapUU( p1, p2 ); } static DWORD __stdcall MergeSortBB(SortRecStruct* pArray, SortRecStruct* pTemp, size_t pTop, size_t pBtm, bool* pAbort) { size_t sztMidPtr; if( pBtm > pTop ) { if( ( pBtm - pTop ) > 1 ) { sztMidPtr = pTop + ( pBtm - pTop ) / 2; MergeSortBB( pArray, pTemp, pTop, sztMidPtr, pAbort ); if( *pAbort ) return 1223; MergeSortBB( pArray, pTemp, sztMidPtr + 1, pBtm, pAbort ); if( *pAbort ) return 1223; MergeProcB( pArray, pTemp, pTop, sztMidPtr + 1, pBtm ); if( *pAbort ) return 1223; } else { while( pTop < pBtm ) { DualSort( &pArray[ pTop ], &pArray[ pBtm ] ); pTop += 2; } } } return 0; } DWORD __stdcall MergeSortBR(SortRecStruct* pArray, size_t pTop, size_t pBtm, bool* pAbort) { SortRecStruct *OutBuff; size_t sztAllocSize; int nStatus, nResult = 0; if( pBtm > pTop ) { sztAllocSize = sizeof( SortRecStruct ) * ( pBtm - pTop + 1 ); OutBuff = ( SortRecStruct* ) VirtualAlloc( NULL, sztAllocSize, MEM_COMMIT, PAGE_READWRITE ); if( OutBuff == NULL ) nResult = GetLastError(); else { nStatus = MergeSortBB( pArray, OutBuff, pTop, pBtm, pAbort ); VirtualFree( OutBuff, sztAllocSize, MEM_DECOMMIT ); VirtualFree( OutBuff, 0, MEM_RELEASE ); if( nStatus ) nResult = nStatus; } } return nResult; } DWORD __stdcall MergeSortB(SortRecStruct* pArray, size_t pRecCount, bool* pAbort) { return MergeSortBR( pArray, 0, pRecCount - 1, pAbort ); } static void __stdcall MergeProcC(SortRecStruct* pTemp, SortRecStruct* pTop, SortRecStruct* pMid, SortRecStruct* pBtm) { SortRecStruct *RecPtr, *RecLeftPtr, *TempDest; size_t sztCount; RecPtr = pTop; RecLeftPtr = pMid - 1; TempDest = pTemp; while( ( RecPtr <= RecLeftPtr ) && ( pMid <= pBtm ) ) { if( RecPtr->Key <= pMid->Key ) { CopyRecAU( TempDest, RecPtr++ ); } else { CopyRecAU( TempDest, pMid++ ); } TempDest++; } // while( RecPtr <= RecLeftPtr ) { CopyRecAU( TempDest++, RecPtr++ ); } // while( pMid <= pBtm ) { CopyRecAU( TempDest++, pMid++ ); } // sztCount = ( pBtm - pTop ) + 1; while( sztCount ) { CopyRecUA( pTop++, pTemp++ ); sztCount--; } } static DWORD __stdcall MergeSortCC(SortRecStruct* pTemp, SortRecStruct* pTop, SortRecStruct* pBtm, bool* pAbort) { SortRecStruct *RecMidPtr; if( pBtm > pTop ) { if( ( pBtm - pTop ) > 1 ) { RecMidPtr = ( pBtm - pTop ) / 2 + pTop; MergeSortCC( pTemp, pTop, RecMidPtr, pAbort ); if( *pAbort ) return 1223; MergeSortCC( pTemp, RecMidPtr + 1, pBtm, pAbort ); if( *pAbort ) return 1223; MergeProcC( pTemp, pTop, RecMidPtr + 1, pBtm ); if( *pAbort ) return 1223; } else { while( pTop < pBtm ) { DualSort( pTop, pBtm ); pTop += 2; } } } return 0; } DWORD __stdcall MergeSortCR(SortRecStruct* pArray, size_t pTop, size_t pBtm, bool* pAbort) { SortRecStruct *OutBuff; size_t sztAllocSize; int nStatus, nResult = 0; if( pBtm > pTop ) { sztAllocSize = sizeof( SortRecStruct ) * ( pBtm - pTop + 1 ); OutBuff = ( SortRecStruct* ) VirtualAlloc( NULL, sztAllocSize, MEM_COMMIT, PAGE_READWRITE ); if( OutBuff == NULL ) nResult = GetLastError(); else { nStatus = MergeSortCC( OutBuff, pArray, &pArray[ pBtm ], pAbort ); VirtualFree( OutBuff, sztAllocSize, MEM_DECOMMIT ); VirtualFree( OutBuff, 0, MEM_RELEASE ); if( nStatus ) nResult = nStatus; } } return nResult; } DWORD __stdcall MergeSortC(SortRecStruct* pArray, size_t pRecCount, bool* pAbort) { return MergeSortCR( pArray, 0, pRecCount - 1, pAbort ); }