Windows 7: overshoot C++ std::this_thread::sleep_for
There is quite a bit of additional code executed if using sleep_for vs SleepEx.
For example calling SleepEx(15) generates the following assembly in debug mode (Visual Studio 2015):
; 9 : SleepEx(15, false); mov esi, esp push 0 push 15 ; 0000000fH call DWORD PTR __imp__SleepEx@8 cmp esi, esp call __RTC_CheckEsp
By contrast this code
const std::chrono::milliseconds duration(15);std::this_thread::sleep_for(duration);
Generates the following:
; 9 : std::this_thread::sleep_for(std::chrono::milliseconds(15)); mov DWORD PTR $T1[ebp], 15 ; 0000000fH lea eax, DWORD PTR $T1[ebp] push eax lea ecx, DWORD PTR $T2[ebp] call duration push eax call sleep_for add esp, 4
This calls into:
duration PROC ; std::chrono::duration<__int64,std::ratio<1,1000> >::duration<__int64,std::ratio<1,1000> ><int,void>, COMDAT; _this$ = ecx; 113 : { // construct from representation push ebp mov ebp, esp sub esp, 204 ; 000000ccH push ebx push esi push edi push ecx lea edi, DWORD PTR [ebp-204] mov ecx, 51 ; 00000033H mov eax, -858993460 ; ccccccccH rep stosd pop ecx mov DWORD PTR _this$[ebp], ecx; 112 : : _MyRep(static_cast<_Rep>(_Val)) mov eax, DWORD PTR __Val$[ebp] mov eax, DWORD PTR [eax] cdq mov ecx, DWORD PTR _this$[ebp] mov DWORD PTR [ecx], eax mov DWORD PTR [ecx+4], edx; 114 : } mov eax, DWORD PTR _this$[ebp] pop edi pop esi pop ebx mov esp, ebp pop ebp ret 4duration ENDP
And calls into
sleep_for PROC ; std::this_thread::sleep_for<__int64,std::ratio<1,1000> >, COMDAT ; 151 : { // sleep for duration push ebp mov ebp, esp sub esp, 268 ; 0000010cH push ebx push esi push edi lea edi, DWORD PTR [ebp-268] mov ecx, 67 ; 00000043H mov eax, -858993460 ; ccccccccH rep stosd mov eax, DWORD PTR ___security_cookie xor eax, ebp mov DWORD PTR __$ArrayPad$[ebp], eax ; 152 : stdext::threads::xtime _Tgt = _To_xtime(_Rel_time); mov eax, DWORD PTR __Rel_time$[ebp] push eax lea ecx, DWORD PTR $T1[ebp] push ecx call to_xtime add esp, 8 mov edx, DWORD PTR [eax] mov DWORD PTR $T2[ebp], edx mov ecx, DWORD PTR [eax+4] mov DWORD PTR $T2[ebp+4], ecx mov edx, DWORD PTR [eax+8] mov DWORD PTR $T2[ebp+8], edx mov eax, DWORD PTR [eax+12] mov DWORD PTR $T2[ebp+12], eax mov ecx, DWORD PTR $T2[ebp] mov DWORD PTR __Tgt$[ebp], ecx mov edx, DWORD PTR $T2[ebp+4] mov DWORD PTR __Tgt$[ebp+4], edx mov eax, DWORD PTR $T2[ebp+8] mov DWORD PTR __Tgt$[ebp+8], eax mov ecx, DWORD PTR $T2[ebp+12] mov DWORD PTR __Tgt$[ebp+12], ecx ; 153 : sleep_until(&_Tgt); lea eax, DWORD PTR __Tgt$[ebp] push eax call sleep_until add esp, 4 ; 154 : } push edx mov ecx, ebp push eax lea edx, DWORD PTR $LN5@sleep_for call @_RTC_CheckStackVars@8 pop eax pop edx pop edi pop esi pop ebx mov ecx, DWORD PTR __$ArrayPad$[ebp] xor ecx, ebp call @__security_check_cookie@4 add esp, 268 ; 0000010cH cmp ebp, esp call __RTC_CheckEsp mov esp, ebp pop ebp ret 0 npad 3 $LN5@sleep_for: DD 1 DD $LN4@sleep_for $LN4@sleep_for: DD -24 ; ffffffe8H DD 16 ; 00000010H DD $LN3@sleep_for $LN3@sleep_for: DB 95 ; 0000005fH DB 84 ; 00000054H DB 103 ; 00000067H DB 116 ; 00000074H DB 0 sleep_for ENDP
Some conversion happens:
to_xtime PROC ; std::_To_xtime<__int64,std::ratio<1,1000> >, COMDAT; 758 : { // convert duration to xtime push ebp mov ebp, esp sub esp, 348 ; 0000015cH push ebx push esi push edi lea edi, DWORD PTR [ebp-348] mov ecx, 87 ; 00000057H mov eax, -858993460 ; ccccccccH rep stosd mov eax, DWORD PTR ___security_cookie xor eax, ebp mov DWORD PTR __$ArrayPad$[ebp], eax; 759 : xtime _Xt;; 760 : if (_Rel_time <= chrono::duration<_Rep, _Period>::zero()) lea eax, DWORD PTR $T7[ebp] push eax call duration_zero ; std::chrono::duration<__int64,std::ratio<1,1000> >::zero add esp, 4 push eax mov ecx, DWORD PTR __Rel_time$[ebp] push ecx call chronos_operator ; std::chrono::operator<=<__int64,std::ratio<1,1000>,__int64,std::ratio<1,1000> > add esp, 8 movzx edx, al test edx, edx je SHORT $LN2@To_xtime; 761 : { // negative or zero relative time, return zero; 762 : _Xt.sec = 0; xorps xmm0, xmm0 movlpd QWORD PTR __Xt$[ebp], xmm0; 763 : _Xt.nsec = 0; mov DWORD PTR __Xt$[ebp+8], 0; 764 : }; 765 : else jmp $LN3@To_xtime$LN2@To_xtime:; 766 : { // positive relative time, convert; 767 : chrono::nanoseconds _T0 =; 768 : chrono::system_clock::now().time_since_epoch(); lea eax, DWORD PTR $T5[ebp] push eax lea ecx, DWORD PTR $T6[ebp] push ecx call system_clock_now ; std::chrono::system_clock::now add esp, 4 mov ecx, eax call time_since_ephoch ; std::chrono::time_point<std::chrono::system_clock,std::chrono::duration<__int64,std::ratio<1,10000000> > >::time_since_epoch push eax lea ecx, DWORD PTR __T0$8[ebp] call duration ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::duration<__int64,std::ratio<1,1000000000> ><__int64,std::ratio<1,10000000>,void>; 769 : _T0 += _Rel_time; mov eax, DWORD PTR __Rel_time$[ebp] push eax lea ecx, DWORD PTR $T4[ebp] call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::duration<__int64,std::ratio<1,1000000000> ><__int64,std::ratio<1,1000>,void> lea ecx, DWORD PTR $T4[ebp] push ecx lea ecx, DWORD PTR __T0$8[ebp] call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::operator+=; 770 : _Xt.sec = chrono::duration_cast<chrono::seconds>(_T0).count(); lea eax, DWORD PTR __T0$8[ebp] push eax lea ecx, DWORD PTR $T3[ebp] push ecx call duration_cast ; std::chrono::duration_cast<std::chrono::duration<__int64,std::ratio<1,1> >,__int64,std::ratio<1,1000000000> > add esp, 8 mov ecx, eax call duration_count ; std::chrono::duration<__int64,std::ratio<1,1> >::count mov DWORD PTR __Xt$[ebp], eax mov DWORD PTR __Xt$[ebp+4], edx; 771 : _T0 -= chrono::seconds(_Xt.sec); lea eax, DWORD PTR __Xt$[ebp] push eax lea ecx, DWORD PTR $T1[ebp] call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1> >::duration<__int64,std::ratio<1,1> ><__int64,void> push eax lea ecx, DWORD PTR $T2[ebp] call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::duration<__int64,std::ratio<1,1000000000> ><__int64,std::ratio<1,1>,void> lea ecx, DWORD PTR $T2[ebp] push ecx lea ecx, DWORD PTR __T0$8[ebp] call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::operator-=; 772 : _Xt.nsec = (long)_T0.count(); lea ecx, DWORD PTR __T0$8[ebp] call duration_ratio ; std::chrono::duration<__int64,std::ratio<1,1000000000> >::count mov DWORD PTR __Xt$[ebp+8], eax$LN3@To_xtime:; 773 : }; 774 : return (_Xt); mov eax, DWORD PTR $T9[ebp] mov ecx, DWORD PTR __Xt$[ebp] mov DWORD PTR [eax], ecx mov edx, DWORD PTR __Xt$[ebp+4] mov DWORD PTR [eax+4], edx mov ecx, DWORD PTR __Xt$[ebp+8] mov DWORD PTR [eax+8], ecx mov edx, DWORD PTR __Xt$[ebp+12] mov DWORD PTR [eax+12], edx mov eax, DWORD PTR $T9[ebp]; 775 : } push edx mov ecx, ebp push eax lea edx, DWORD PTR $LN8@To_xtime call @_RTC_CheckStackVars@8 pop eax pop edx pop edi pop esi pop ebx mov ecx, DWORD PTR __$ArrayPad$[ebp] xor ecx, ebp call @__security_check_cookie@4 add esp, 348 ; 0000015cH cmp ebp, esp call __RTC_CheckEsp mov esp, ebp pop ebp ret 0$LN8@To_xtime: DD 2 DD $LN7@To_xtime$LN7@To_xtime: DD -24 ; ffffffe8H DD 16 ; 00000010H DD $LN5@To_xtime DD -40 ; ffffffd8H DD 8 DD $LN6@To_xtime$LN6@To_xtime: DB 95 ; 0000005fH DB 84 ; 00000054H DB 48 ; 00000030H DB 0$LN5@To_xtime: DB 95 ; 0000005fH DB 88 ; 00000058H DB 116 ; 00000074H DB 0to_xtime ENDP
Eventually the imported function gets called, the same one SleepEx has used.
sleep_until PROC ; std::this_thread::sleep_until, COMDAT; 131 : { // sleep until _Abs_time push ebp mov ebp, esp sub esp, 192 ; 000000c0H push ebx push esi push edi lea edi, DWORD PTR [ebp-192] mov ecx, 48 ; 00000030H mov eax, -858993460 ; ccccccccH rep stosd; 132 : _Thrd_sleep(_Abs_time); mov esi, esp mov eax, DWORD PTR __Abs_time$[ebp] push eax call DWORD PTR __imp___Thrd_sleep add esp, 4 cmp esi, esp call __RTC_CheckEsp; 133 : } pop edi pop esi pop ebx add esp, 192 ; 000000c0H cmp ebp, esp call __RTC_CheckEsp mov esp, ebp pop ebp ret 0sleep_until ENDP
You should also be aware even SleepEx may not give 100% exact results as per the MSDN documentation https://msdn.microsoft.com/en-us/library/windows/desktop/ms686307(v=vs.85).aspx
This function causes a thread to relinquish the remainder of its time slice and become unrunnable for an interval based on the value of dwMilliseconds. The system clock "ticks" at a constant rate. If dwMilliseconds is less than the resolution of the system clock, the thread may sleep for less than the specified length of time. If dwMilliseconds is greater than one tick but less than two, the wait can be anywhere between one and two ticks, and so on. To increase the accuracy of the sleep interval, call the timeGetDevCaps function to determine the supported minimum timer resolution and the timeBeginPeriod function to set the timer resolution to its minimum. Use caution when calling timeBeginPeriod, as frequent calls can significantly affect the system clock, system power usage, and the scheduler. If you call timeBeginPeriod, call it one time early in the application and be sure to call the timeEndPeriod function at the very end of the application.