Visual C++ versus new T[N] { 0 }

auto const p = std::unique_ptr<unsigned char[]>(new unsigned char[1024 * 50] { 0 });

link.exe зависает, кушая при этом процессор.

auto const p = std::unique_ptr<unsigned char[]>(new unsigned char[1024 * 50] { 0 });

Результат: fatal error C1063: compiler limit : compiler stack overflow

auto const p = std::unique_ptr<unsigned char[]>(new unsigned char[1024] { 0 });

Сгенерированные инструкции:

00007FF6D1CF1A3A  call        operator new[] (07FF6D1CF1E30h)  
00007FF6D1CF1A3F  xor         ecx,ecx  
00007FF6D1CF1A41  test        rax,rax  
00007FF6D1CF1A44  je          wmain+383h (07FF6D1CF1D9Bh)  
00007FF6D1CF1A4A  mov         qword ptr [rax],rcx  
00007FF6D1CF1A4D  mov         qword ptr [rax+8],rcx  
00007FF6D1CF1A51  mov         qword ptr [rax+10h],rcx  
00007FF6D1CF1A55  mov         qword ptr [rax+18h],rcx  
00007FF6D1CF1A59  mov         qword ptr [rax+20h],rcx  
00007FF6D1CF1A5D  mov         qword ptr [rax+28h],rcx  
00007FF6D1CF1A61  mov         qword ptr [rax+30h],rcx  
00007FF6D1CF1A65  mov         qword ptr [rax+38h],rcx  
00007FF6D1CF1A69  mov         qword ptr [rax+40h],rcx  
00007FF6D1CF1A6D  mov         qword ptr [rax+48h],rcx  
00007FF6D1CF1A71  mov         qword ptr [rax+50h],rcx  
00007FF6D1CF1A75  mov         qword ptr [rax+58h],rcx  
00007FF6D1CF1A79  mov         qword ptr [rax+60h],rcx  
00007FF6D1CF1A7D  mov         qword ptr [rax+68h],rcx  
00007FF6D1CF1A81  mov         qword ptr [rax+70h],rcx  
00007FF6D1CF1A85  mov         qword ptr [rax+78h],rcx  
00007FF6D1CF1A89  mov         qword ptr [rax+80h],rcx  

Заполнение памяти нулями шмомпилятор от Быдлософт развернул в 128 инструкций mov.

То же, но компилятором Intel C++:

00007FF755B1102B  call        operator new[] (07FF755B13C50h)  
00007FF755B11053  call        _intel_fast_memcpy (07FF755B11B50h)  

_intel_fast_memcpy для моего Core i5 2XXX выбрала реализацию с циклом следующего вида:

00007FF755B132A0  movdqa      xmm0,xmmword ptr [rdx]  
00007FF755B132A4  movdqa      xmm1,xmmword ptr [rdx+10h]  
00007FF755B132A9  movdqa      xmmword ptr [rcx],xmm0  
00007FF755B132AD  movdqa      xmmword ptr [rcx+10h],xmm1  
00007FF755B132B2  lea         r8,[r8-80h]  
00007FF755B132B6  movdqa      xmm2,xmmword ptr [rdx+20h]  
00007FF755B132BB  movdqa      xmm3,xmmword ptr [rdx+30h]  
00007FF755B132C0  movdqa      xmmword ptr [rcx+20h],xmm2  
00007FF755B132C5  movdqa      xmmword ptr [rcx+30h],xmm3  
00007FF755B132CA  movdqa      xmm0,xmmword ptr [rdx+40h]  
00007FF755B132CF  movdqa      xmm1,xmmword ptr [rdx+50h]  
00007FF755B132D4  cmp         r8,0A8h  
00007FF755B132DB  movdqa      xmmword ptr [rcx+40h],xmm0  
00007FF755B132E0  movdqa      xmmword ptr [rcx+50h],xmm1  
00007FF755B132E5  movdqa      xmm2,xmmword ptr [rdx+60h]  
00007FF755B132EA  movdqa      xmm3,xmmword ptr [rdx+70h]  
00007FF755B132EF  lea         rdx,[rdx+80h]  
00007FF755B132F6  movdqa      xmmword ptr [rcx+60h],xmm2  
00007FF755B132FB  movdqa      xmmword ptr [rcx+70h],xmm3  
00007FF755B13300  lea         rcx,[rcx+80h]  
00007FF755B13307  jge         __intel_memcpy+0E90h (07FF755B132A0h)  

А если выбрать Favor Small Code в настройках компилятора, то заполнение нулями превращается в ожидаемый и привычный rep movs:

00007FF615041053  rep movs    qword ptr [rdi],qword ptr [rsi]

WinDbg: find probable CONTEXT records

This script finds and pretty-prints all probable CONTEXT struct instances throughout the address space of an x64 process:

0:000> .foreach ( CxrPtr { s -[w1]b 0x00000000000000000 L?FFFFFFFFFFFFFFFF 2b 00 2b 00 53 00 2b 00 } ) { .cxr ${CxrPtr}-@@(#FIELD_OFFSET(ntdll!_CONTEXT, SegDs)) }
rax=000000000f2907e0 rbx=00000001420b70f0 rcx=0000000010c3d130
rdx=0000000010c3cad8 rsi=00000001420b7d08 rdi=000000013fda9cb0
rip=000007fe99e71cc9 rsp=0000000010c3e850 rbp=0000000010c3e870
 r8=0000000010c2a000  r9=000000000f2907e0 r10=000007fef6bd6738
r11=0000000000000001 r12=0000000140e4fb00 r13=000000033fcc69f8
r14=0000000010c3f098 r15=0000000000000004
iopl=0         nv up ei pl nz na pe nc
cs=0033  ss=002b  ds=002b  es=002b  fs=0053  gs=002b             efl=00000202
000007fe`99e71cc9 8a4510          mov     al,byte ptr [rbp+10h] ss:00000000`10c3e880=00


rax=000000004685a478 rbx=0000000241d02878 rcx=0000000000000000
rdx=0000000000000000 rsi=0000000241c9d708 rdi=0000000241d02838
rip=000007fe9d9d39f4 rsp=000000004685a450 rbp=000000004685a4a0
 r8=0000000441b49850  r9=0000000000000000 r10=000007fe9b1e1ac0
r11=0000000441b49870 r12=0000000241c90e88 r13=000007fe9b299448
r14=00000001406cc858 r15=0000000441b24af0
iopl=0         nv up ei pl nz na po nc
cs=0033  ss=002b  ds=002b  es=002b  fs=0053  gs=002b             efl=00010206
000007fe`9d9d39f4 803900          cmp     byte ptr [rcx],0 ds:00000000`00000000=??

Then you would normally use the RIP and RSP registers to find the relevant code and thread context:

0:000> !IP2MD 000007fe99e71cc9 
MethodDesc:   000007fe98e229c0
Method Name:  Replay.Core.Implementation.AutomaticUpdate.PatchDetector.IsPatched(System.Diagnostics.FileVersionInfo)
Class:        000007fe98dec4a0
MethodTable:  000007fe98e22a70
mdToken:      0000000006000463
Module:       000007fe988acb20
IsJitted:     yes
CodeAddr:     000007fe99e71c50
Transparency: Critical

0:000> !IP2MD 000007fefde1940d  
Failed to request MethodData, not in JIT code range
0:000> ln 000007fefde1940d
(000007fe`fde193d0)   KERNELBASE!RaiseException+0x39   |  (000007fe`fde19420)   KERNELBASE!CreateMutexExW

0:000> !address 000000004685a450

Mapping file section regions...
Mapping module regions...
Mapping PEB regions...
Mapping TEB and stack regions...
Mapping heap regions...
Mapping page heap regions...
Mapping other regions...
Mapping stack trace database regions...
Mapping activation context regions...

Usage:                  Stack
Base Address:           00000000`46852000
End Address:            00000000`46860000
Region Size:            00000000`0000e000
State:                  00001000 MEM_COMMIT
Protect:                00000004 PAGE_READWRITE
Type:                   00020000 MEM_PRIVATE
Allocation Base:        00000000`46460000
Allocation Protect:     00000004 PAGE_READWRITE
More info:              ~88k

0:000> ~88k
Child-SP          RetAddr           Call Site
00000000`4685ed48 000007fe`fde110dc ntdll!NtWaitForSingleObject+0xa
00000000`4685ed50 000007fe`f7e89622 KERNELBASE!WaitForSingleObjectEx+0x79
00000000`4685edf0 000007fe`f7e89841 clr!CLRSemaphore::Wait+0x8a
00000000`4685eeb0 000007fe`f7e897ec clr!ThreadpoolMgr::UnfairSemaphore::Wait+0x134
00000000`4685eef0 000007fe`f7d733de clr!ThreadpoolMgr::WorkerThreadStart+0x204
00000000`4685efb0 00000000`77a959ed clr!Thread::intermediateThreadProc+0x7d
00000000`4685fb70 00000000`77ccc541 kernel32!BaseThreadInitThunk+0xd
00000000`4685fba0 00000000`00000000 ntdll!RtlUserThreadStart+0x1d

March 13, 2015

