Hungry Mind, Blog about everything in IT - C#, Java, C++, .NET, Windows, WinAPI, ...

Visual C++ versus new T[N] { 0 }

auto const p = std::unique_ptr<unsigned char[]>(new unsigned char[1024 * 50] { 0 });

link.exe зависает, кушая при этом процессор.

auto const p = std::unique_ptr<unsigned char[]>(new unsigned char[1024 * 50] { 0 });

Результат: fatal error C1063: compiler limit : compiler stack overflow

auto const p = std::unique_ptr<unsigned char[]>(new unsigned char[1024] { 0 });

Сгенерированные инструкции:

00007FF6D1CF1A3A  call        operator new[] (07FF6D1CF1E30h)  
00007FF6D1CF1A3F  xor         ecx,ecx  
00007FF6D1CF1A41  test        rax,rax  
00007FF6D1CF1A44  je          wmain+383h (07FF6D1CF1D9Bh)  
00007FF6D1CF1A4A  mov         qword ptr [rax],rcx  
00007FF6D1CF1A4D  mov         qword ptr [rax+8],rcx  
00007FF6D1CF1A51  mov         qword ptr [rax+10h],rcx  
00007FF6D1CF1A55  mov         qword ptr [rax+18h],rcx  
00007FF6D1CF1A59  mov         qword ptr [rax+20h],rcx  
00007FF6D1CF1A5D  mov         qword ptr [rax+28h],rcx  
00007FF6D1CF1A61  mov         qword ptr [rax+30h],rcx  
00007FF6D1CF1A65  mov         qword ptr [rax+38h],rcx  
00007FF6D1CF1A69  mov         qword ptr [rax+40h],rcx  
00007FF6D1CF1A6D  mov         qword ptr [rax+48h],rcx  
00007FF6D1CF1A71  mov         qword ptr [rax+50h],rcx  
00007FF6D1CF1A75  mov         qword ptr [rax+58h],rcx  
00007FF6D1CF1A79  mov         qword ptr [rax+60h],rcx  
00007FF6D1CF1A7D  mov         qword ptr [rax+68h],rcx  
00007FF6D1CF1A81  mov         qword ptr [rax+70h],rcx  
00007FF6D1CF1A85  mov         qword ptr [rax+78h],rcx  
00007FF6D1CF1A89  mov         qword ptr [rax+80h],rcx  
...

Заполнение памяти нулями шмомпилятор от Быдлософт развернул в 128 инструкций mov.

То же, но компилятором Intel C++:

00007FF755B1102B  call        operator new[] (07FF755B13C50h)  
...
00007FF755B11053  call        _intel_fast_memcpy (07FF755B11B50h)  

_intel_fast_memcpy для моего Core i5 2XXX выбрала реализацию с циклом следующего вида:

00007FF755B132A0  movdqa      xmm0,xmmword ptr [rdx]  
00007FF755B132A4  movdqa      xmm1,xmmword ptr [rdx+10h]  
00007FF755B132A9  movdqa      xmmword ptr [rcx],xmm0  
00007FF755B132AD  movdqa      xmmword ptr [rcx+10h],xmm1  
00007FF755B132B2  lea         r8,[r8-80h]  
00007FF755B132B6  movdqa      xmm2,xmmword ptr [rdx+20h]  
00007FF755B132BB  movdqa      xmm3,xmmword ptr [rdx+30h]  
00007FF755B132C0  movdqa      xmmword ptr [rcx+20h],xmm2  
00007FF755B132C5  movdqa      xmmword ptr [rcx+30h],xmm3  
00007FF755B132CA  movdqa      xmm0,xmmword ptr [rdx+40h]  
00007FF755B132CF  movdqa      xmm1,xmmword ptr [rdx+50h]  
00007FF755B132D4  cmp         r8,0A8h  
00007FF755B132DB  movdqa      xmmword ptr [rcx+40h],xmm0  
00007FF755B132E0  movdqa      xmmword ptr [rcx+50h],xmm1  
00007FF755B132E5  movdqa      xmm2,xmmword ptr [rdx+60h]  
00007FF755B132EA  movdqa      xmm3,xmmword ptr [rdx+70h]  
00007FF755B132EF  lea         rdx,[rdx+80h]  
00007FF755B132F6  movdqa      xmmword ptr [rcx+60h],xmm2  
00007FF755B132FB  movdqa      xmmword ptr [rcx+70h],xmm3  
00007FF755B13300  lea         rcx,[rcx+80h]  
00007FF755B13307  jge         __intel_memcpy+0E90h (07FF755B132A0h)  

А если выбрать Favor Small Code в настройках компилятора, то заполнение нулями превращается в ожидаемый и привычный rep movs:

...
00007FF615041053  rep movs    qword ptr [rdi],qword ptr [rsi]
Copyright 2007-2011 Chabster