I used exactly the same compiler (g++ 4.1.2), and there was a difference
in the generated assembly listing (my architecture is 32-bit, an Intel
Pentium 4).
Could you post the generated assembly when NO_ALIASING_OPTIMIZATION is
defined?
Sure (here's the 32-bit version, so that you can compare it more easily):
$ g++ -m32 -O3 -S scratch.cpp
$ cat scratch.s
# Assembler listing produced by g++ for scratch.cpp (GNU as, AT&T syntax,
# 32-bit x86).
.file "scratch.cpp"
# Place the address of _GLOBAL__I__Z6smoothPiS_ in the .ctors section.
.section .ctors,"aw",@progbits
.align 4
.long _GLOBAL__I__Z6smoothPiS_
# Everything that follows, up to the next .section, is code.
.text
#-----------------------------------------------------------------------
# smooth(int *, int *)                          [_Z6smoothPiS_]
#
# Syntax: GNU as, AT&T.  ABI: i386 cdecl (arguments on the stack).
# In:     first argument  at 8(%ebp),  kept in %esi ("dst" below)
#         second argument at 12(%ebp), kept in %edx ("src" below)
# Effect: dst[i] = src[i] + src[i+1] + src[i+2]   for i = 0 .. 16
#         (reads src[0..18], writes dst[0..16]; fully unrolled).
# Regs:   %ecx = &src[1], %ebx = &src[2]; %eax is the running sum.
#         %ebp, %esi and %ebx are saved and restored.
#
# Every term is loaded from memory again after the preceding store;
# no loaded value is reused between outputs.  Presumably this is because
# the compiler cannot prove that dst and src do not overlap.
#
# Labels use a single leading dot (.LFB1435, .LCFI0, ...): the doubled
# dots in the pasted listing are not valid for .globl and do not match
# the single-dot references used elsewhere in the file.
#-----------------------------------------------------------------------
        .align 2
        .p2align 4,,15
        .globl  _Z6smoothPiS_
        .type   _Z6smoothPiS_, @function
_Z6smoothPiS_:
.LFB1435:
        pushl   %ebp
.LCFI0:
        movl    %esp, %ebp
.LCFI1:
        movl    12(%ebp), %edx          # edx = src
        pushl   %esi
.LCFI2:
        movl    8(%ebp), %esi           # esi = dst
        pushl   %ebx
.LCFI3:
        movl    8(%edx), %eax
        leal    8(%edx), %ebx           # ebx = &src[2]
        addl    4(%edx), %eax
        addl    (%edx), %eax
        leal    4(%edx), %ecx           # ecx = &src[1]
        movl    %eax, (%esi)            # dst[0]  = src[2] + src[1] + src[0]

        movl    4(%ebx), %eax
        addl    4(%ecx), %eax
        addl    4(%edx), %eax
        movl    %eax, 4(%esi)           # dst[1]  = src[3] + src[2] + src[1]

        movl    8(%edx), %eax
        addl    8(%ecx), %eax
        addl    8(%ebx), %eax
        movl    %eax, 8(%esi)           # dst[2]  = src[2] + src[3] + src[4]

        movl    12(%edx), %eax
        addl    12(%ecx), %eax
        addl    12(%ebx), %eax
        movl    %eax, 12(%esi)          # dst[3]  = src[3] + src[4] + src[5]

        movl    16(%edx), %eax
        addl    16(%ecx), %eax
        addl    16(%ebx), %eax
        movl    %eax, 16(%esi)          # dst[4]  = src[4] + src[5] + src[6]

        movl    20(%edx), %eax
        addl    20(%ecx), %eax
        addl    20(%ebx), %eax
        movl    %eax, 20(%esi)          # dst[5]  = src[5] + src[6] + src[7]

        movl    24(%edx), %eax
        addl    24(%ecx), %eax
        addl    24(%ebx), %eax
        movl    %eax, 24(%esi)          # dst[6]  = src[6] + src[7] + src[8]

        movl    28(%edx), %eax
        addl    28(%ecx), %eax
        addl    28(%ebx), %eax
        movl    %eax, 28(%esi)          # dst[7]  = src[7] + src[8] + src[9]

        movl    32(%edx), %eax
        addl    32(%ecx), %eax
        addl    32(%ebx), %eax
        movl    %eax, 32(%esi)          # dst[8]  = src[8] + src[9] + src[10]

        movl    36(%edx), %eax
        addl    36(%ecx), %eax
        addl    36(%ebx), %eax
        movl    %eax, 36(%esi)          # dst[9]  = src[9] + src[10] + src[11]

        movl    40(%edx), %eax
        addl    40(%ecx), %eax
        addl    40(%ebx), %eax
        movl    %eax, 40(%esi)          # dst[10] = src[10] + src[11] + src[12]

        movl    44(%edx), %eax
        addl    44(%ecx), %eax
        addl    44(%ebx), %eax
        movl    %eax, 44(%esi)          # dst[11] = src[11] + src[12] + src[13]

        movl    48(%edx), %eax
        addl    48(%ecx), %eax
        addl    48(%ebx), %eax
        movl    %eax, 48(%esi)          # dst[12] = src[12] + src[13] + src[14]

        movl    52(%edx), %eax
        addl    52(%ecx), %eax
        addl    52(%ebx), %eax
        movl    %eax, 52(%esi)          # dst[13] = src[13] + src[14] + src[15]

        movl    56(%edx), %eax
        addl    56(%ecx), %eax
        addl    56(%ebx), %eax
        movl    %eax, 56(%esi)          # dst[14] = src[14] + src[15] + src[16]

        movl    60(%edx), %eax
        addl    60(%ecx), %eax
        addl    60(%ebx), %eax
        movl    %eax, 60(%esi)          # dst[15] = src[15] + src[16] + src[17]

        movl    64(%edx), %eax
        addl    64(%ecx), %eax
        addl    64(%ebx), %eax
        movl    %eax, 64(%esi)          # dst[16] = src[16] + src[17] + src[18]

        popl    %ebx
        popl    %esi
        popl    %ebp
        ret
.LFE1435:
        .size   _Z6smoothPiS_, .-_Z6smoothPiS_
# The personality routine is referenced from the CIE in .eh_frame.
        .globl  __gxx_personality_v0

#-----------------------------------------------------------------------
# fill(int *)                                   [_Z4fillPi]
#
# Syntax: GNU as, AT&T.  ABI: i386 cdecl.
# In:     pointer argument at 8(%ebp), kept in %edx ("p" below)
# Effect: p[i] = i   for i = 0 .. 49999
# Regs:   %eax = i (also the value stored); %ebp saved and restored.
#
# Directive and labels use a single leading dot so that "jne .L4"
# resolves to the loop head.
#-----------------------------------------------------------------------
        .align 2
        .p2align 4,,15
        .globl  _Z4fillPi
        .type   _Z4fillPi, @function
_Z4fillPi:
.LFB1436:
        pushl   %ebp
.LCFI4:
        xorl    %eax, %eax              # i = 0
        movl    %esp, %ebp
.LCFI5:
        movl    8(%ebp), %edx           # edx = p
        .p2align 4,,7
.L4:
        movl    %eax, (%edx,%eax,4)     # p[i] = i
        addl    $1, %eax
        cmpl    $50000, %eax
        jne     .L4                     # loop until i == 50000
        popl    %ebp
        ret
.LFE1436:
        .size   _Z4fillPi, .-_Z4fillPi
#-----------------------------------------------------------------------
# __static_initialization_and_destruction_0(int, int)
#                       [_Z41__static_initialization_and_destruction_0ii]
#
# File-local (no .globl).  Both arguments arrive in registers here:
# In:     %eax = first argument, %edx = second argument
# Effect: only when %eax == 1 and %edx == 65535:
#           - calls _ZNSt8ios_base4InitC1Ev with &_ZSt8__ioinit
#           - calls __cxa_atexit(__tcf_0, 0, &__dso_handle)
#         otherwise returns immediately.
#
# Labels use a single leading dot so that "je .L15" / "jne .L14" and the
# .eh_frame references to .LFB1591/.LCFI6/.LCFI7/.LFE1591 resolve.
#-----------------------------------------------------------------------
        .align 2
        .p2align 4,,15
        .type   _Z41__static_initialization_and_destruction_0ii, @function
_Z41__static_initialization_and_destruction_0ii:
.LFB1591:
        pushl   %ebp
.LCFI6:
        movl    %esp, %ebp
.LCFI7:
        subl    $24, %esp               # outgoing-argument area
.LCFI8:
        subl    $1, %eax
        je      .L15                    # first argument == 1 ?
.L14:
        leave
        ret
        .p2align 4,,7
.L15:
        cmpl    $65535, %edx
        jne     .L14                    # second argument != 65535: nothing to do
        movl    $_ZSt8__ioinit, (%esp)
        call    _ZNSt8ios_base4InitC1Ev
        movl    $__dso_handle, 8(%esp)
        movl    $0, 4(%esp)
        movl    $__tcf_0, (%esp)
        call    __cxa_atexit            # register __tcf_0 to run at exit
        leave
        ret
.LFE1591:
        .size   _Z41__static_initialization_and_destruction_0ii, .-_Z41__static_initialization_and_destruction_0ii
#-----------------------------------------------------------------------
# _GLOBAL__I__Z6smoothPiS_
#
# File-local function whose address is stored in the .ctors section.
# Effect: loads %eax = 1 and %edx = 65535, then tail-jumps to
#         _Z41__static_initialization_and_destruction_0ii.
#
# Labels use a single leading dot (.LFB1593, .LCFI9, ...).
#-----------------------------------------------------------------------
        .align 2
        .p2align 4,,15
        .type   _GLOBAL__I__Z6smoothPiS_, @function
_GLOBAL__I__Z6smoothPiS_:
.LFB1593:
        pushl   %ebp
.LCFI9:
        movl    $65535, %edx            # second argument
        movl    %esp, %ebp
.LCFI10:
        movl    $1, %eax                # first argument
        popl    %ebp
        jmp     _Z41__static_initialization_and_destruction_0ii   # tail call
.LFE1593:
        .size   _GLOBAL__I__Z6smoothPiS_, .-_GLOBAL__I__Z6smoothPiS_
#-----------------------------------------------------------------------
# __tcf_0
#
# File-local function; its address is passed to __cxa_atexit by
# _Z41__static_initialization_and_destruction_0ii.
# Effect: overwrites its own first stack-argument slot, 8(%ebp), with
#         &_ZSt8__ioinit and tail-jumps to _ZNSt8ios_base4InitD1Ev, so
#         that function sees &_ZSt8__ioinit as its first argument.
#
# Labels use a single leading dot (.LFB1592, .LCFI11, ...).
#-----------------------------------------------------------------------
        .align 2
        .p2align 4,,15
        .type   __tcf_0, @function
__tcf_0:
.LFB1592:
        pushl   %ebp
.LCFI11:
        movl    %esp, %ebp
.LCFI12:
        movl    $_ZSt8__ioinit, 8(%ebp) # replace incoming argument
        popl    %ebp
        jmp     _ZNSt8ios_base4InitD1Ev # tail call
.LFE1592:
        .size   __tcf_0, .-__tcf_0
# Read-only constants used by main.  Labels use a single leading dot so
# that the references in main ($.LC0, $.LC3, .LC1, .LC2) resolve.

# String literals printed by main.
        .section        .rodata.str1.1,"aMS",@progbits,1
.LC0:
        .string "Time smooth(): "
.LC3:
        .string " ms\n"

# Single-precision constants: main divides by .LC1 (fdivs) and then
# multiplies by .LC2 (fmuls).
        .section        .rodata.cst4,"aM",@progbits,4
        .align 4
.LC1:
        .long   1232348160              # 0x49742400 = 1000000.0f
        .align 4
.LC2:
        .long   1148846080              # 0x447A0000 = 1000.0f
#-----------------------------------------------------------------------
# main
#
# Syntax: GNU as, AT&T.  ABI: i386 cdecl.
# Frame (relative to %ebp):
#   -200016 .. -17      first  200000-byte array, address kept in %esi
#   -400016 .. -200017  second 200000-byte array, address kept in %edi
#   -400020             clock() value taken before the timed loop
# Flow:
#   1. zero both arrays with memset
#   2. store i into element i of the %esi array for i = 0 .. 49999
#   3. call smooth once, then clock()
#   4. call smooth(%edi, %esi) 100000000 times, then clock() again
#   5. print .LC0, (ticks / .LC1) * .LC2 as a double, then .LC3
#   6. print the first int of the %edi array
# Out:    %eax = 0
# Regs:   %ebx = loop counter, later the elapsed tick count.
#
# Labels use a single leading dot so that "jne .L21" / "jne .L23" and the
# .eh_frame references to .LFB1437/.LCFI13../.LFE1437 resolve.
#-----------------------------------------------------------------------
        .text
        .align 2
        .p2align 4,,15
        .globl  main
        .type   main, @function
main:
.LFB1437:
        leal    4(%esp), %ecx           # ecx = address of the incoming arguments
.LCFI13:
        andl    $-16, %esp              # realign the stack to 16 bytes
        pushl   -4(%ecx)                # re-push the return address
.LCFI14:
        pushl   %ebp
.LCFI15:
        movl    %esp, %ebp
.LCFI16:
        pushl   %edi
.LCFI17:
        pushl   %esi
.LCFI18:
        pushl   %ebx
.LCFI19:
        pushl   %ecx
.LCFI20:
        subl    $400024, %esp
.LCFI21:
        leal    -200016(%ebp), %esi     # esi = first array
        movl    $200000, 8(%esp)
        leal    -400016(%ebp), %edi     # edi = second array
        movl    $0, 4(%esp)
        movl    %esi, (%esp)
        call    memset                  # memset(esi, 0, 200000)
        movl    $200000, 8(%esp)
        movl    $0, 4(%esp)
        movl    %edi, (%esp)
        call    memset                  # memset(edi, 0, 200000)

        # Same loop as _Z4fillPi, applied to the %esi array.
        xorl    %eax, %eax
        .p2align 4,,7
.L21:
        movl    %eax, (%esi,%eax,4)
        addl    $1, %eax
        cmpl    $50000, %eax
        jne     .L21

        # NOTE(review): this untimed call passes %edi for BOTH arguments,
        # whereas the timed loop below passes (%edi, %esi).  Kept exactly
        # as generated; confirm against scratch.cpp.
        movl    %edi, 4(%esp)
        xorl    %ebx, %ebx              # loop counter = 0
        movl    %edi, (%esp)
        call    _Z6smoothPiS_
        call    clock
        movl    %eax, -400020(%ebp)     # start ticks
        .p2align 4,,7
.L23:
        movl    %esi, 4(%esp)
        addl    $1, %ebx
        movl    %edi, (%esp)
        call    _Z6smoothPiS_           # smooth(edi, esi)
        cmpl    $100000000, %ebx
        jne     .L23

        call    clock
        movl    $.LC0, 4(%esp)
        movl    $_ZSt4cout, (%esp)
        movl    %eax, %ebx              # end ticks
        call    _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
        subl    -400020(%ebp), %ebx     # ebx = end - start
        pushl   %ebx                    # bounce through memory to load it on the FPU
        fildl   (%esp)
        addl    $4, %esp
        fdivs   .LC1
        movl    %eax, (%esp)            # stream returned by the previous call
        fmuls   .LC2
        fstpl   4(%esp)                 # double argument
        call    _ZNSolsEd
        movl    $.LC3, 4(%esp)
        movl    %eax, (%esp)
        call    _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_PKc
        movl    -400016(%ebp), %eax     # first int of the %edi array
        movl    $_ZSt4cout, (%esp)
        movl    %eax, 4(%esp)
        call    _ZNSolsEi

        addl    $400024, %esp
        xorl    %eax, %eax              # return 0
        popl    %ecx
        popl    %ebx
        popl    %esi
        popl    %edi
        popl    %ebp
        leal    -4(%ecx), %esp          # undo the realignment
        ret
.LFE1437:
        .size   main, .-main
# File-local common symbol _ZSt8__ioinit: 1 byte, alignment 1.  Its address
# is passed to _ZNSt8ios_base4InitC1Ev and used by __tcf_0.
.local _ZSt8__ioinit
.comm _ZSt8__ioinit,1,1
# Weak-reference aliases (__gthrw_* names) for pthread entry points.  None of
# these aliases is referenced by the code in this listing.
.weakref _Z20__gthrw_pthread_oncePiPFvvE,pthread_once
.weakref _Z27__gthrw_pthread_getspecificj,pthread_getspecific
.weakref _Z27__gthrw_pthread_setspecificjPKv,pthread_setspecific
.weakref _Z22__gthrw_pthread_createPmPK14pthread_attr_tPFPvS3_ES3_,pthread_create
.weakref _Z22__gthrw_pthread_cancelm,pthread_cancel
.weakref _Z26__gthrw_pthread_mutex_lockP15pthread_mutex_t,pthread_mutex_lock
.weakref _Z29__gthrw_pthread_mutex_trylockP15pthread_mutex_t,pthread_mutex_trylock
.weakref _Z28__gthrw_pthread_mutex_unlockP15pthread_mutex_t,pthread_mutex_unlock
.weakref _Z26__gthrw_pthread_mutex_initP15pthread_mutex_tPK19pthread_mutexattr_t,pthread_mutex_init
.weakref _Z26__gthrw_pthread_key_createPjPFvPvE,pthread_key_create
.weakref _Z26__gthrw_pthread_key_deletej,pthread_key_delete
.weakref _Z30__gthrw_pthread_mutexattr_initP19pthread_mutexattr_t,pthread_mutexattr_init
.weakref _Z33__gthrw_pthread_mutexattr_settypeP19pthread_mutexattr_ti,pthread_mutexattr_settype
.weakref _Z33__gthrw_pthread_mutexattr_destroyP19pthread_mutexattr_t,pthread_mutexattr_destroy
#-----------------------------------------------------------------------
# Call-frame information (.eh_frame): one CIE followed by two FDEs,
# covering .LFB1591 .. .LFE1591 and .LFB1437 .. .LFE1437 (main).
#
# Labels use a single leading dot so that the length and offset
# expressions (.LECIE1-.LSCIE1, .LASFDE5-.Lframe1, ...) resolve to the
# labels defined here.
#-----------------------------------------------------------------------
        .section        .eh_frame,"a",@progbits
.Lframe1:
        .long   .LECIE1-.LSCIE1         # CIE length
.LSCIE1:
        .long   0x0                     # CIE id
        .byte   0x1                     # version
        .string "zP"                    # augmentation
        .uleb128 0x1                    # code alignment factor
        .sleb128 -4                     # data alignment factor
        .byte   0x8                     # return-address column
        .uleb128 0x5                    # augmentation data length
        .byte   0x0                     # personality pointer encoding
        .long   __gxx_personality_v0
        .byte   0xc                     # DW_CFA_def_cfa
        .uleb128 0x4
        .uleb128 0x4
        .byte   0x88                    # DW_CFA_offset, column 8
        .uleb128 0x1
        .align 4
.LECIE1:

# FDE for _Z41__static_initialization_and_destruction_0ii.
.LSFDE5:
        .long   .LEFDE5-.LASFDE5        # FDE length
.LASFDE5:
        .long   .LASFDE5-.Lframe1       # offset back to the CIE
        .long   .LFB1591                # start of covered code
        .long   .LFE1591-.LFB1591       # size of covered code
        .uleb128 0x0                    # augmentation data length
        .byte   0x4                     # DW_CFA_advance_loc4
        .long   .LCFI6-.LFB1591
        .byte   0xe                     # DW_CFA_def_cfa_offset
        .uleb128 0x8
        .byte   0x85                    # DW_CFA_offset, column 5
        .uleb128 0x2
        .byte   0x4                     # DW_CFA_advance_loc4
        .long   .LCFI7-.LCFI6
        .byte   0xd                     # DW_CFA_def_cfa_register
        .uleb128 0x5
        .align 4
.LEFDE5:

# FDE for main.
.LSFDE11:
        .long   .LEFDE11-.LASFDE11      # FDE length
.LASFDE11:
        .long   .LASFDE11-.Lframe1      # offset back to the CIE
        .long   .LFB1437                # start of covered code
        .long   .LFE1437-.LFB1437       # size of covered code
        .uleb128 0x0                    # augmentation data length
        .byte   0x4                     # DW_CFA_advance_loc4
        .long   .LCFI13-.LFB1437
        .byte   0xc                     # DW_CFA_def_cfa
        .uleb128 0x1
        .uleb128 0x0
        .byte   0x9                     # DW_CFA_register
        .uleb128 0x4
        .uleb128 0x1
        .byte   0x4                     # DW_CFA_advance_loc4
        .long   .LCFI14-.LCFI13
        .byte   0xc                     # DW_CFA_def_cfa
        .uleb128 0x4
        .uleb128 0x4
        .byte   0x4                     # DW_CFA_advance_loc4
        .long   .LCFI15-.LCFI14
        .byte   0xe                     # DW_CFA_def_cfa_offset
        .uleb128 0x8
        .byte   0x85                    # DW_CFA_offset, column 5
        .uleb128 0x2
        .byte   0x4                     # DW_CFA_advance_loc4
        .long   .LCFI16-.LCFI15
        .byte   0xd                     # DW_CFA_def_cfa_register
        .uleb128 0x5
        .byte   0x4                     # DW_CFA_advance_loc4
        .long   .LCFI20-.LCFI16
        .byte   0x84                    # DW_CFA_offset, column 4
        .uleb128 0x6
        .byte   0x83                    # DW_CFA_offset, column 3
        .uleb128 0x5
        .byte   0x86                    # DW_CFA_offset, column 6
        .uleb128 0x4
        .byte   0x87                    # DW_CFA_offset, column 7
        .uleb128 0x3
        .align 4
.LEFDE11:
# Compiler identification string recorded in the object file.
.ident "GCC: (GNU) 4.1.2 20070626 (Red Hat 4.1.2-14)"
# Empty .note.GNU-stack section (no flags, no contents).
.section .note.GNU-stack,"",@progbits