S
sh.vipin
Below is a piece of bit-manipulation code. The function
"eval_output_out" uses some macros.
Say we have two versions of the same code:
1. variable "b" in "eval_output_out" is initialized
2. variable "b" in "eval_output_out" is NOT initialized
The code expands such that it is independent of the past value of variable
"b". I dumped the preprocessed output of the two versions and there was just
one difference: "b" is initialized in one and not in the other.
QUERY: why does version 1 run faster than version 2 with the -O3 option of gcc?
If I just dump the assembly of the two versions using "gcc -S", the behavior is
as expected: version #1 (with initialization) has one extra
instruction to initialize the variable.
But when I dump the assembly of the two versions with "gcc -O3 -S", the
assembly of version 2 (without initialization) takes 4 extra
instructions.
Can somebody please explain why this is happening?
ASSEMBLY OF VERSION #1 [ WITH INITIALIZATION]
------------------------------------------------
.globl eval_output_out
.type eval_output_out,@function
# void eval_output_out(top_test_model *model)
# Listing for version #1 ("b" initialized), 32-bit x86, AT&T syntax.
# In:    8(%ebp) = model; (%eax) / 4(%eax) correspond to model->inputs[0] /
#        model->outputs[0] once %eax holds model.
# Uses:  %eax, %ecx, %edx (no callee-saved register besides %ebp is touched).
eval_output_out:
        pushl   %ebp
        movl    width_masks+4, %edx     # edx = mask = width_masks[1]
        movl    %esp, %ebp
        movl    8(%ebp), %eax           # eax = model
        movl    %edx, %ecx              # ecx = mask
        notl    %edx                    # edx = ~mask
        andl    4(%eax), %edx           # edx = model->outputs[0] & ~mask
        andl    (%eax), %ecx            # ecx = model->inputs[0] & mask
        movl    %edx, 4(%eax)           # store of the cleared word (the "&=" in reset_arr_var_bits)
        sall    $2, %ecx                # field moved up to bit 2, as stored into "b"
        andl    width_masks+4, %ecx     # bits 0..1 of "b"; mask is reloaded from memory
        orl     %ecx, %edx
        movl    %edx, 4(%eax)           # model->outputs[0] = cleared word | field
        leave
        ret
ASSEMBLY OF VERSION #2 [ WITHOUT INITIALIZATION]
------------------------------------------------
.globl eval_output_out
.type eval_output_out,@function
# void eval_output_out(top_test_model *model)
# Listing for version #2 ("b" NOT initialized), 32-bit x86, AT&T syntax.
# In:    8(%ebp) = model; (%ecx) / 4(%ecx) correspond to model->inputs[0] /
#        model->outputs[0] once %ecx holds model.
# Uses:  %eax, %ecx, %edx, and %ebx (saved on entry, reloaded before leave).
eval_output_out:
        movl    width_masks+4, %edx     # edx = mask = width_masks[1]
        pushl   %ebp
        leal    0(,%edx,4), %eax        # eax = mask << 2
        movl    %esp, %ebp
        notl    %eax                    # eax = ~(mask << 2)
        pushl   %ebx                    # save the caller's ebx
        movl    8(%ebp), %ecx           # ecx = model
        andl    %eax, %ebx              # ebx is read before any write here: it stands in for "b"
        movl    %edx, %eax
        andl    (%ecx), %eax            # eax = model->inputs[0] & mask
        notl    %edx                    # edx = ~mask
        andl    4(%ecx), %edx           # edx = model->outputs[0] & ~mask
        sall    $2, %eax                # field moved up to bit 2
        movl    %edx, 4(%ecx)           # store of the cleared word (the "&=" in reset_arr_var_bits)
        orl     %eax, %ebx              # b = (b & ~(mask << 2)) | (field << 2)
        andl    width_masks+4, %ebx     # bits 0..1 of "b"; mask is reloaded from memory
        orl     %ebx, %edx
        movl    %edx, 4(%ecx)           # model->outputs[0] = cleared word | (b & mask)
        movl    (%esp), %ebx            # restore ebx from the slot pushed above
        leave
        ret
SOURCE CODE
------------------------------------
/* width_masks[i] has the low (i + 1) bits set, for i = 0 .. 31. */
static unsigned int width_masks[32] = {
    0x00000001, 0x00000003, 0x00000007, 0x0000000F,
    0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
    0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
    0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
    0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
    0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
    0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
    0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
};
/* Model state: a one-element input array and a one-element output array. */
typedef struct _top_test_model {
    unsigned int inputs[1];     /* read by eval_output_out */
    unsigned int outputs[1];    /* updated by eval_output_out */
} top_test_model;
/*
 * Bit-field helpers built on width_masks[]: width_masks[width - 1] is the
 * mask for a width-bit field, and lsb_col is the position of the field's
 * least significant bit.  The *_arr_* variants work on element lsb_row of an
 * array; the others work on a plain variable.  Arguments may be evaluated
 * more than once, so do not pass expressions with side effects.
 */

/* x >> y, yielding 0 instead of shifting when y is 32 or more. */
#define LOGICAL_RIGHT_SHIFT(x, y) (((y) >= 32) ? 0x0 : (x) >> (y))

/* Value of the field in reg_int[lsb_row], moved down to bit 0. */
#define get_arr_var_bits(reg_int, lsb_row, lsb_col, width) \
    (LOGICAL_RIGHT_SHIFT(((width_masks[(width) - 1] << (lsb_col)) & \
                          (reg_int)[lsb_row]), (lsb_col)))

/* Clear the field in reg_int[lsb_row] in place; yields the updated element. */
#define reset_arr_var_bits(reg_int, lsb_row, lsb_col, width) \
    ((reg_int)[lsb_row] &= ~(width_masks[(width) - 1] << (lsb_col)))

/*
 * Replace the field in reg_int[lsb_row] with val (val is not masked to width).
 * Written as one assignment: nesting reset_arr_var_bits() inside "=" would
 * modify the same element twice in one expression, which C leaves undefined.
 */
#define set_arr_var_bits(reg_int, lsb_row, lsb_col, width, val) \
    ((reg_int)[lsb_row] = \
        ((reg_int)[lsb_row] & ~(width_masks[(width) - 1] << (lsb_col))) | \
        ((val) << (lsb_col)))

/* Value of the field in the variable reg_int, moved down to bit 0. */
#define get_var_bits(reg_int, lsb_col, width) \
    (LOGICAL_RIGHT_SHIFT(((width_masks[(width) - 1] << (lsb_col)) & \
                          (reg_int)), (lsb_col)))

/* Clear the field in the variable reg_int in place; yields the updated value. */
#define rst_bits(reg_int, lsb_col, width) \
    ((reg_int) &= ~(width_masks[(width) - 1] << (lsb_col)))

/* Replace the field in the variable reg_int with val; single assignment, as above. */
#define set_var_bits(reg_int, lsb_col, width, val) \
    ((reg_int) = \
        ((reg_int) & ~(width_masks[(width) - 1] << (lsb_col))) | \
        ((val) << (lsb_col)))
/*
 * Copy bits 0..1 of model->inputs[0] into bits 2..3 of the local "b", then
 * copy bits 0..1 of "b" into bits 0..1 of model->outputs[0].
 */
void eval_output_out(top_test_model *model){
    /* Left uninitialized on purpose: this listing is version #2 of the
       question; version #1 differs only by giving "b" an initial value.
       NOTE(review): the first macro below changes only bits 2..3 of "b",
       while the second reads bits 0..1, so the value stored into
       model->outputs[0] does depend on what "b" held initially. */
    unsigned int b;

    /* take 2 bits starting from bit position 0 from variable
       model->inputs[0] and set them in 2 bits in variable "b"
       starting from bit position 2 */
    set_var_bits(b,2,2,get_arr_var_bits(model->inputs,0,0,2));

    /* take 2 bits starting from bit position 0 from variable b and set
       them in 2 bits in model->outputs[0]
       starting from bit position 0 */
    set_arr_var_bits(model->outputs,0,0,2,get_var_bits(b,0,2));
}
/* Run eval_output_out once on a zero-initialized model and exit with 0. */
int main (int argc, char **argv){
    /* Zeroed because eval_output_out reads both inputs[0] and outputs[0]. */
    top_test_model t = {{0}, {0}};

    (void)argc;     /* command-line arguments are not used */
    (void)argv;
    eval_output_out(&t);
    return 0;
}
"eval_output_out" there are some macros.
Say we have two versions of the same code:
1. variable "b" in "eval_output_out" is initialized
2. variable "b" in "eval_output_out" is NOT initialized
The code expands such that it is independent of the past value of variable
"b". I dumped the preprocessed output of the two versions and there was just
one difference: "b" is initialized in one and not in the other.
QUERY: why does version 1 run faster than version 2 with the -O3 option of gcc?
If I just dump the assembly of the two versions using "gcc -S", the behavior is
as expected: version #1 (with initialization) has one extra
instruction to initialize the variable.
But when I dump the assembly of the two versions with "gcc -O3 -S", the
assembly of version 2 (without initialization) takes 4 extra
instructions.
Can somebody please explain why this is happening?
ASSEMBLY OF VERSION #1 [ WITH INITIALIZATION]
------------------------------------------------
.globl eval_output_out
.type eval_output_out,@function
# void eval_output_out(top_test_model *model)
# Listing for version #1 ("b" initialized), 32-bit x86, AT&T syntax.
# In:    8(%ebp) = model; (%eax) / 4(%eax) correspond to model->inputs[0] /
#        model->outputs[0] once %eax holds model.
# Uses:  %eax, %ecx, %edx (no callee-saved register besides %ebp is touched).
eval_output_out:
        pushl   %ebp
        movl    width_masks+4, %edx     # edx = mask = width_masks[1]
        movl    %esp, %ebp
        movl    8(%ebp), %eax           # eax = model
        movl    %edx, %ecx              # ecx = mask
        notl    %edx                    # edx = ~mask
        andl    4(%eax), %edx           # edx = model->outputs[0] & ~mask
        andl    (%eax), %ecx            # ecx = model->inputs[0] & mask
        movl    %edx, 4(%eax)           # store of the cleared word (the "&=" in reset_arr_var_bits)
        sall    $2, %ecx                # field moved up to bit 2, as stored into "b"
        andl    width_masks+4, %ecx     # bits 0..1 of "b"; mask is reloaded from memory
        orl     %ecx, %edx
        movl    %edx, 4(%eax)           # model->outputs[0] = cleared word | field
        leave
        ret
ASSEMBLY OF VERSION #2 [ WITHOUT INITIALIZATION]
------------------------------------------------
.globl eval_output_out
.type eval_output_out,@function
# void eval_output_out(top_test_model *model)
# Listing for version #2 ("b" NOT initialized), 32-bit x86, AT&T syntax.
# In:    8(%ebp) = model; (%ecx) / 4(%ecx) correspond to model->inputs[0] /
#        model->outputs[0] once %ecx holds model.
# Uses:  %eax, %ecx, %edx, and %ebx (saved on entry, reloaded before leave).
eval_output_out:
        movl    width_masks+4, %edx     # edx = mask = width_masks[1]
        pushl   %ebp
        leal    0(,%edx,4), %eax        # eax = mask << 2
        movl    %esp, %ebp
        notl    %eax                    # eax = ~(mask << 2)
        pushl   %ebx                    # save the caller's ebx
        movl    8(%ebp), %ecx           # ecx = model
        andl    %eax, %ebx              # ebx is read before any write here: it stands in for "b"
        movl    %edx, %eax
        andl    (%ecx), %eax            # eax = model->inputs[0] & mask
        notl    %edx                    # edx = ~mask
        andl    4(%ecx), %edx           # edx = model->outputs[0] & ~mask
        sall    $2, %eax                # field moved up to bit 2
        movl    %edx, 4(%ecx)           # store of the cleared word (the "&=" in reset_arr_var_bits)
        orl     %eax, %ebx              # b = (b & ~(mask << 2)) | (field << 2)
        andl    width_masks+4, %ebx     # bits 0..1 of "b"; mask is reloaded from memory
        orl     %ebx, %edx
        movl    %edx, 4(%ecx)           # model->outputs[0] = cleared word | (b & mask)
        movl    (%esp), %ebx            # restore ebx from the slot pushed above
        leave
        ret
SOURCE CODE
------------------------------------
/* width_masks[i] has the low (i + 1) bits set, for i = 0 .. 31. */
static unsigned int width_masks[32] = {
    0x00000001, 0x00000003, 0x00000007, 0x0000000F,
    0x0000001F, 0x0000003F, 0x0000007F, 0x000000FF,
    0x000001FF, 0x000003FF, 0x000007FF, 0x00000FFF,
    0x00001FFF, 0x00003FFF, 0x00007FFF, 0x0000FFFF,
    0x0001FFFF, 0x0003FFFF, 0x0007FFFF, 0x000FFFFF,
    0x001FFFFF, 0x003FFFFF, 0x007FFFFF, 0x00FFFFFF,
    0x01FFFFFF, 0x03FFFFFF, 0x07FFFFFF, 0x0FFFFFFF,
    0x1FFFFFFF, 0x3FFFFFFF, 0x7FFFFFFF, 0xFFFFFFFF
};
/* Model state: a one-element input array and a one-element output array. */
typedef struct _top_test_model {
    unsigned int inputs[1];     /* read by eval_output_out */
    unsigned int outputs[1];    /* updated by eval_output_out */
} top_test_model;
/*
 * Bit-field helpers built on width_masks[]: width_masks[width - 1] is the
 * mask for a width-bit field, and lsb_col is the position of the field's
 * least significant bit.  The *_arr_* variants work on element lsb_row of an
 * array; the others work on a plain variable.  Arguments may be evaluated
 * more than once, so do not pass expressions with side effects.
 */

/* x >> y, yielding 0 instead of shifting when y is 32 or more. */
#define LOGICAL_RIGHT_SHIFT(x, y) (((y) >= 32) ? 0x0 : (x) >> (y))

/* Value of the field in reg_int[lsb_row], moved down to bit 0. */
#define get_arr_var_bits(reg_int, lsb_row, lsb_col, width) \
    (LOGICAL_RIGHT_SHIFT(((width_masks[(width) - 1] << (lsb_col)) & \
                          (reg_int)[lsb_row]), (lsb_col)))

/* Clear the field in reg_int[lsb_row] in place; yields the updated element. */
#define reset_arr_var_bits(reg_int, lsb_row, lsb_col, width) \
    ((reg_int)[lsb_row] &= ~(width_masks[(width) - 1] << (lsb_col)))

/*
 * Replace the field in reg_int[lsb_row] with val (val is not masked to width).
 * Written as one assignment: nesting reset_arr_var_bits() inside "=" would
 * modify the same element twice in one expression, which C leaves undefined.
 */
#define set_arr_var_bits(reg_int, lsb_row, lsb_col, width, val) \
    ((reg_int)[lsb_row] = \
        ((reg_int)[lsb_row] & ~(width_masks[(width) - 1] << (lsb_col))) | \
        ((val) << (lsb_col)))

/* Value of the field in the variable reg_int, moved down to bit 0. */
#define get_var_bits(reg_int, lsb_col, width) \
    (LOGICAL_RIGHT_SHIFT(((width_masks[(width) - 1] << (lsb_col)) & \
                          (reg_int)), (lsb_col)))

/* Clear the field in the variable reg_int in place; yields the updated value. */
#define rst_bits(reg_int, lsb_col, width) \
    ((reg_int) &= ~(width_masks[(width) - 1] << (lsb_col)))

/* Replace the field in the variable reg_int with val; single assignment, as above. */
#define set_var_bits(reg_int, lsb_col, width, val) \
    ((reg_int) = \
        ((reg_int) & ~(width_masks[(width) - 1] << (lsb_col))) | \
        ((val) << (lsb_col)))
/*
 * Copy bits 0..1 of model->inputs[0] into bits 2..3 of the local "b", then
 * copy bits 0..1 of "b" into bits 0..1 of model->outputs[0].
 */
void eval_output_out(top_test_model *model){
    /* Left uninitialized on purpose: this listing is version #2 of the
       question; version #1 differs only by giving "b" an initial value.
       NOTE(review): the first macro below changes only bits 2..3 of "b",
       while the second reads bits 0..1, so the value stored into
       model->outputs[0] does depend on what "b" held initially. */
    unsigned int b;

    /* take 2 bits starting from bit position 0 from variable
       model->inputs[0] and set them in 2 bits in variable "b"
       starting from bit position 2 */
    set_var_bits(b,2,2,get_arr_var_bits(model->inputs,0,0,2));

    /* take 2 bits starting from bit position 0 from variable b and set
       them in 2 bits in model->outputs[0]
       starting from bit position 0 */
    set_arr_var_bits(model->outputs,0,0,2,get_var_bits(b,0,2));
}
/* Run eval_output_out once on a zero-initialized model and exit with 0. */
int main (int argc, char **argv){
    /* Zeroed because eval_output_out reads both inputs[0] and outputs[0]. */
    top_test_model t = {{0}, {0}};

    (void)argc;     /* command-line arguments are not used */
    (void)argv;
    eval_output_out(&t);
    return 0;
}