I'll eventually move to display lists or something similar in OpenGL, but in
the meantime I was wondering whether this can be optimized any further:
# Draws +string+ as a horizontal run of textured quads, one quad per byte,
# starting at x = 0.
#
# Each byte is turned into a glyph index by subtracting 32. That index selects
# the glyph's texture coordinates from @tex_coords_left / @tex_coords_right /
# @tex_coords_top / @tex_coords_bottom, and its horizontal advance from
# @sizes[index][0]. Every quad is font.height units wide and tall.
#
# NOTE(review): a byte below 32 yields a negative index, which Ruby arrays
# resolve from the end of the table -- presumably callers only pass printable
# ASCII; confirm against the callers.
def draw_string(string)
  size = font.height
  x = 0

  GL::Enable(GL::TEXTURE_2D)
  GL::Begin(GL::QUADS)

  string.each_byte do |char|
    offset = char - 32

    # Corners are emitted in the order: top-left, bottom-left, bottom-right,
    # top-right.
    GL::TexCoord2f(@tex_coords_left[offset], @tex_coords_top[offset])
    GL::Vertex(x, 0)
    GL::TexCoord2f(@tex_coords_left[offset], @tex_coords_bottom[offset])
    GL::Vertex(x, size)
    GL::TexCoord2f(@tex_coords_right[offset], @tex_coords_bottom[offset])
    GL::Vertex(x + size, size)
    GL::TexCoord2f(@tex_coords_right[offset], @tex_coords_top[offset])
    GL::Vertex(x + size, 0)

    # Advance the pen by this glyph's own width, not by the quad size.
    x += @sizes[offset][0]
  end

  # The parentheses are required: without them GL::End is a constant lookup.
  GL::End()
  # Balance the Enable above so texturing does not stay switched on.
  GL::Disable(GL::TEXTURE_2D)
end
This should be a bit faster:
# Draws +string+ as a horizontal run of textured quads, one quad per byte,
# starting at x = 0. This is the micro-optimized variant: each per-glyph
# texture coordinate is fetched once into a local and then reused.
#
# Each byte is turned into a glyph index by subtracting 32. That index selects
# the glyph's texture coordinates from @tex_coords_left / @tex_coords_right /
# @tex_coords_top / @tex_coords_bottom, and its horizontal advance from
# @sizes[index][0]. Every quad is font.height units wide and tall.
#
# NOTE(review): a byte below 32 yields a negative index, which Ruby arrays
# resolve from the end of the table -- presumably callers only pass printable
# ASCII; confirm against the callers.
def draw_string(string)
  size = font.height
  x = 0

  GL::Enable(GL::TEXTURE_2D)
  GL::Begin(GL::QUADS)

  # Declare the loop's working variables at method level so that, on Ruby 1.8,
  # the block below uses method-level locals (lvars) rather than block-local
  # dynamic variables (dvars), which are often slower there.
  char = offset = left = top = bottom = right = 0

  string.each_byte do |char|
    offset = char - 32

    # Read each coordinate once: this halves the instance-variable reads and
    # Array#[] calls compared with looking them up for every corner.
    top    = @tex_coords_top[offset]
    bottom = @tex_coords_bottom[offset]
    left   = @tex_coords_left[offset]
    right  = @tex_coords_right[offset]

    # Corners are emitted in the order: top-left, bottom-left, bottom-right,
    # top-right.
    GL::TexCoord2f(left, top)
    GL::Vertex(x, 0)
    GL::TexCoord2f(left, bottom)
    GL::Vertex(x, size)
    GL::TexCoord2f(right, bottom)
    GL::Vertex(x + size, size)
    GL::TexCoord2f(right, top)
    GL::Vertex(x + size, 0)

    # Advance the pen by this glyph's own width, not by the quad size.
    x += @sizes[offset][0]
  end

  # The parentheses are required: without them GL::End is a constant lookup.
  GL::End()
  # Balance the Enable above so texturing does not stay switched on.
  GL::Disable(GL::TEXTURE_2D)
end
You can't expect much from such micro-optimizations, but they do help a bit:
RUBY_VERSION # => "1.8.4"
require 'benchmark'

# Part 1: reading an instance variable versus reading a method-level local
# that holds the same value. Every loop body is a run of bare variable reads;
# under -w Ruby reports these as "useless use of a variable in void context",
# which is expected here because the read itself is what is being timed.
puts "ivar vs. lvar"
Benchmark.bm(10) do |bm|
  probe_class = Class.new do
    def initialize(ivar)
      @iv = ivar
    end

    # Five instance-variable reads per iteration.
    def using_ivar
      1_000_000.times { @iv; @iv; @iv; @iv; @iv }
    end

    # Five local-variable reads per iteration; the ivar is copied once up front.
    def using_lvar
      cached = @iv
      1_000_000.times { cached; cached; cached; cached; cached }
    end

    # Twenty instance-variable reads per iteration (four times using_ivar).
    def using_ivar2
      1_000_000.times do
        @iv; @iv; @iv; @iv; @iv
        @iv; @iv; @iv; @iv; @iv
        @iv; @iv; @iv; @iv; @iv
        @iv; @iv; @iv; @iv; @iv
      end
    end

    # Twenty local-variable reads per iteration (four times using_lvar).
    def using_lvar2
      cached = @iv
      1_000_000.times do
        cached; cached; cached; cached; cached
        cached; cached; cached; cached; cached
        cached; cached; cached; cached; cached
        cached; cached; cached; cached; cached
      end
    end
  end
  subject = probe_class.new(1)

  bm.report("ivar") { subject.using_ivar }
  bm.report("lvar") { subject.using_lvar }
  bm.report("ivar (x4)") { subject.using_ivar2 }
  bm.report("lvar (x4)") { subject.using_lvar2 }
end

# Part 2: assigning to a block parameter that exists only inside the block
# versus one whose name was already assigned in the enclosing scope. Per the
# surrounding discussion, on Ruby 1.8 the former is a dynamic variable (dvar)
# and the latter a plain local (lvar).
puts "dvar vs. lvar"
Benchmark.bm(10) do |bm|
  bm.report("dvar") do
    1_000_000.times { |inner| inner = 1 }
  end

  # Assigning the name first is what makes the second case differ from the first.
  outer = 0
  bm.report("lvar") do
    1_000_000.times { |outer| outer = 1 }
  end
end
# >> ivar vs. lvar
# >>                 user     system      total        real
# >> ivar        0.850000   0.000000   0.850000 (  0.886111)
# >> lvar        0.530000   0.000000   0.530000 (  0.529777)
# >> ivar (x4)   2.900000   0.010000   2.910000 (  2.998297)
# >> lvar (x4)   1.580000   0.000000   1.580000 (  1.645420)
# >> dvar vs. lvar
# >>                 user     system      total        real
# >> dvar        0.380000   0.000000   0.380000 (  0.390633)
# >> lvar        0.360000   0.000000   0.360000 (  0.374979)
Access and assignment to lvars will often be faster than to dvars,
especially if:
* you're accessing a dynamic variable from an enclosing lexical scope
* there are lots of variables
foo { a = 1; bar{ b1 = 2; ...; b100 = 100; baz{ c = 3; foobar{ a } } } }
=====
The reason is that lvars are indexed directly in an array, whereas dvars are
found by a linear search.