N
Nikolai Lugovoi
I got some segfault errors when trying to run `rake test` for Rails.
A minimal example to reproduce it is: ruby -e 'gem "rake"'
The odd thing is that these segfaults are not 100% reproducible;
sometimes it works as expected:
-----8<----------
$ ruby -e 'gem "rake"'
<internal:gem_prelude>:187:in `push_gem_version_on_load_path': Could not find RubyGem rake (>= 0) (Gem::LoadError)
        from /home/nlugovoi/lib/ruby/1.9.1/rubygems/defaults.rb:0:in `rescue in default_exec_format'
        from -e:1:in `<main>'
-----8<----------
But sometimes it just fails:
-----8<----------
$ ruby -e 'gem "rake"'
/home/nlugovoi/lib/ruby/1.9.1/rubygems.rb:1093: [BUG] Segmentation fault
ruby 1.9.2dev (2009-09-04 trunk 24743) [i686-linux]
-- control frame ----------
c:0012 p:---- s:0043 b:0043 l:000042 d:000042 CFUNC  :require
c:0011 p:0209 s:0039 b:0039 l:000038 d:000038 TOP    /home/nlugovoi/lib/ruby/1.9.1/rubygems.rb:1093
c:0010 p:---- s:0036 b:0036 l:000035 d:000035 FINISH
c:0009 p:---- s:0034 b:0034 l:000033 d:000033 CFUNC  :require
c:0008 p:0092 s:0030 b:0030 l:000029 d:000029 METHOD <internal:gem_prelude>:159
c:0007 p:0017 s:0027 b:0027 l:000026 d:000026 METHOD <internal:gem_prelude>:279
c:0006 p:---- s:0023 b:0023 l:000022 d:000022 FINISH
c:0005 p:0043 s:0021 b:0020 l:000019 d:000019 METHOD <internal:gem_prelude>:187
Segmentation fault
-----8<----------
From gdb I get:
-----8<----------
#0  0x0813a387 in vm_backtrace_push (arg=0xbfffa0ec, file=0, line_no=0, name=0) at vm.c:755
#1  0x0813a2db in vm_backtrace_each (th=0x81faa98, lev=-2, iter=0x813a363 <vm_backtrace_push>, arg=0xbfffa0ec) at vm.c:733
#2  0x0813a44c in vm_backtrace (th=0x81faa98, lev=-1) at vm.c:768
#3  0x08138ac7 in rb_make_backtrace () at vm_eval.c:1426
#4  0x0805b8ac in rb_longjmp (tag=6, mesg=140818900) at eval.c:363
#5  0x0805bb5e in rb_exc_raise (mesg=140818900) at eval.c:419
#6  0x0816d319 in rb_raise (exc=136482940, fmt=0x81a3676 "no such file to load -- %s") at error.c:1132
#7  0x0816f580 in load_failed (fname=140824860) at load.c:532
754         bt = rb_enc_sprintf(rb_enc_compatible(file, name), "%s:%d:in `%s'",
755                             RSTRING_PTR(file), line_no, RSTRING_PTR(name));
(gdb) up
#1  0x0813a2db in vm_backtrace_each (th=0x81faa98, lev=-2, iter=0x813a363 <vm_backtrace_push>, arg=0xbfffa0ec) at vm.c:733
733                     if ((*iter)(arg, file, line_no, iseq->name)) break;
(gdb) l
728                 if (cfp->pc != 0) {
729                     rb_iseq_t *iseq = cfp->iseq;
730
731                     line_no = rb_vm_get_sourceline(cfp);
732                     file = iseq->filename;
733                     if ((*iter)(arg, file, line_no, iseq->name)) break;
734                 }
-----8<----------
My theory is that when the method Kernel#gem, originally defined in
gem_prelude, is redefined/discarded in rubygems.rb, the original
iseq for the method becomes orphaned and is freed by the garbage collector.
But it is still referenced from a control frame, hence the segfault.
I tried a quick-and-dirty patch to explicitly mark iseq objects; I am not sure
whether it is the correct way:
-----8<----------
--- a/vm.c
+++ b/vm.c
@@ -1415,7 +1415,21 @@ static int
vm_mark_each_thread_func(st_data_t key, st_data_t value, st_data_t dummy)
{
VALUE thval = (VALUE)key;
+ rb_thread_t *th;
+ rb_control_frame_t *cfp;
+
+ GetThreadPtr(thval, th);
+
+ cfp = th->cfp;
+ while (!RUBY_VM_CONTROL_FRAME_STACK_OVERFLOW_P(th, cfp)) {
+ if (RUBY_VM_NORMAL_ISEQ_P(cfp->iseq)) {
+ rb_gc_mark(cfp->iseq->self);
+ }
+ cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
+ }
+
rb_gc_mark(thval);
+
return ST_CONTINUE;
}
-----8<----------
That seemed to eliminate such segfaults.
Are there any other explanations or fixes?
A minimal example to reproduce it is: ruby -e 'gem "rake"'
The odd thing is that these segfaults are not 100% reproducible;
sometimes it works as expected:
-----8<----------
$ ruby -e 'gem "rake"'
<internal:gem_prelude>:187:in `push_gem_version_on_load_path': Could not find RubyGem rake (>= 0) (Gem::LoadError)
        from /home/nlugovoi/lib/ruby/1.9.1/rubygems/defaults.rb:0:in `rescue in default_exec_format'
        from -e:1:in `<main>'
-----8<----------
But sometimes it just fails:
-----8<----------
$ ruby -e 'gem "rake"'
/home/nlugovoi/lib/ruby/1.9.1/rubygems.rb:1093: [BUG] Segmentation fault
ruby 1.9.2dev (2009-09-04 trunk 24743) [i686-linux]
-- control frame ----------
c:0012 p:---- s:0043 b:0043 l:000042 d:000042 CFUNC  :require
c:0011 p:0209 s:0039 b:0039 l:000038 d:000038 TOP    /home/nlugovoi/lib/ruby/1.9.1/rubygems.rb:1093
c:0010 p:---- s:0036 b:0036 l:000035 d:000035 FINISH
c:0009 p:---- s:0034 b:0034 l:000033 d:000033 CFUNC  :require
c:0008 p:0092 s:0030 b:0030 l:000029 d:000029 METHOD <internal:gem_prelude>:159
c:0007 p:0017 s:0027 b:0027 l:000026 d:000026 METHOD <internal:gem_prelude>:279
c:0006 p:---- s:0023 b:0023 l:000022 d:000022 FINISH
c:0005 p:0043 s:0021 b:0020 l:000019 d:000019 METHOD <internal:gem_prelude>:187
Segmentation fault
-----8<----------
From gdb I get:
-----8<----------
#0  0x0813a387 in vm_backtrace_push (arg=0xbfffa0ec, file=0, line_no=0, name=0) at vm.c:755
#1  0x0813a2db in vm_backtrace_each (th=0x81faa98, lev=-2, iter=0x813a363 <vm_backtrace_push>, arg=0xbfffa0ec) at vm.c:733
#2  0x0813a44c in vm_backtrace (th=0x81faa98, lev=-1) at vm.c:768
#3  0x08138ac7 in rb_make_backtrace () at vm_eval.c:1426
#4  0x0805b8ac in rb_longjmp (tag=6, mesg=140818900) at eval.c:363
#5  0x0805bb5e in rb_exc_raise (mesg=140818900) at eval.c:419
#6  0x0816d319 in rb_raise (exc=136482940, fmt=0x81a3676 "no such file to load -- %s") at error.c:1132
#7  0x0816f580 in load_failed (fname=140824860) at load.c:532
754         bt = rb_enc_sprintf(rb_enc_compatible(file, name), "%s:%d:in `%s'",
755                             RSTRING_PTR(file), line_no, RSTRING_PTR(name));
(gdb) up
#1  0x0813a2db in vm_backtrace_each (th=0x81faa98, lev=-2, iter=0x813a363 <vm_backtrace_push>, arg=0xbfffa0ec) at vm.c:733
733                     if ((*iter)(arg, file, line_no, iseq->name)) break;
(gdb) l
728                 if (cfp->pc != 0) {
729                     rb_iseq_t *iseq = cfp->iseq;
730
731                     line_no = rb_vm_get_sourceline(cfp);
732                     file = iseq->filename;
733                     if ((*iter)(arg, file, line_no, iseq->name)) break;
734                 }
-----8<----------
My theory is that when the method Kernel#gem, originally defined in
gem_prelude, is redefined/discarded in rubygems.rb, the original
iseq for the method becomes orphaned and is freed by the garbage collector.
But it is still referenced from a control frame, hence the segfault.
I tried a quick-and-dirty patch to explicitly mark iseq objects; I am not sure
whether it is the correct way:
-----8<----------
--- a/vm.c
+++ b/vm.c
@@ -1415,7 +1415,21 @@ static int
vm_mark_each_thread_func(st_data_t key, st_data_t value, st_data_t dummy)
{
VALUE thval = (VALUE)key;
+ rb_thread_t *th;
+ rb_control_frame_t *cfp;
+
+ GetThreadPtr(thval, th);
+
+ cfp = th->cfp;
+ while (!RUBY_VM_CONTROL_FRAME_STACK_OVERFLOW_P(th, cfp)) {
+ if (RUBY_VM_NORMAL_ISEQ_P(cfp->iseq)) {
+ rb_gc_mark(cfp->iseq->self);
+ }
+ cfp = RUBY_VM_PREVIOUS_CONTROL_FRAME(cfp);
+ }
+
rb_gc_mark(thval);
+
return ST_CONTINUE;
}
-----8<----------
That seemed to eliminate such segfaults.
Are there any other explanations or fixes?