D
Daniel Berger
Hi all,
After a recent "idiomatic" thread, I decided to try and refactor the
core File.basename method. I think I did a pretty good job. It's about
10 lines short, easier to read (I think), and self contained, i.e. no
calls to rmext().
However it is a *tad* slower, but not by much. I was wondering if
anyone would help me tweak it. Below is the (self contained) code,
extconf.rb file, a test suite (taken from Rubicon - it passes all
tests) and a benchmark program.
Any and all help appreciated.
Regards,
Dan
PS - Note that I do not expect this code to work on Windows, for which I
use a completely different approach using their builtin path handling
functions.
/* base.c */
#include "ruby.h"
#include <stdio.h>
#include <strings.h>
#include <libgen.h>
static VALUE base_basename(int argc, VALUE* argv, VALUE klass){
VALUE rbFileName, rbExt, rbBase;
rb_scan_args(argc, argv, "11", &rbFileName, &rbExt);
StringValue(rbFileName);
if(RSTRING(rbFileName)->len == 0) /* edge case */
rbBase = rbFileName;
else
rbBase = rb_str_new2(basename(StringValuePtr(rbFileName)));
if(RTEST(rbExt)){
char* base = RSTRING(rbBase)->ptr; /* for readability */
char* ext = RSTRING(rbExt)->ptr; /* ditto */
if(!strcmp(ext, ".*")){
int length = strlen(base) - strlen(strrchr(base, '.'));
rbBase = rb_str_new(base, length);
}
else{
if(strstr(base, ext)){
int length = strlen(base) - strlen(ext);
int span = strcspn(base, ext);
if(length == span)
rbBase = rb_str_new(base, length);
}
}
}
return rbBase;
}
void Init_base(){
VALUE rbBase = rb_define_class("Base", rb_cObject);
rb_define_singleton_method(rbBase, "basename", base_basename, -1);
}
# extconf.rb
require "mkmf"
create_makefile("base")
# test.rb - borrowed asserts from Rubicon project
$:.unshift(Dir.pwd)
require "base"
require "test/unit"
class TC_Base < Test::Unit::TestCase
def setup
@file = File.join("_test", "_touched")
end
def test_basename
assert_equal("_touched", File.basename(@file))
assert_equal("tmp", File.basename(File.join("/tmp")))
assert_equal("b", File.basename(File.join(*%w( g f d s a b))))
assert_equal("tmp", File.basename("/tmp", ".*"))
assert_equal("tmp", File.basename("/tmp", ".c"))
assert_equal("tmp", File.basename("/tmp.c", ".c"))
assert_equal("tmp", File.basename("/tmp.c", ".*"))
assert_equal("tmp.o", File.basename("/tmp.o", ".c"))
assert_equal("tmp", File.basename(File.join("/tmp/")))
assert_equal("/", File.basename("/"))
assert_equal("/", File.basename("//"))
assert_equal("base", File.basename("dir///base", ".*"))
assert_equal("base", File.basename("dir///base", ".c"))
assert_equal("base", File.basename("dir///base.c", ".c"))
assert_equal("base", File.basename("dir///base.c", ".*"))
assert_equal("base.o", File.basename("dir///base.o", ".c"))
assert_equal("base", File.basename("dir///base///"))
assert_equal("base", File.basename("dir//base/", ".*"))
assert_equal("base", File.basename("dir//base/", ".c"))
assert_equal("base", File.basename("dir//base.c/", ".c"))
assert_equal("base", File.basename("dir//base.c/", ".*"))
assert_equal("base.o", File.basename("dir//base.o/", ".c"))
end
def teardown
@file = nil
end
end
# basebench.rb
$:.unshift(Dir.pwd)
require "base"
require "benchmark"
include Benchmark
MAX = 200000
bm do |x|
x.report("Old #1"){
MAX.times{
File.basename("/foo/bar/baz.rb")
}
}
x.report("New #1"){
MAX.times{
Base.basename("/foo/bar/baz.rb")
}
}
puts
x.report("Old #2"){
MAX.times{
File.basename("/foo/bar/baz.rb",".rb")
}
}
x.report("New #2"){
MAX.times{
Base.basename("/foo/bar/baz.rb",".rb")
}
}
puts
x.report("Old #3"){
MAX.times{
File.basename("foo.rb", ".rb")
}
}
x.report("New #3"){
MAX.times{
Base.basename("foo.rb", ".rb")
}
}
puts
x.report("Old #4"){
MAX.times{
File.basename("foo.rb", ".py")
}
}
x.report("New #4"){
MAX.times{
Base.basename("foo.rb", ".py")
}
}
puts
x.report("Old #5"){
MAX.times{
File.basename("foo.rb.py", ".rb")
}
}
x.report("New #5"){
MAX.times{
Base.basename("foo.rb.py", ".rb")
}
}
end
# Current results on Sunblade 150, Solaris 10, 512 MB RAM, USIIe 650 MHz
user system total real
Old #1 1.990000 0.870000 2.860000 ( 2.963725)
New #1 2.010000 0.880000 2.890000 ( 2.981809)
Old #2 2.280000 0.880000 3.160000 ( 3.279448)
New #2 2.360000 0.880000 3.240000 ( 3.364783)
Old #3 2.210000 0.880000 3.090000 ( 3.271177)
New #3 2.660000 0.890000 3.550000 ( 3.685172)
Old #4 1.710000 0.850000 2.560000 ( 2.646357)
New #4 2.250000 0.880000 3.130000 ( 3.253422)
Old #5 1.690000 0.850000 2.540000 ( 2.652825)
New #5 2.380000 0.890000 3.270000 ( 3.417572)
After a recent "idiomatic" thread, I decided to try and refactor the
core File.basename method. I think I did a pretty good job. It's about
10 lines short, easier to read (I think), and self contained, i.e. no
calls to rmext().
However it is a *tad* slower, but not by much. I was wondering if
anyone would help me tweak it. Below is the (self contained) code,
extconf.rb file, a test suite (taken from Rubicon - it passes all
tests) and a benchmark program.
Any and all help appreciated.
Regards,
Dan
PS - Note that I do not expect this code to work on Windows, for which I
use a completely different approach using their builtin path handling
functions.
/* base.c */
#include "ruby.h"
#include <stdio.h>
#include <strings.h>
#include <libgen.h>
static VALUE base_basename(int argc, VALUE* argv, VALUE klass){
VALUE rbFileName, rbExt, rbBase;
rb_scan_args(argc, argv, "11", &rbFileName, &rbExt);
StringValue(rbFileName);
if(RSTRING(rbFileName)->len == 0) /* edge case */
rbBase = rbFileName;
else
rbBase = rb_str_new2(basename(StringValuePtr(rbFileName)));
if(RTEST(rbExt)){
char* base = RSTRING(rbBase)->ptr; /* for readability */
char* ext = RSTRING(rbExt)->ptr; /* ditto */
if(!strcmp(ext, ".*")){
int length = strlen(base) - strlen(strrchr(base, '.'));
rbBase = rb_str_new(base, length);
}
else{
if(strstr(base, ext)){
int length = strlen(base) - strlen(ext);
int span = strcspn(base, ext);
if(length == span)
rbBase = rb_str_new(base, length);
}
}
}
return rbBase;
}
void Init_base(){
VALUE rbBase = rb_define_class("Base", rb_cObject);
rb_define_singleton_method(rbBase, "basename", base_basename, -1);
}
# extconf.rb
require "mkmf"
create_makefile("base")
# test.rb - borrowed asserts from Rubicon project
$:.unshift(Dir.pwd)
require "base"
require "test/unit"
class TC_Base < Test::Unit::TestCase
def setup
@file = File.join("_test", "_touched")
end
def test_basename
assert_equal("_touched", File.basename(@file))
assert_equal("tmp", File.basename(File.join("/tmp")))
assert_equal("b", File.basename(File.join(*%w( g f d s a b))))
assert_equal("tmp", File.basename("/tmp", ".*"))
assert_equal("tmp", File.basename("/tmp", ".c"))
assert_equal("tmp", File.basename("/tmp.c", ".c"))
assert_equal("tmp", File.basename("/tmp.c", ".*"))
assert_equal("tmp.o", File.basename("/tmp.o", ".c"))
assert_equal("tmp", File.basename(File.join("/tmp/")))
assert_equal("/", File.basename("/"))
assert_equal("/", File.basename("//"))
assert_equal("base", File.basename("dir///base", ".*"))
assert_equal("base", File.basename("dir///base", ".c"))
assert_equal("base", File.basename("dir///base.c", ".c"))
assert_equal("base", File.basename("dir///base.c", ".*"))
assert_equal("base.o", File.basename("dir///base.o", ".c"))
assert_equal("base", File.basename("dir///base///"))
assert_equal("base", File.basename("dir//base/", ".*"))
assert_equal("base", File.basename("dir//base/", ".c"))
assert_equal("base", File.basename("dir//base.c/", ".c"))
assert_equal("base", File.basename("dir//base.c/", ".*"))
assert_equal("base.o", File.basename("dir//base.o/", ".c"))
end
def teardown
@file = nil
end
end
# basebench.rb
$:.unshift(Dir.pwd)
require "base"
require "benchmark"
include Benchmark
MAX = 200000
bm do |x|
x.report("Old #1"){
MAX.times{
File.basename("/foo/bar/baz.rb")
}
}
x.report("New #1"){
MAX.times{
Base.basename("/foo/bar/baz.rb")
}
}
puts
x.report("Old #2"){
MAX.times{
File.basename("/foo/bar/baz.rb",".rb")
}
}
x.report("New #2"){
MAX.times{
Base.basename("/foo/bar/baz.rb",".rb")
}
}
puts
x.report("Old #3"){
MAX.times{
File.basename("foo.rb", ".rb")
}
}
x.report("New #3"){
MAX.times{
Base.basename("foo.rb", ".rb")
}
}
puts
x.report("Old #4"){
MAX.times{
File.basename("foo.rb", ".py")
}
}
x.report("New #4"){
MAX.times{
Base.basename("foo.rb", ".py")
}
}
puts
x.report("Old #5"){
MAX.times{
File.basename("foo.rb.py", ".rb")
}
}
x.report("New #5"){
MAX.times{
Base.basename("foo.rb.py", ".rb")
}
}
end
# Current results on Sunblade 150, Solaris 10, 512 MB RAM, USIIe 650 MHz
user system total real
Old #1 1.990000 0.870000 2.860000 ( 2.963725)
New #1 2.010000 0.880000 2.890000 ( 2.981809)
Old #2 2.280000 0.880000 3.160000 ( 3.279448)
New #2 2.360000 0.880000 3.240000 ( 3.364783)
Old #3 2.210000 0.880000 3.090000 ( 3.271177)
New #3 2.660000 0.890000 3.550000 ( 3.685172)
Old #4 1.710000 0.850000 2.560000 ( 2.646357)
New #4 2.250000 0.880000 3.130000 ( 3.253422)
Old #5 1.690000 0.850000 2.540000 ( 2.652825)
New #5 2.380000 0.890000 3.270000 ( 3.417572)