E
Eric Wong
Hi all,
Hopefully the new maintainer of Hpricot will see this because I have no
idea who to send it to now...
I originally had a version of this patch posted in _why's Trac sometime
in 2008 until he took Trac down. I've tried emailing him directly on
several occasions since and never got a response, last was February
2009. Come to think of it, the only time I ever got any response from
him was my initial email to him about fast_xs back in October 2007.
So here it is, I've rebased my patch against
2c961095954d5aaa5c046f4c773c62c3d5902ef4 on
git://github.com/whymirror/hpricot.git
Also I have an entire repo up on git://git.bogomips.org/hpricot and
viewable from http://git.bogomips.org/cgit/hpricot.git
From 636e3f453b2bbc0c7486b91eda452fe4767e4bbc Mon Sep 17 00:00:00 2001
From: Eric Wong <[email protected]>
Date: Sat, 7 Feb 2009 21:31:15 -0800
Subject: [PATCH] * ext/fast_xs: latest changes from mainline fast_xs
- alloca() dependency removed. An extra memory allocation and
memory copy is saved by writing directly to the string object
returned by rb_str_new(0, len) (both the Ruby 1.8.6 and 1.9
code have this usage in them, so it should be safe).
This also allows fast_xs to work on strings larger than
the stack size.
- general readability and cleanups
- fast_xs works with Ruby 1.9
- Don't rely on ruby_digitmap being a valid symbol
---
ext/fast_xs/fast_xs.c | 101 ++++++++++++++++++++++++-------------------------
1 files changed, 50 insertions(+), 51 deletions(-)
diff --git a/ext/fast_xs/fast_xs.c b/ext/fast_xs/fast_xs.c
index 4a30a6c..04b175f 100644
--- a/ext/fast_xs/fast_xs.c
+++ b/ext/fast_xs/fast_xs.c
@@ -1,8 +1,5 @@
-#define VERSION "0.1"
-
#include <ruby.h>
#include <assert.h>
-/* #include <stdio.h> */
#ifndef RARRAY_LEN
#define RARRAY_LEN(arr) RARRAY(arr)->len
@@ -72,11 +69,6 @@ static const int cp_1252[] = {
n = cp_1252[n - 128]; \
} while(0)
-#define return_const_len(x) do { \
- memcpy(buf, x, sizeof(x) - 1); \
- return (sizeof(x) - 1); \
-} while (0)
-
static inline size_t bytes_for(int n)
{
if (n < 1000)
@@ -91,18 +83,24 @@ static inline size_t bytes_for(int n)
return sizeof("&#9999999;") - 1;
}
-static long escape(char *buf, int n)
+static size_t escape(char *buf, int n)
{
+
+#define return_const_len(x) do { \
+ memcpy(buf, x, sizeof(x) - 1); \
+ return (sizeof(x) - 1); \
+} while (0)
+
/* handle ASCII first */
if (likely(n < 128)) {
- if (likely(n >= 0x20 || n == 0x9 || n == 0xA || n == 0xD)) {
- if (unlikely(n == 34))
+ if (likely(n >= 0x20 || n == '\t' || n == '\n' || n == '\r')) {
+ if (unlikely(n == '"'))
return_const_len("&quot;");
- if (unlikely(n == 38))
+ if (unlikely(n == '&'))
return_const_len("&amp;");
- if (unlikely(n == 60))
+ if (unlikely(n == '<'))
return_const_len("&lt;");
- if (unlikely(n == 62))
+ if (unlikely(n == '>'))
return_const_len("&gt;");
buf[0] = (char)n;
return 1;
@@ -112,16 +110,18 @@ static long escape(char *buf, int n)
return 1;
}
+#undef return_const_len
+
CP_1252_ESCAPE(n);
if (VALID_VALUE(n)) {
/* return snprintf(buf, sizeof("&#1114111;"), "&#%i;", n); */
- RUBY_EXTERN const char ruby_digitmap[];
- int rv = 3; /* &#; */
+ static const char digitmap[] = "0123456789";
+ size_t rv = sizeof("&#;") - 1;
buf += bytes_for(n);
*--buf = ';';
do {
- *--buf = ruby_digitmap[(int)(n % 10)];
+ *--buf = digitmap[(int)(n % 10)];
++rv;
} while (n /= 10);
*--buf = '#';
@@ -132,27 +132,6 @@ static long escape(char *buf, int n)
return 1;
}
-#undef return_const_len
-
-static long escaped_len(int n)
-{
- if (likely(n < 128)) {
- if (unlikely(n == 34))
- return (sizeof("&quot;") - 1);
- if (unlikely(n == 38))
- return (sizeof("&amp;") - 1);
- if (unlikely(n == 60 || n == 62))
- return (sizeof("&gt;") - 1);
- return 1;
- }
-
- CP_1252_ESCAPE(n);
-
- if (VALID_VALUE(n))
- return bytes_for(n);
- return 1;
-}
-
static VALUE unpack_utf8(VALUE self)
{
return rb_funcall(self, unpack_id, 1, U_fmt);
@@ -163,28 +142,48 @@ static VALUE unpack_uchar(VALUE self)
return rb_funcall(self, unpack_id, 1, C_fmt);
}
-VALUE fast_xs(VALUE self)
+/*
+ * escapes strings for XML
+ * The double-quote (") character is translated to "&quot;"
+ */
+static VALUE fast_xs(VALUE self)
{
long i;
- struct RArray *array;
- char *s, *c;
- long s_len = 0;
+ VALUE array;
+ char *c;
+ size_t s_len;
VALUE *tmp;
+ VALUE rv;
+
+ array = rb_rescue(unpack_utf8, self, unpack_uchar, self);
+
+ for (tmp = RARRAY_PTR(array), s_len = i = RARRAY_LEN(array);
+ --i >= 0;
+ tmp++) {
+ int n = NUM2INT(*tmp);
+ if (likely(n < 128)) {
+ if (unlikely(n == '"'))
+ s_len += (sizeof("&quot;") - 2);
+ if (unlikely(n == '&'))
+ s_len += (sizeof("&amp;") - 2);
+ if (unlikely(n == '>' || n == '<'))
+ s_len += (sizeof("&gt;") - 2);
+ continue;
+ }
- array = RARRAY(rb_rescue(unpack_utf8, self, unpack_uchar, self));
+ CP_1252_ESCAPE(n);
- tmp = RARRAY_PTR(array);
- for (i = RARRAY_LEN(array); --i >= 0; tmp++)
- s_len += escaped_len(NUM2INT(*tmp));
+ if (VALID_VALUE(n))
+ s_len += bytes_for(n) - 1;
+ }
- c = s = alloca(s_len + 1);
+ rv = rb_str_new(NULL, s_len);
+ c = RSTRING_PTR(rv);
- tmp = RARRAY_PTR(array);
- for (i = RARRAY_LEN(array); --i >= 0; tmp++)
+ for (tmp = RARRAY_PTR(array), i = RARRAY_LEN(array); --i >= 0; tmp++)
c += escape(c, NUM2INT(*tmp));
- *c = '\0';
- return rb_str_new(s, s_len);
+ return rv;
}
void Init_fast_xs(void)
Hopefully the new maintainer of Hpricot will see this because I have no
idea who to send it to now...
I originally had a version of this patch posted in _why's Trac sometime
in 2008 until he took Trac down. I've tried emailing him directly on
several occasions since and never got a response, last was February
2009. Come to think of it, the only time I ever got any response from
him was my initial email to him about fast_xs back in October 2007.
So here it is, I've rebased my patch against
2c961095954d5aaa5c046f4c773c62c3d5902ef4 on
git://github.com/whymirror/hpricot.git
Also I have an entire repo up on git://git.bogomips.org/hpricot and
viewable from http://git.bogomips.org/cgit/hpricot.git
From 636e3f453b2bbc0c7486b91eda452fe4767e4bbc Mon Sep 17 00:00:00 2001
From: Eric Wong <[email protected]>
Date: Sat, 7 Feb 2009 21:31:15 -0800
Subject: [PATCH] * ext/fast_xs: latest changes from mainline fast_xs
- alloca() dependency removed. An extra memory allocation and
memory copy is saved by writing directly to the string object
returned by rb_str_new(0, len) (both the Ruby 1.8.6 and 1.9
code have this usage in them, so it should be safe).
This also allows fast_xs to work on strings larger than
the stack size.
- general readability and cleanups
- fast_xs works with Ruby 1.9
- Don't rely on ruby_digitmap being a valid symbol
---
ext/fast_xs/fast_xs.c | 101 ++++++++++++++++++++++++-------------------------
1 files changed, 50 insertions(+), 51 deletions(-)
diff --git a/ext/fast_xs/fast_xs.c b/ext/fast_xs/fast_xs.c
index 4a30a6c..04b175f 100644
--- a/ext/fast_xs/fast_xs.c
+++ b/ext/fast_xs/fast_xs.c
@@ -1,8 +1,5 @@
-#define VERSION "0.1"
-
#include <ruby.h>
#include <assert.h>
-/* #include <stdio.h> */
#ifndef RARRAY_LEN
#define RARRAY_LEN(arr) RARRAY(arr)->len
@@ -72,11 +69,6 @@ static const int cp_1252[] = {
n = cp_1252[n - 128]; \
} while(0)
-#define return_const_len(x) do { \
- memcpy(buf, x, sizeof(x) - 1); \
- return (sizeof(x) - 1); \
-} while (0)
-
static inline size_t bytes_for(int n)
{
if (n < 1000)
@@ -91,18 +83,24 @@ static inline size_t bytes_for(int n)
return sizeof("&#9999999;") - 1;
}
-static long escape(char *buf, int n)
+static size_t escape(char *buf, int n)
{
+
+#define return_const_len(x) do { \
+ memcpy(buf, x, sizeof(x) - 1); \
+ return (sizeof(x) - 1); \
+} while (0)
+
/* handle ASCII first */
if (likely(n < 128)) {
- if (likely(n >= 0x20 || n == 0x9 || n == 0xA || n == 0xD)) {
- if (unlikely(n == 34))
+ if (likely(n >= 0x20 || n == '\t' || n == '\n' || n == '\r')) {
+ if (unlikely(n == '"'))
return_const_len("&quot;");
- if (unlikely(n == 38))
+ if (unlikely(n == '&'))
return_const_len("&amp;");
- if (unlikely(n == 60))
+ if (unlikely(n == '<'))
return_const_len("&lt;");
- if (unlikely(n == 62))
+ if (unlikely(n == '>'))
return_const_len("&gt;");
buf[0] = (char)n;
return 1;
@@ -112,16 +110,18 @@ static long escape(char *buf, int n)
return 1;
}
+#undef return_const_len
+
CP_1252_ESCAPE(n);
if (VALID_VALUE(n)) {
/* return snprintf(buf, sizeof("&#1114111;"), "&#%i;", n); */
- RUBY_EXTERN const char ruby_digitmap[];
- int rv = 3; /* &#; */
+ static const char digitmap[] = "0123456789";
+ size_t rv = sizeof("&#;") - 1;
buf += bytes_for(n);
*--buf = ';';
do {
- *--buf = ruby_digitmap[(int)(n % 10)];
+ *--buf = digitmap[(int)(n % 10)];
++rv;
} while (n /= 10);
*--buf = '#';
@@ -132,27 +132,6 @@ static long escape(char *buf, int n)
return 1;
}
-#undef return_const_len
-
-static long escaped_len(int n)
-{
- if (likely(n < 128)) {
- if (unlikely(n == 34))
- return (sizeof("&quot;") - 1);
- if (unlikely(n == 38))
- return (sizeof("&amp;") - 1);
- if (unlikely(n == 60 || n == 62))
- return (sizeof("&gt;") - 1);
- return 1;
- }
-
- CP_1252_ESCAPE(n);
-
- if (VALID_VALUE(n))
- return bytes_for(n);
- return 1;
-}
-
static VALUE unpack_utf8(VALUE self)
{
return rb_funcall(self, unpack_id, 1, U_fmt);
@@ -163,28 +142,48 @@ static VALUE unpack_uchar(VALUE self)
return rb_funcall(self, unpack_id, 1, C_fmt);
}
-VALUE fast_xs(VALUE self)
+/*
+ * escapes strings for XML
+ * The double-quote (") character is translated to "&quot;"
+ */
+static VALUE fast_xs(VALUE self)
{
long i;
- struct RArray *array;
- char *s, *c;
- long s_len = 0;
+ VALUE array;
+ char *c;
+ size_t s_len;
VALUE *tmp;
+ VALUE rv;
+
+ array = rb_rescue(unpack_utf8, self, unpack_uchar, self);
+
+ for (tmp = RARRAY_PTR(array), s_len = i = RARRAY_LEN(array);
+ --i >= 0;
+ tmp++) {
+ int n = NUM2INT(*tmp);
+ if (likely(n < 128)) {
+ if (unlikely(n == '"'))
+ s_len += (sizeof("&quot;") - 2);
+ if (unlikely(n == '&'))
+ s_len += (sizeof("&amp;") - 2);
+ if (unlikely(n == '>' || n == '<'))
+ s_len += (sizeof("&gt;") - 2);
+ continue;
+ }
- array = RARRAY(rb_rescue(unpack_utf8, self, unpack_uchar, self));
+ CP_1252_ESCAPE(n);
- tmp = RARRAY_PTR(array);
- for (i = RARRAY_LEN(array); --i >= 0; tmp++)
- s_len += escaped_len(NUM2INT(*tmp));
+ if (VALID_VALUE(n))
+ s_len += bytes_for(n) - 1;
+ }
- c = s = alloca(s_len + 1);
+ rv = rb_str_new(NULL, s_len);
+ c = RSTRING_PTR(rv);
- tmp = RARRAY_PTR(array);
- for (i = RARRAY_LEN(array); --i >= 0; tmp++)
+ for (tmp = RARRAY_PTR(array), i = RARRAY_LEN(array); --i >= 0; tmp++)
c += escape(c, NUM2INT(*tmp));
- *c = '\0';
- return rb_str_new(s, s_len);
+ return rv;
}
void Init_fast_xs(void)