Extending snprintf() - how to modify va_list?

A

A. Farber

Hello,

I'm programming a web application and for that I'm trying
to extend the standard C string functions, so that they can
handle the application/x-www-form-urlencoded format
(which encodes non-alphanumeric characters as %XY).

I've written my own xstrlen(), xstrlcat(), xstrlcpy() and xstrdup()
(please see the source code at the bottom of this post).

But I can't figure out how to extend the last function I need -
snprintf(). Namely, I'd like to add a %a conversion to its format:
for each corresponding string argument I'd call xstrdup(),
thus converting it to application/x-www-form-urlencoded;
then I'd replace the %a itself with the normal %s,
call the normal (v)snprintf(), and finally free() the strings:

/*
 * xsnprintf - sketch of an snprintf() wrapper meant to understand "%a".
 *
 * The two "HERE" placeholders below are NOT implemented; as written the
 * function simply forwards its arguments to vsnprintf().
 *
 * dst  - output buffer
 * size - capacity of dst in bytes
 * fmt  - printf-style format string
 *
 * Returns whatever vsnprintf() returns: the number of characters the
 * complete output needs (excluding the terminating NUL), or a negative
 * value on error.
 */
int
xsnprintf(char *dst, size_t size, char *fmt, ...)
{
    va_list ap;
    int ret;

    va_start(ap, fmt);

    /* HERE:
    loop through fmt,
    replace %a by %s,
    xstrdup(va_arg(ap, char*))
    */

    ret = vsnprintf(dst, size, fmt, ap);
    va_end(ap);

    /* HERE:
    free() the strings xstrdup()ed above */

    /* The function is declared int, so it must hand a value back. */
    return ret;
}

However I can't figure out how to put my xstrdup()ed
strings into the va_list ap, before I call vsnprintf().

Does anybody please have an idea here?

Regards
Alex

/* Upper-case hexadecimal digits, indexed by a 4-bit value (0..15);
 * used to build the two digits of a %XX escape. */
static const char hex[] = "0123456789ABCDEF";

/*
 * xstrlen - length of str after application/x-www-form-urlencoded
 * encoding, not counting the terminating NUL.
 *
 * Alphanumerics, '.', '_', '-' and the space character each count as one
 * byte; every other character counts as three bytes (%XX).
 *
 * str - NUL-terminated input string (must not be NULL)
 */
size_t
xstrlen(const char *str)
{
    size_t len = 0;

    for (; *str != '\0'; ++str) {
        /*
         * <ctype.h> functions are only defined for values representable
         * as unsigned char (or EOF); a plain char may be negative.
         */
        unsigned char c = (unsigned char)*str;

        if (isalnum(c) || '.' == c || '_' == c || '-' == c || ' ' == c) {
            /* [a-zA-Z0-9._-] or space require 1 byte */
            len += 1;
        } else {
            /* all other chars will be converted to %XX */
            len += 3;
        }
    }

    return len;
}

/*
 * xstrlcpy - strlcpy()-style copy that URL-encodes src into dst.
 *
 * Alphanumerics, '.', '_' and '-' are copied unchanged, a space becomes
 * '+', and any other byte becomes %XX. An escape is never split: if the
 * three bytes plus the terminating NUL do not fit, copying stops before it.
 *
 * dst  - output buffer (not touched when size is 0)
 * src  - NUL-terminated input
 * size - capacity of dst in bytes, including room for the NUL
 *
 * Returns the length the fully encoded string needs (excluding the NUL);
 * a result >= size means the output was truncated.
 */
size_t
xstrlcpy(char *dst, const unsigned char *src, size_t size)
{
    size_t len = 0;

    if (0 == size)
        return xstrlen((const char *)src);

    /* copy at most (size - 1) chars from src to dst */
    while (*src != '\0' && len < size - 1) {
        if (isalnum(*src) || '.' == *src || '_' == *src || '-' == *src) {
            /* just copy [a-zA-Z0-9._-] */
            dst[len++] = (char)*src;
        } else if (' ' == *src) {
            /* convert spaces to pluses */
            dst[len++] = '+';
        } else {
            /*
             * Need 3 bytes for %XX plus 1 for the NUL. The loop condition
             * guarantees len < size, so (size - len) cannot wrap around,
             * unlike (size - 3), which underflows when size < 3.
             */
            if (size - len < 4)
                break;
            dst[len++] = '%';
            dst[len++] = hex[*src >> 4];
            dst[len++] = hex[*src & 0xF];
        }
        ++src;
    }
    dst[len] = '\0';

    if ('\0' == *src)
        return len;

    /* truncated: add the encoded length of what did not fit */
    return len + xstrlen((const char *)src);
}

/*
 * xstrlcat - strlcat()-style append of the URL-encoded form of src to dst.
 *
 * dst  - buffer holding a NUL-terminated string to append to
 * src  - NUL-terminated input to encode and append
 * size - total capacity of dst in bytes
 *
 * Returns the length of the string it tried to create: the initial length
 * of dst plus the encoded length of src. A result >= size means the output
 * was truncated. If no NUL is found within the first size bytes of dst,
 * nothing is written and size + xstrlen(src) is returned.
 */
size_t
xstrlcat(char *dst, const char *src, size_t size)
{
    size_t len = 0;

    /* bounded scan: never read past the size bytes the caller gave us */
    while (len < size && dst[len] != '\0')
        len++;

    if (len >= size)
        return size + xstrlen(src);

    /* include the existing length so callers can detect truncation */
    return len + xstrlcpy(&dst[len], (const unsigned char *)src, size - len);
}

/*
 * xstrdup - return a newly allocated, URL-encoded copy of src.
 *
 * src - NUL-terminated input string (must not be NULL)
 *
 * Returns a malloc()ed string the caller must free(), or NULL if the
 * allocation fails.
 */
char*
xstrdup(const char *src)
{
    size_t size = xstrlen(src) + 1;
    char *copy = malloc(size);

    if (copy == NULL)
        return NULL;

    /*
     * Encode rather than copy raw bytes: size is the ENCODED length, so a
     * plain memmove() of size bytes would read past the end of src and
     * would not perform any conversion.
     */
    xstrlcpy(copy, (const unsigned char *)src, size);

    return copy;
}
 
C

Chris Torek

... I can't figure out how to extend the last function I need -
the snprintf(). Namely I'd like to add an %a to its format
and for each corresponding string I'd call the xstrdup()
thus converting it application/x-www-form-urlencoded
and finally I'd replaced the %a itself by the normal %s
and call the normal (v)snprintf (and free()d the strings):

There is no way to insert or replace various arguments in a
va_list.

What you *can* do is handle each argument yourself. Admittedly
this will take a lot more code:
/*
 * xsnprintf - sketch of an snprintf() wrapper meant to understand "%a".
 *
 * The two "HERE" placeholders below are NOT implemented; as written the
 * function simply forwards its arguments to vsnprintf().
 *
 * dst  - output buffer
 * size - capacity of dst in bytes
 * fmt  - printf-style format string
 *
 * Returns whatever vsnprintf() returns: the number of characters the
 * complete output needs (excluding the terminating NUL), or a negative
 * value on error.
 */
int
xsnprintf(char *dst, size_t size, char *fmt, ...)
{
    va_list ap;
    int ret;

    va_start(ap, fmt);

    /* HERE:
    loop through fmt,
    replace %a by %s,
    xstrdup(va_arg(ap, char*))
    */

    ret = vsnprintf(dst, size, fmt, ap);
    va_end(ap);

    /* HERE:
    free() the strings xstrdup()ed above */

    /* The function is declared int, so it must hand a value back. */
    return ret;
}

Instead:

/* Forward declaration: vxsnprintf() is defined after its first use. */
int vxsnprintf(char *dst, size_t size, const char *fmt, va_list ap);

/*
 * xsnprintf - variadic front end for vxsnprintf().
 *
 * dst  - output buffer
 * size - capacity of dst in bytes
 * fmt  - format string, passed on to vxsnprintf() unchanged
 *
 * Returns the value returned by vxsnprintf().
 */
int xsnprintf(char *dst, size_t size, const char *fmt, ...) {
    va_list ap;
    int ret;

    va_start(ap, fmt);
    ret = vxsnprintf(dst, size, fmt, ap); /* always use a v- version */
    va_end(ap);

    /* the result was computed but never returned */
    return ret;
}

/*
 * vxsnprintf - vsnprintf() work-alike that adds a "%a" conversion.
 *
 * The format is cut into parts that each end in one conversion. Each part
 * is copied by collect() and handed to snprintf() together with the single
 * argument it consumes. For "%a" the char * argument is URL-encoded with
 * xstrdup() and the part is printed with "%s" instead.
 * Note that this "%a" replaces the C99 hexadecimal floating-point "%a".
 *
 * Handled: flags # 0 - + space, numeric width/precision, h/l/ll/L length
 * modifiers, and the conversions d i o u x X a s c f e E g G.
 * "*" width/precision and anything else ends in panic().
 *
 * dst  - output buffer
 * size - capacity of dst in bytes
 * fmt  - format string
 * ap   - argument list matching fmt
 *
 * Returns the total number of characters the complete output needs
 * (excluding the NUL), the negative value of a failing snprintf(), or -1
 * when collect()/xstrdup() fail or the format is not supported.
 */
int vxsnprintf(char *dst, size_t size, const char *fmt, va_list ap) {
    int final = 0, ret;
    const char *stop;
    char *partbuf = NULL;
    size_t partlen = 0;
    char *strarg;
    int flags;
    char conv;
    size_t step;
#define LLONG 2
#define LONG 1
#define LONGDBL 4

    while (*fmt != '\0') {
        /* find the next real conversion, stepping over "%%" pairs */
        for (stop = fmt;;) {
            stop = strchr(stop, '%');
            if (stop == NULL) {
                stop = fmt + strlen(fmt);
                break;
            }
            if (stop[1] == '%') /* %% - not a conversion */
                stop += 2;
            else
                break;
        }
        /* either stop[0] is '\0' or we have a conversion */
        if (stop[0] == '\0') {
            /* no more % conversions, just feed the rest to snprintf
             * (it may still contain "%%", so it must go in as a format) */
            ret = snprintf(dst, size, fmt);
            if (ret < 0)
                final = ret;
            else
                final += ret;
            break;
        }
        flags = 0;
        stop++; /* skip over '%' */

        /* consume flags, width, precision and length modifiers */
        for (;;) {
            conv = *stop++;
            if (conv == 'l') {
                /* %ld, %lu, %lld, %llu etc */
                flags |= (flags & LONG) ? LLONG : LONG;
            } else if (conv == 'L') {
                flags |= LONGDBL;
            } else if (conv == '\0' ||
                strchr("#.-+ 0123456789h", conv) == NULL) {
                break; /* conv is the conversion character (or invalid) */
            }
            /* 'h'/'hh' arguments are promoted to int, so no flag is needed */
        }

        /*
         * Copy fmt..stop (literal text plus this one conversion).
         * partlen is passed by value, so it is not relied on afterwards.
         * NOTE(review): assumes collect() returns NULL on allocation
         * failure - confirm once collect() is written.
         */
        partbuf = collect(fmt, stop, partbuf, partlen);
        if (partbuf == NULL)
            return -1;

        switch (conv) {
        case 'd': case 'i':
            if (flags & LLONG)
                ret = snprintf(dst, size, partbuf, va_arg(ap, long long));
            else if (flags & LONG)
                ret = snprintf(dst, size, partbuf, va_arg(ap, long));
            else
                ret = snprintf(dst, size, partbuf, va_arg(ap, int));
            break;
        case 'o': case 'u': case 'x': case 'X':
            if (flags & LLONG)
                ret = snprintf(dst, size, partbuf,
                    va_arg(ap, unsigned long long));
            else if (flags & LONG)
                ret = snprintf(dst, size, partbuf,
                    va_arg(ap, unsigned long));
            else
                ret = snprintf(dst, size, partbuf,
                    va_arg(ap, unsigned int));
            break;
        case 'a': /* your new format */
            /* the conversion character is the last byte of the part */
            partbuf[(size_t)(stop - fmt) - 1] = 's';
            strarg = xstrdup(va_arg(ap, char *));
            if (strarg == NULL) {
                /* passing NULL for %s would be undefined behavior */
                free(partbuf);
                return -1;
            }
            ret = snprintf(dst, size, partbuf, strarg);
            free(strarg);
            break;
        case 's':
            if (flags & LONG)
                ret = snprintf(dst, size, partbuf, va_arg(ap, wchar_t *));
            else
                ret = snprintf(dst, size, partbuf, va_arg(ap, char *));
            break;
        case 'c':
            ret = snprintf(dst, size, partbuf, va_arg(ap, int));
            break;
        case 'f': case 'e': case 'E': case 'g': case 'G':
            if (flags & LONGDBL)
                ret = snprintf(dst, size, partbuf,
                    va_arg(ap, long double));
            else
                ret = snprintf(dst, size, partbuf, va_arg(ap, double));
            break;
        default:
            panic("vxsnprintf: invalid or unhandled format character");
            /* only reached if panic() returns */
            free(partbuf);
            return -1;
        }
        /* at this point, ret is the return from a partial sprintf */
        if (ret < 0) {
            final = ret;
            break;
        }
        final += ret;

        /*
         * Advance by what fits, never past the end of the buffer. Once
         * size reaches 0 the remaining parts are only measured.
         */
        step = (size_t)ret < size ? (size_t)ret : size;
        if (step > 0)
            dst += step;
        size -= step;

        /* continue right after the conversion just handled */
        fmt = stop;
    }
    free(partbuf);
    return final;
}

The above is 100% untested, and of course, you have to write
"collect" (which is pretty obvious, it is just a realloc and
memcpy and '\0'-terminate, or reuse existing buffer if big enough).
Modifying it to handle "*" (indirect) width and precision arguments
is left as an exercise. :)
 
A

A. Farber

Hmm, that's a big chunk you've typed Chris, thanks. Many realloc and
snprintf calls...
I guess it's easier for me to scavenge the bsd-snprintf.c from OpenSSH
portable

Regards
Alex

--
http://preferans.de/

Chris said:
... I can't figure out how to extend the last function I need -
the snprintf(). Namely I'd like to add an %a to its format
and for each corresponding string I'd call the xstrdup()
thus converting it application/x-www-form-urlencoded
and finally I'd replaced the %a itself by the normal %s
and call the normal (v)snprintf (and free()d the strings):

There is no way to insert or replace various arguments in a
va_list.

What you *can* do is handle each argument yourself. Admittedly
this will take a lot more code:
/*
 * xsnprintf - sketch of an snprintf() wrapper meant to understand "%a".
 *
 * The two "HERE" placeholders below are NOT implemented; as written the
 * function simply forwards its arguments to vsnprintf().
 *
 * dst  - output buffer
 * size - capacity of dst in bytes
 * fmt  - printf-style format string
 *
 * Returns whatever vsnprintf() returns: the number of characters the
 * complete output needs (excluding the terminating NUL), or a negative
 * value on error.
 */
int
xsnprintf(char *dst, size_t size, char *fmt, ...)
{
    va_list ap;
    int ret;

    va_start(ap, fmt);

    /* HERE:
    loop through fmt,
    replace %a by %s,
    xstrdup(va_arg(ap, char*))
    */

    ret = vsnprintf(dst, size, fmt, ap);
    va_end(ap);

    /* HERE:
    free() the strings xstrdup()ed above */

    /* The function is declared int, so it must hand a value back. */
    return ret;
}

Instead:

/* Forward declaration: vxsnprintf() is defined after its first use. */
int vxsnprintf(char *dst, size_t size, const char *fmt, va_list ap);

/*
 * xsnprintf - variadic front end for vxsnprintf().
 *
 * dst  - output buffer
 * size - capacity of dst in bytes
 * fmt  - format string, passed on to vxsnprintf() unchanged
 *
 * Returns the value returned by vxsnprintf().
 */
int xsnprintf(char *dst, size_t size, const char *fmt, ...) {
    va_list ap;
    int ret;

    va_start(ap, fmt);
    ret = vxsnprintf(dst, size, fmt, ap); /* always use a v- version */
    va_end(ap);

    /* the result was computed but never returned */
    return ret;
}

/*
 * vxsnprintf - vsnprintf() work-alike that adds a "%a" conversion.
 *
 * The format is cut into parts that each end in one conversion. Each part
 * is copied by collect() and handed to snprintf() together with the single
 * argument it consumes. For "%a" the char * argument is URL-encoded with
 * xstrdup() and the part is printed with "%s" instead.
 * Note that this "%a" replaces the C99 hexadecimal floating-point "%a".
 *
 * Handled: flags # 0 - + space, numeric width/precision, h/l/ll/L length
 * modifiers, and the conversions d i o u x X a s c f e E g G.
 * "*" width/precision and anything else ends in panic().
 *
 * dst  - output buffer
 * size - capacity of dst in bytes
 * fmt  - format string
 * ap   - argument list matching fmt
 *
 * Returns the total number of characters the complete output needs
 * (excluding the NUL), the negative value of a failing snprintf(), or -1
 * when collect()/xstrdup() fail or the format is not supported.
 */
int vxsnprintf(char *dst, size_t size, const char *fmt, va_list ap) {
    int final = 0, ret;
    const char *stop;
    char *partbuf = NULL;
    size_t partlen = 0;
    char *strarg;
    int flags;
    char conv;
    size_t step;
#define LLONG 2
#define LONG 1
#define LONGDBL 4

    while (*fmt != '\0') {
        /* find the next real conversion, stepping over "%%" pairs */
        for (stop = fmt;;) {
            stop = strchr(stop, '%');
            if (stop == NULL) {
                stop = fmt + strlen(fmt);
                break;
            }
            if (stop[1] == '%') /* %% - not a conversion */
                stop += 2;
            else
                break;
        }
        /* either stop[0] is '\0' or we have a conversion */
        if (stop[0] == '\0') {
            /* no more % conversions, just feed the rest to snprintf
             * (it may still contain "%%", so it must go in as a format) */
            ret = snprintf(dst, size, fmt);
            if (ret < 0)
                final = ret;
            else
                final += ret;
            break;
        }
        flags = 0;
        stop++; /* skip over '%' */

        /* consume flags, width, precision and length modifiers */
        for (;;) {
            conv = *stop++;
            if (conv == 'l') {
                /* %ld, %lu, %lld, %llu etc */
                flags |= (flags & LONG) ? LLONG : LONG;
            } else if (conv == 'L') {
                flags |= LONGDBL;
            } else if (conv == '\0' ||
                strchr("#.-+ 0123456789h", conv) == NULL) {
                break; /* conv is the conversion character (or invalid) */
            }
            /* 'h'/'hh' arguments are promoted to int, so no flag is needed */
        }

        /*
         * Copy fmt..stop (literal text plus this one conversion).
         * partlen is passed by value, so it is not relied on afterwards.
         * NOTE(review): assumes collect() returns NULL on allocation
         * failure - confirm once collect() is written.
         */
        partbuf = collect(fmt, stop, partbuf, partlen);
        if (partbuf == NULL)
            return -1;

        switch (conv) {
        case 'd': case 'i':
            if (flags & LLONG)
                ret = snprintf(dst, size, partbuf, va_arg(ap, long long));
            else if (flags & LONG)
                ret = snprintf(dst, size, partbuf, va_arg(ap, long));
            else
                ret = snprintf(dst, size, partbuf, va_arg(ap, int));
            break;
        case 'o': case 'u': case 'x': case 'X':
            if (flags & LLONG)
                ret = snprintf(dst, size, partbuf,
                    va_arg(ap, unsigned long long));
            else if (flags & LONG)
                ret = snprintf(dst, size, partbuf,
                    va_arg(ap, unsigned long));
            else
                ret = snprintf(dst, size, partbuf,
                    va_arg(ap, unsigned int));
            break;
        case 'a': /* your new format */
            /* the conversion character is the last byte of the part */
            partbuf[(size_t)(stop - fmt) - 1] = 's';
            strarg = xstrdup(va_arg(ap, char *));
            if (strarg == NULL) {
                /* passing NULL for %s would be undefined behavior */
                free(partbuf);
                return -1;
            }
            ret = snprintf(dst, size, partbuf, strarg);
            free(strarg);
            break;
        case 's':
            if (flags & LONG)
                ret = snprintf(dst, size, partbuf, va_arg(ap, wchar_t *));
            else
                ret = snprintf(dst, size, partbuf, va_arg(ap, char *));
            break;
        case 'c':
            ret = snprintf(dst, size, partbuf, va_arg(ap, int));
            break;
        case 'f': case 'e': case 'E': case 'g': case 'G':
            if (flags & LONGDBL)
                ret = snprintf(dst, size, partbuf,
                    va_arg(ap, long double));
            else
                ret = snprintf(dst, size, partbuf, va_arg(ap, double));
            break;
        default:
            panic("vxsnprintf: invalid or unhandled format character");
            /* only reached if panic() returns */
            free(partbuf);
            return -1;
        }
        /* at this point, ret is the return from a partial sprintf */
        if (ret < 0) {
            final = ret;
            break;
        }
        final += ret;

        /*
         * Advance by what fits, never past the end of the buffer. Once
         * size reaches 0 the remaining parts are only measured.
         */
        step = (size_t)ret < size ? (size_t)ret : size;
        if (step > 0)
            dst += step;
        size -= step;

        /* continue right after the conversion just handled */
        fmt = stop;
    }
    free(partbuf);
    return final;
}

The above is 100% untested, and of course, you have to write
"collect" (which is pretty obvious, it is just a realloc and
memcpy and '\0'-terminate, or reuse existing buffer if big enough).
Modifying it to handle "*" (indirect) width and precision arguments
is left as an exercise. :)
 

Ask a Question

Want to reply to this thread or ask your own question?

You'll need to choose a username for the site, which only takes a couple of moments. After that, you can post your question and our members will help you out.

Ask a Question

Members online

No members online now.

Forum statistics

Threads
473,982
Messages
2,570,190
Members
46,736
Latest member
zacharyharris

Latest Threads

Top