The problem with strings bigger than 2GB (PTRDIFF_MAX) is not relevant
in the environment of lcc-win32.
I've left the comments in the code for comp.lang.c purposes.
I will add code in the first part of the strrepl function to test for
the conditions you mention and call your function if the requirements
are met.
We can do better than that. I hinted at how earlier, but then got inspired
to actually implement it.
The second-hardest scenario (input and output pointers alias; replacement
token shorter than or equal to replaced token) can be dealt with easily by
modifying the function I posted to use memmove instead of memcpy in one of
the loop body calls, and at the end instead of strcpy.
The worst-case scenario (as above but replacement token longer than
replaced token) can be dealt with using a reverse strstr function. I've
implemented a naive portable version but as the compiler author you may be
able to improve on it using asm tricks. Anyhow gcc optimises it pretty
well.
So now every code path in the function avoids redundant operations.
Even in the worst-case scenario, using the non-library str_rev_str
implementation, the new function runs about 150 times faster than the
original for large strings with many small replacements.
In response to Dik T. Winter: yes, adding NULL-handling is trivial, but
IMO it unnecessarily clutters the code and an empty replacement string
is best represented by "" alone. This is consistent with other string
library functions.
Mildly tested:
/**
 * Reverse counterpart of strstr(): find the right-most occurrence of a token
 * inside a string, scanning from the end of the string towards its start.
 *
 * Both ranges are described by inclusive pointers: strstart / tokstart
 * address the first character, strend / tokend the last character before
 * the terminating nul (never the nul itself).  Both end pointers are
 * dereferenced unconditionally, so neither range may be empty.
 *
 * Returns NULL when the token does not occur; otherwise a pointer to the
 * character of the string that lines up with the token's last character.
 */
const char *str_rev_str(const char *strend, const char *strstart,
                        const char *tokend, const char *tokstart)
{
    const char *anchor = strend;

    for (;;) {
        const char *s;
        const char *t;

        /* Slide left to the nearest byte equal to the token's final byte. */
        while (*anchor != *tokend) {
            if (anchor == strstart)
                return NULL;
            anchor--;
        }

        /* Verify the remaining token bytes, right to left. */
        s = anchor;
        t = tokend;
        for (;;) {
            if (t == tokstart)
                return anchor;  /* every token byte matched */
            if (s == strstart)
                return NULL;    /* string exhausted before the token was */
            if (*--s != *--t)
                break;
        }

        /* Mismatch. anchor cannot be strstart here (that case returned
         * above), so stepping one position left stays inside the string. */
        anchor--;
    }
}
/**
 * Replace occurrences of `old` with `new` inside `str` itself, for the case
 * where the replacement is longer than the token (newlen > oldlen).
 *
 * The result is assembled from the back of the buffer towards the front:
 * occurrences are located right-to-left with str_rev_str(), the text that
 * follows each one is shifted to its final position, and the replacement is
 * written in front of it.  Text before the left-most occurrence is never
 * moved.
 *
 * NOTE: because matching runs right-to-left, a token that can overlap itself
 * (e.g. "aa" in "aaa") may be replaced at different positions than a
 * left-to-right strstr() scan would choose.
 *
 * @param str     buffer holding the original string; bytes up to and
 *                including str[retlen] are written
 * @param old     token to replace
 * @param new     replacement text
 * @param oldlen  length of `old`
 * @param newlen  length of `new`
 * @param orglen  length of the string currently in `str`
 * @param retlen  length of the string after all replacements
 * @return number of replacements performed
 */
size_t repl_inplace_rev(char *str, const char *old, const char *new,
                        size_t oldlen, size_t newlen, size_t orglen,
                        size_t retlen)
{
    size_t count = 0;
    char *r = str + retlen;     /* write cursor, moves towards str */
    const char *oldend;
    const char *p;              /* last not-yet-processed source character */
    const char *q;              /* last character of the current match */

    *r = '\0';

    /* An empty token, or a string shorter than the token, cannot match.
     * Bail out before forming old - 1 or str - 1: the reverse search
     * dereferences both end pointers and would read before the buffers. */
    if (oldlen == 0 || orglen < oldlen)
        return 0;

    oldend = old + oldlen - 1;
    p = str + orglen - 1;

    while ((q = str_rev_str(p, str, oldend, old)) != NULL) {
        /* characters between the match and the previous one;
         * this is undefined if p - q > PTRDIFF_MAX */
        size_t tail = (size_t)(p - q);

        count++;
        /* source and destination may overlap, hence memmove */
        memmove(r - tail, q + 1, tail);
        r -= tail;
        memcpy(r - newlen, new, newlen);
        r -= newlen;

        /* A match at the very start of the string: stop here instead of
         * stepping p to one before the buffer. */
        if ((size_t)(q - str) < oldlen)
            break;
        p = q - oldlen;
    }
    return count;
}
/**
 * Compute the length of `str` before and after replacing every
 * non-overlapping occurrence of `old` (located left-to-right with strstr)
 * by a string of length `newlen`.
 *
 * An empty token (oldlen == 0) is treated as matching nothing: strstr()
 * returns its first argument for an empty needle, so counting such
 * "matches" would never advance and never terminate.
 *
 * @param str     nul-terminated input string
 * @param old     nul-terminated token to be replaced
 * @param oldlen  length of `old`
 * @param newlen  length of the replacement
 * @param orglen  out: receives the current length of `str`
 * @return length of the string after replacement (excluding the nul)
 */
size_t repl_get_lengths(const char *str, const char *old, size_t oldlen,
                        size_t newlen, size_t *orglen)
{
    const char *p, *q;
    size_t count = 0;

    /* Same-length replacement or empty token: the size cannot change, so a
     * single strlen() is enough. */
    if (oldlen == 0 || oldlen == newlen) {
        *orglen = strlen(str);
        return *orglen;
    }

    for (p = str; (q = strstr(p, old)) != NULL; p = q + oldlen)
        count++;

    /* p now sits just past the last match, so only the tail still needs
     * measuring; this is undefined if p - str > PTRDIFF_MAX */
    *orglen = (size_t)(p - str) + strlen(p);

    /* unsigned arithmetic: when newlen < oldlen the difference wraps, and
     * the modular sum still yields the correct shorter length */
    return *orglen + count * (newlen - oldlen);
}
/**
 * Replace every occurrence of `old` in `str` with `new`.
 *
 * Modes, selected by `ret`:
 *  - ret == NULL: nothing is written; the return value is the buffer size
 *    in bytes (including the terminating nul) needed to hold the result.
 *  - ret == str:  the string is modified in place.  The caller must make
 *    sure the buffer is large enough when `new` is longer than `old`.
 *  - otherwise:   the result is written to `ret`.
 *
 * An empty `old` is treated as matching nothing: strstr() returns its first
 * argument for an empty needle, so the scan would otherwise never advance.
 *
 * NOTE: the in-place growing case is matched right-to-left, so a token that
 * can overlap itself may be replaced at different positions than in the
 * other cases.
 *
 * @param str  nul-terminated input string
 * @param old  nul-terminated token to replace
 * @param new  nul-terminated replacement ("" to delete the token)
 * @param ret  destination buffer, `str` itself, or NULL (see above)
 * @return number of replacements made, or the required buffer size when
 *         ret == NULL
 */
size_t str_repl(const char *str, const char *old, const char *new, char *ret)
{
    const char *p, *q;
    char *r;
    size_t count = 0;
    size_t oldlen = strlen(old);
    size_t newlen = strlen(new);

    /* Empty token: nothing to replace, just size or copy the input. */
    if (oldlen == 0) {
        size_t len = strlen(str);

        if (ret == NULL)
            return len + 1;
        if (ret != str)
            memmove(ret, str, len + 1);
        return 0;
    }

    if (ret == NULL || (str == ret && newlen > oldlen)) {
        size_t orglen;
        size_t retlen = repl_get_lengths(str, old, oldlen, newlen, &orglen);

        if (ret == NULL)
            return retlen + 1;

        /* Growing in place.  With newlen > oldlen the length changes only if
         * something matched; skip the reverse pass otherwise, since it
         * assumes a non-empty string. */
        if (retlen == orglen)
            return 0;
        return repl_inplace_rev(ret, old, new, oldlen, newlen, orglen, retlen);
    }

    /* Forward copy.  When ret aliases str the output never overtakes the
     * input here (newlen <= oldlen), so memmove keeps each copy safe. */
    r = ret;
    p = str;
    while ((q = strstr(p, old)) != NULL) {
        /* this is undefined if q - p > PTRDIFF_MAX */
        size_t gap = (size_t)(q - p);

        /* when ret can't alias str, can use memcpy instead */
        memmove(r, p, gap);
        r += gap;
        memcpy(r, new, newlen);
        r += newlen;
        count++;
        p = q + oldlen;
    }
    /* when ret can't alias str, can instead use strcpy(r, p); */
    memmove(r, p, strlen(p) + 1);

    return count;
}