Well I'd really appreciate if anyone could report C implementations
which the following program requests them to report.
This program attempts to demonstrate that the definition of pointer
arithmetic makes the informative note in C99's Annex J tough to
rationalize.
Under J.2:
"An array subscript is out of range, even if an object is apparently
accessible with the
given subscript (as in the lvalue expression a[1][7] given the
declaration int
a[4][5]) (6.5.6).
Here is the program. Thanks!:
/**
* bounds.c
*
* Check if a C implementation might encode bounds
* information into its pointer representation.
*
* (C) Shao Miller, 2010. All rights reserved.
* Permission is granted to:
* - Copy the source code
* - Compile the source code into an executable program
* - Execute the resulting program
*
* Please report any interesting cases! Thank you!
*/
#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
static void please_report(void) {
printf("PLEASE REPORT this C implementation's name and version to:
\n"
"Usenet: comp.lang.c: \"Bounds Checking as Undefined Behaviour?\"\n"
"Or:\n"
"
http://groups.google.com/group/comp.lang.c/browse_thread/thread/
c4c847820e1f25f1\n\n");
return;
}
unsigned char *claim1(void) {
/* Claim #1: Initialized to 3 at program startup */
static unsigned char c = 3;
return &c;
}
static unsigned char claim2(unsigned char param) {
unsigned char *cp;
static int fill_set = 0;
cp = claim1();
if (fill_set)
goto claim;
check:
if (param > 2) {
--param;
--*cp;
goto check;
}
--*cp;
--*cp;
fill_set = 1;
claim:
/* Claim #2: If param is 3, then by claim #1, we return 0 */
return *cp;
}
static void claim3(unsigned char *area, size_t count) {
unsigned char fill;
fill = claim2(3);
while (count)
area[--count] = fill;
/* Claim #3: By claim #2, area will be filled with 0 */
return;
}
struct ptr_wrapper {
/* No padding before the first member */
int *ip;
/* Possible unspecified padding */
};
int main(void) {
struct ptr_wrapper s1, s2, final1, final2;
unsigned char s1_copy[sizeof (struct ptr_wrapper)];
unsigned char s2_copy[sizeof (struct ptr_wrapper)];
unsigned char mixer1[sizeof s1_copy];
unsigned char mixer2[sizeof s2_copy];
void *vp;
int (*inta)[10];
size_t sz = sizeof *inta;
unsigned char *copier;
int encoded_bounds;
int *oob_tester;
vp = malloc(sz);
if (!vp) {
printf("Out of memory. Sorry.\n");
return 1;
}
/**
* The allocated space might be an object.
* It might be an array object.
* What are its bounds at this moment?
* What is the type for the object(s)?
*/
s1.ip = vp;
/**
* Is pointer arithmetic with s1.ip well-defined? How many
elements?
* Claim #4: We have not modified any values in the allocated space,
* so we haven't established an effective type for it yet.
*/
/* Copy s1 */
sz = sizeof s1;
copier = (unsigned char *)&s1;
while (sz) {
--sz;
s1_copy[sz] = copier[sz];
}
inta = vp;
s2.ip = *inta;
/**
* Is pointer arithmetic with s1.ip well-defined? How many
elements?
* Claim #5: We have not modified any values in the allocated space,
* so we haven't established an effective type for it yet.
*/
/* Copy s2 */
sz = sizeof s2;
copier = (unsigned char *)&s2;
while (sz) {
--sz;
s2_copy[sz] = copier[sz];
}
/* Fill mixer1 */
claim3(mixer1, sizeof mixer1);
/* Claim #6: By claim #3, mixer1 is filled with 0 */
/* Fill mixer2 */
claim3(mixer2, sizeof mixer2);
/* Claim #7: By claim #3, mixer2 is filled with 0 */
/* Mix s1_copy into mixer1 */
sz = sizeof s1_copy;
copier = (unsigned char *)s1_copy;
while (sz) {
--sz;
mixer1[sz] |= copier[sz];
}
/* Claim #8: By claim #6 and the ORing above, mixer1 is a copy of s1
*/
/* Mix s2_copy into mixer2 */
sz = sizeof s2_copy;
copier = (unsigned char *)s2_copy;
while (sz) {
--sz;
mixer2[sz] |= copier[sz];
}
/* Claim #9: By claim #7 and the ORing above, mixer2 is a copy of s2
*/
/**
* Compare the pointer representations copied from s1 and s2.
* Since there is no padding before the pointer member, these are
* the first bytes in the sequence.
*/
sz = sizeof s1.ip;
encoded_bounds = 0;
while (sz) {
--sz;
if (mixer1[sz] != mixer2[sz])
encoded_bounds = 1;
}
if (encoded_bounds) {
printf("This C implementation may encode bounds in its pointer"
" representation!\n");
please_report();
/* Check for comparison equality */
if (s1.ip == s2.ip) {
printf("Furthermore, this C implementation compares\n"
"pointers with different bounds as being equal!\n");
please_report();
}
}
/* Copy mixer1 into final1 */
sz = sizeof mixer1;
copier = (unsigned char *)&final1;
while (sz) {
--sz;
copier[sz] = mixer1[sz];
}
/* Copy mixer2 into final2 */
sz = sizeof mixer2;
copier = (unsigned char *)&final2;
while (sz) {
--sz;
copier[sz] = mixer2[sz];
}
/**
* Have any bounds implied by the original pointers carried across
* into final1 and final2? How can a C implementation have passed
* them along?
*/
printf("Run-time test for this C implementation's bounds-checking.
\n"
"If you do not see a message indicating success down below, then
\n");
please_report();
/* Do we establish the int[10] effective type here? */
final2.ip[9] = 5;
/* Is the pointer arithmetic implied below well-defined? */
final2.ip[10] = 5;
/* Is the pointer arithmetic implied below well-defined? */
final1.ip[10] = 5;
/* Perhaps somehow, the implementation will have noted OOB? */
/* How about this reasoning? */
oob_tester = final2.ip;
oob_tester++;
/* If the first element is an int[1], now we point one-past. Do it
again. */
oob_tester++;
/* But, perhaps there was an int[1] there, too. We point one-past
it now. */
oob_tester++;
/* And so on */
oob_tester++;
oob_tester++;
oob_tester++;
oob_tester++;
oob_tester++;
oob_tester++; /* We are pointing at a ninth int element */
oob_tester++; /* We are pointing one-past a ninth int element */
*oob_tester = 5; /* Out-of-bounds? */
/* Put differently, */
oob_tester = final2.ip;
*((((((((((oob_tester + 1) + 1) + 1) + 1) + 1) + 1) + 1) + 1) + 1) +
1) = 5;
/* Versus */
*(oob_tester + 10) = 5;
printf("Bounds-checking test succeeded.\n\n");
return 0;
}