L
lancer6238
Dear all,
I'm trying to implement the Smith-Waterman algorithm for DNA sequence
alignment. I'm aligning a 2040 bp query sequence against a 5040 bp
sequence. I'll be trying to implement a parallel version of it later,
that's why I have the MPI timer function to measure the speedup.
seq[0] and foundseq0 are the query sequence. size is the length of the
sequences.
The problem seems to occur when I'm trying to increase the memory
allocated to the 2 1D arrays foundseq0 and foundseq1. When compiling,
I get the warnings
warning: assignment makes pointer from integer without a cast
warning: assignment makes pointer from integer without a cast
for the lines
if (temp = realloc(foundseq0, sizeof(char) * (count + foundsize[0]))
== NULL)
and
if (temp = realloc(foundseq1, sizeof(char) * (count + foundsize[k]))
== NULL)
although temp, foundseq0 and foundseq1 are char pointers.
I run the program, and I get the "Segmentation Fault" error and the
program stops right after the
printf("A\n");
statement.
However, the program works fine for very small sequences, 14bp against
13bp, and there is no need to reallocate memory.
Please advise.
Below is the code:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#define MAX_SEQ 6000 // maximum length of sequence
#define N 2 // total number of sequences
/* Assume that the number of sequences in the database to be compared
with the
query sequence is known, and that the size of each sequence is also
known. */
struct table
{
int previ;
int prevj;
double value;
};
int main(int argc, char** argv)
{
FILE *fin, *fin1;
char input[FILENAME_MAX], *foundseq0, *foundseq1, *temp;
static char seq[N][MAX_SEQ+1];
double max = 0.0, maxscore = 0.0, test, start, end;
int size[N], i = 0, k, j = 0, stop = 0, count = 1, c;
int row = 1, maxrow = 0, column = 1, maxcolumn = 0;
int prevrow, prevcol, foundsize[N];
struct table **cell;
MPI_Init(&argc, &argv);
fin = fopen("files3.txt", "r");
if (!fin)
{
printf("Input file cannot be opened!\n");
exit(0);
}
while (fscanf(fin, "%s", input) != EOF)
{
if (i >= N)
{
printf("Number of sequences is more than that allowed!\n");
exit(0);
}
fin1 = fopen(input, "r");
if (!fin1)
{
printf("Input sequence file cannot be opened!\n");
exit(0);
}
printf("%s\n", input);
fscanf(fin1, "%d ", &size);
printf("%d\n", size);
while ((c = fgetc(fin1)) != EOF)
{
if (c != '\n')
{
if (j >= MAX_SEQ)
{
printf("Sequence is longer than that allowed!\n");
exit(0);
}
seq[j] = c;
j++;
}
}
printf("\n");
j = 0;
i++;
fclose(fin1);
}
for (i = 0 ; i < N ; i++)
foundsize = size;
printf("Finished reading from files\n");
MPI_Barrier (MPI_COMM_WORLD);
start = MPI_Wtime();
/* Start DNA sequence comparison */
for (k = 1 ; k < N ; k++)
{
cell = (struct table**)calloc(size[0]+1, sizeof(struct table*));
for (i = 0 ; i < size[0]+1 ; i++)
cell = (struct table*)calloc(size[k]+1, sizeof(struct table));
printf("Initialization\n");
for (i = 1 ; i <= size[0] ; i++)
{
for (j = 1 ; j <= size[k] ; j++)
{
if (seq[0][i-1] == seq[k][j-1])
cell[j].value = 1.0;
else
cell[j].value = -1.0/3.0;
}
}
/* Calculate scoring matrix */
while (!stop)
{
for (i = row-1 ; i >= 0 ; i--) // check subcolumn
{
test = cell[column].value - (1.0 + (1.0/3.0)*(double)
(row-i));
if (test > max)
{
max = test;
cell[row][column].previ = i;
cell[row][column].prevj = column;
}
}
for (j = column-1 ; j >= 0 ; j--) // check subrow
{
test = cell[row][j].value - (1.0 + (1.0/3.0) * (double)
(column-j));
if (test > max)
{
max = test;
cell[row][column].previ = row;
cell[row][column].prevj = j;
}
}
test = cell[row-1][column-1].value + cell[row][column].value;
if (test > max)
{
max = test;
cell[row][column].previ = row-1;
cell[row][column].prevj = column-1;
}
cell[row][column].value = max;
/* find maximum score in matrix */
if (maxscore < max)
{
maxscore = max;
maxrow = row;
maxcolumn = column;
}
max = 0.0; // reset max score of cell
if (column >= size[k])
{
column = 1;
row++;
printf("Row = %d\n", row);
}
else
column++;
if (column == 1 && row > size[0])
stop = 1;
}
/* DNA sequence alignment */
foundsize[0] = size[0];
foundseq0 = (char*)calloc(foundsize[0], sizeof(char));
foundseq1 = (char*)calloc(foundsize[k], sizeof(char));
row = maxrow;
column = maxcolumn;
stop = 0;
count = 0;
printf("Alignment\n");
while (!stop)
{
/* store the DNA alignments */
prevrow = cell[row][column].previ;
prevcol = cell[row][column].prevj;
if (cell[row][column].value == 0.0)
break;
if (row == prevrow)
foundseq0[count] = '-';
else
foundseq0[count] = seq[0][row-1];
if (column == prevcol)
foundseq1[count] = '-';
else
foundseq1[count] = seq[k][column-1];
row = prevrow;
column = prevcol;
count++;
printf("Count = %d\n", count);
if (count >= foundsize[0])
{
printf("A\n"); <--- Program stops here
if (temp = realloc(foundseq0, sizeof(char) * (count +
foundsize[0])) == NULL)
{
printf("ERROR: realloc failed");
exit(0);
}
foundsize[0] += count;
foundseq0 = temp;
}
if (count >= foundsize[k])
{
printf("B\n");
if (temp = realloc(foundseq1, sizeof(char) * (count +
foundsize[k])) == NULL)
{
printf("ERROR: realloc failed");
exit(0);
}
foundsize[k] += count;
foundseq1 = temp;
}
}
/* Print the alignment */
for (i = count-1 ; i >= 0 ; i--)
printf("%c", foundseq0);
printf("\n");
for (i = count-1 ; i >= 0 ; i--)
{
if (foundseq1 == foundseq0)
printf("|");
else
printf(" ");
}
printf("\n");
for (i = count-1 ; i >= 0 ; i--)
printf("%c", foundseq1);
printf("\n");
free(foundseq0);
free(foundseq1);
for (i = 0 ; i < size[0]+1 ; i++)
free(cell);
free(cell);
}
end = MPI_Wtime();
printf("Elapsed time = %lfs\n", end-start);
fclose(fin);
MPI_Finalize();
return 0;
}
Thank you.
Regards,
Rayne
I'm trying to implement the Smith-Waterman algorithm for DNA sequence
alignment. I'm aligning a 2040 bp query sequence against a 5040 bp
sequence. I'll be trying to implement a parallel version of it later,
that's why I have the MPI timer function to measure the speedup.
seq[0] and foundseq0 are the query sequence. size is the length of the
sequences.
The problem seems to occur when I'm trying to increase the memory
allocated to the 2 1D arrays foundseq0 and foundseq1. When compiling,
I get the warnings
warning: assignment makes pointer from integer without a cast
warning: assignment makes pointer from integer without a cast
for the lines
if (temp = realloc(foundseq0, sizeof(char) * (count + foundsize[0]))
== NULL)
and
if (temp = realloc(foundseq1, sizeof(char) * (count + foundsize[k]))
== NULL)
although temp, foundseq0 and foundseq1 are char pointers.
I run the program, and I get the "Segmentation Fault" error and the
program stops right after the
printf("A\n");
statement.
However, the program works fine for very small sequences, 14bp against
13bp, and there is no need to reallocate memory.
Please advise.
Below is the code:
#include <stdio.h>
#include <stdlib.h>
#include <mpi.h>
#define MAX_SEQ 6000 // maximum length of sequence
#define N 2 // total number of sequences
/* Assume that the number of sequences in the database to be compared
with the
query sequence is known, and that the size of each sequence is also
known. */
struct table
{
int previ;
int prevj;
double value;
};
int main(int argc, char** argv)
{
FILE *fin, *fin1;
char input[FILENAME_MAX], *foundseq0, *foundseq1, *temp;
static char seq[N][MAX_SEQ+1];
double max = 0.0, maxscore = 0.0, test, start, end;
int size[N], i = 0, k, j = 0, stop = 0, count = 1, c;
int row = 1, maxrow = 0, column = 1, maxcolumn = 0;
int prevrow, prevcol, foundsize[N];
struct table **cell;
MPI_Init(&argc, &argv);
fin = fopen("files3.txt", "r");
if (!fin)
{
printf("Input file cannot be opened!\n");
exit(0);
}
while (fscanf(fin, "%s", input) != EOF)
{
if (i >= N)
{
printf("Number of sequences is more than that allowed!\n");
exit(0);
}
fin1 = fopen(input, "r");
if (!fin1)
{
printf("Input sequence file cannot be opened!\n");
exit(0);
}
printf("%s\n", input);
fscanf(fin1, "%d ", &size);
printf("%d\n", size);
while ((c = fgetc(fin1)) != EOF)
{
if (c != '\n')
{
if (j >= MAX_SEQ)
{
printf("Sequence is longer than that allowed!\n");
exit(0);
}
seq[j] = c;
j++;
}
}
printf("\n");
j = 0;
i++;
fclose(fin1);
}
for (i = 0 ; i < N ; i++)
foundsize = size;
printf("Finished reading from files\n");
MPI_Barrier (MPI_COMM_WORLD);
start = MPI_Wtime();
/* Start DNA sequence comparison */
for (k = 1 ; k < N ; k++)
{
cell = (struct table**)calloc(size[0]+1, sizeof(struct table*));
for (i = 0 ; i < size[0]+1 ; i++)
cell = (struct table*)calloc(size[k]+1, sizeof(struct table));
printf("Initialization\n");
for (i = 1 ; i <= size[0] ; i++)
{
for (j = 1 ; j <= size[k] ; j++)
{
if (seq[0][i-1] == seq[k][j-1])
cell[j].value = 1.0;
else
cell[j].value = -1.0/3.0;
}
}
/* Calculate scoring matrix */
while (!stop)
{
for (i = row-1 ; i >= 0 ; i--) // check subcolumn
{
test = cell[column].value - (1.0 + (1.0/3.0)*(double)
(row-i));
if (test > max)
{
max = test;
cell[row][column].previ = i;
cell[row][column].prevj = column;
}
}
for (j = column-1 ; j >= 0 ; j--) // check subrow
{
test = cell[row][j].value - (1.0 + (1.0/3.0) * (double)
(column-j));
if (test > max)
{
max = test;
cell[row][column].previ = row;
cell[row][column].prevj = j;
}
}
test = cell[row-1][column-1].value + cell[row][column].value;
if (test > max)
{
max = test;
cell[row][column].previ = row-1;
cell[row][column].prevj = column-1;
}
cell[row][column].value = max;
/* find maximum score in matrix */
if (maxscore < max)
{
maxscore = max;
maxrow = row;
maxcolumn = column;
}
max = 0.0; // reset max score of cell
if (column >= size[k])
{
column = 1;
row++;
printf("Row = %d\n", row);
}
else
column++;
if (column == 1 && row > size[0])
stop = 1;
}
/* DNA sequence alignment */
foundsize[0] = size[0];
foundseq0 = (char*)calloc(foundsize[0], sizeof(char));
foundseq1 = (char*)calloc(foundsize[k], sizeof(char));
row = maxrow;
column = maxcolumn;
stop = 0;
count = 0;
printf("Alignment\n");
while (!stop)
{
/* store the DNA alignments */
prevrow = cell[row][column].previ;
prevcol = cell[row][column].prevj;
if (cell[row][column].value == 0.0)
break;
if (row == prevrow)
foundseq0[count] = '-';
else
foundseq0[count] = seq[0][row-1];
if (column == prevcol)
foundseq1[count] = '-';
else
foundseq1[count] = seq[k][column-1];
row = prevrow;
column = prevcol;
count++;
printf("Count = %d\n", count);
if (count >= foundsize[0])
{
printf("A\n"); <--- Program stops here
if (temp = realloc(foundseq0, sizeof(char) * (count +
foundsize[0])) == NULL)
{
printf("ERROR: realloc failed");
exit(0);
}
foundsize[0] += count;
foundseq0 = temp;
}
if (count >= foundsize[k])
{
printf("B\n");
if (temp = realloc(foundseq1, sizeof(char) * (count +
foundsize[k])) == NULL)
{
printf("ERROR: realloc failed");
exit(0);
}
foundsize[k] += count;
foundseq1 = temp;
}
}
/* Print the alignment */
for (i = count-1 ; i >= 0 ; i--)
printf("%c", foundseq0);
printf("\n");
for (i = count-1 ; i >= 0 ; i--)
{
if (foundseq1 == foundseq0)
printf("|");
else
printf(" ");
}
printf("\n");
for (i = count-1 ; i >= 0 ; i--)
printf("%c", foundseq1);
printf("\n");
free(foundseq0);
free(foundseq1);
for (i = 0 ; i < size[0]+1 ; i++)
free(cell);
free(cell);
}
end = MPI_Wtime();
printf("Elapsed time = %lfs\n", end-start);
fclose(fin);
MPI_Finalize();
return 0;
}
Thank you.
Regards,
Rayne