F
Fei Liu
Yet another problem to deal with dynamic data type that can only be
determined at run time. For a netCDF file (a scientific data format), a
variable is defined with its associating dimensions, i.e. data(time, z,
x). Each dimension is defined as wel in the netCDF file, time(time),
z(z), x(x), for example
netcdf andrew_test_data {
dimensions:
XAX = 24 ;
bnds = 2 ;
YAX = 24 ;
ZAX = 24 ;
TAX = UNLIMITED ; // (24 currently)
variables:
double XAX(XAX) ;
XAX:units = "M" ;
XAXoint_spacing = "uneven" ;
XAX:axis = "X" ;
XAX:bounds = "XAX_bnds" ;
double XAX_bnds(XAX, bnds) ;
float WX(XAX) ;
WX:missing_value = -1.e+34f ;
WX:_FillValue = -1.e+34f ;
WX:long_name = "XBOXHI[GX=XAX]-XBOXLO[GX=XAX]" ;
double YAX(YAX) ;
YAX:units = "M" ;
YAXoint_spacing = "uneven" ;
YAX:axis = "Y" ;
YAX:bounds = "YAX_bnds" ;
double YAX_bnds(YAX, bnds) ;
float WY(YAX) ;
WY:missing_value = -1.e+34f ;
WY:_FillValue = -1.e+34f ;
WY:long_name = "YBOXHI[GY=YAX]-YBOXLO[GY=YAX]" ;
double ZAX(ZAX) ;
ZAX:units = "M" ;
ZAXoint_spacing = "uneven" ;
ZAX:axis = "Z" ;
ZAX:bounds = "ZAX_bnds" ;
double ZAX_bnds(ZAX, bnds) ;
float WZ(ZAX) ;
WZ:missing_value = -1.e+34f ;
WZ:_FillValue = -1.e+34f ;
WZ:long_name = "ZBOXHI[GZ=ZAX]-ZBOXLO[GZ=ZAX]" ;
double TAX(TAX) ;
TAX:units = "DAYS" ;
TAX:axis = "T" ;
TAX:bounds = "TAX_bnds" ;
double TAX_bnds(TAX, bnds) ;
float MXZT(TAX, ZAX, XAX) ;
MXZT:missing_value = -1.e+34f ;
MXZT:_FillValue = -1.e+34f ;
MXZT:long_name = "IF MODULO(VXZT,SBAD) GT NBAD THEN
VXZT" ;
MXZT:long_name_mod = "X=-9:36, Z=-9:36, T=-9:36" ;
To compute a running average alone X, Z of MXZT, the weight from the
bounding dimensions are applied (XAX_bnds, ZAX_bnds). To simplify the
situation, let's first consider a one dimensional data called data(x),
x(x), x_bnds(x, 2)
wx = x_bnds(x,1) - x_bnds(x,0)
avg_data = sum(data(i)*wx(i))/sum(wx(i))
Now generalize this to a variable with arbitrary number of dimension.
Let's again take the example, MXZT(t,z,x), x(x),x_bnds(x,2), z(z),
z_bnds(z,2), t(t), t_bnds(t,2)
Clearly, we can use a 3 dimensional array to hold the weight data to do
weighted average, here is a way to compute the 3D weight
weight = 1.
weight(i,j,k) *= wt(i)*wz(j)*wx(k)
But now the problem is since the dimension and ordering of dimension
(it could be data(t, y, x), data(t, z, y), data(t, z,y,x),
data(y,x).......) are unknown before hand and have to be farmed from
the data file. Correspondingly a generic algorithm is needed to compute
this weight variable. I have the following algorithm, it works but it's
slow...
int find_index(data_op_param * param, const string & dimtype, int i,
int j, int k, int l){
int index = find_axis_dim(param->idim, param->ndim, param->dimids,
dimtype); /* find the index of the named dimension */
if(index == -1) return index; /* the named dimension does not exist
for this variable */
switch(param->ndim){ /* depending on the real number of
dimension the data has */
case 1: return l; break;
case 2:
switch(index){
case 0: return k; break;
case 1: return l; break;
}
break;
case 3:
switch(index){
case 0: return j; break;
case 1: return k; break;
case 2: return l; break;
}
break;
case 4:
switch(index){
case 0: return i; break;
case 1: return j; break;
case 2: return k; break;
case 3: return l; break;
}
break;
default: handle_error("can't handle data with dimension size > 4");
}
return index;
}
// bound is NDIM array
void compute_bound(array<double> & bound, size_t * idimlen, int ndim,
data_op_param * param){
int i, j, k, l;
assert(param->ndim >= 1);
map<string, int>::const_iterator end = param->axis->end();
for(i = 0; i < (int)idimlen[0]; i ++)
for(j = 0; j < (int)idimlen[1]; j ++)
for(k = 0; k < (int)idimlen[2]; k ++)
for(l = 0; l < (int)idimlen[3]; l ++){
map<string, int>::const_iterator iter;
if( (param->config->opcode & XAVG) && (iter =
param->axis->find("x")) != end){
int dimid = (*iter).second;
assert(dimid >= 0);
int index = find_index(param, "x", i, j, k, l);
if(index == -1) continue;
bound(i,j,k,l) *= param->idim[dimid].bound_data[index];
}
if( (param->config->opcode & YAVG) && (iter =
param->axis->find("y")) != end){
int dimid = (*iter).second;
assert(dimid >= 0);
int index = find_index(param, "y", i, j, k, l);
if(index == -1) continue;
bound(i,j,k,l) *= param->idim[dimid].bound_data[index];
}
if( (param->config->opcode & ZAVG) && (iter =
param->axis->find("z")) != end){
int dimid = (*iter).second;
assert(dimid >= 0);
int index = find_index(param, "z", i, j, k, l);
if(index == -1) continue;
bound(i,j,k,l) *= param->idim[dimid].bound_data[index];
}
if( (param->config->opcode & TIMEAVG) && (iter =
param->axis->find("time")) != end){
int dimid = (*iter).second;
assert(dimid >= 0);
int index = find_index(param, "time", i, j, k, l);
if(index == -1) continue;
bound(i,j,k,l) *= param->idim[dimid].bound_data[index];
}
}
}
Can you give me some idea how to do this efficiently?
determined at run time. For a netCDF file (a scientific data format), a
variable is defined with its associating dimensions, i.e. data(time, z,
x). Each dimension is defined as wel in the netCDF file, time(time),
z(z), x(x), for example
netcdf andrew_test_data {
dimensions:
XAX = 24 ;
bnds = 2 ;
YAX = 24 ;
ZAX = 24 ;
TAX = UNLIMITED ; // (24 currently)
variables:
double XAX(XAX) ;
XAX:units = "M" ;
XAXoint_spacing = "uneven" ;
XAX:axis = "X" ;
XAX:bounds = "XAX_bnds" ;
double XAX_bnds(XAX, bnds) ;
float WX(XAX) ;
WX:missing_value = -1.e+34f ;
WX:_FillValue = -1.e+34f ;
WX:long_name = "XBOXHI[GX=XAX]-XBOXLO[GX=XAX]" ;
double YAX(YAX) ;
YAX:units = "M" ;
YAXoint_spacing = "uneven" ;
YAX:axis = "Y" ;
YAX:bounds = "YAX_bnds" ;
double YAX_bnds(YAX, bnds) ;
float WY(YAX) ;
WY:missing_value = -1.e+34f ;
WY:_FillValue = -1.e+34f ;
WY:long_name = "YBOXHI[GY=YAX]-YBOXLO[GY=YAX]" ;
double ZAX(ZAX) ;
ZAX:units = "M" ;
ZAXoint_spacing = "uneven" ;
ZAX:axis = "Z" ;
ZAX:bounds = "ZAX_bnds" ;
double ZAX_bnds(ZAX, bnds) ;
float WZ(ZAX) ;
WZ:missing_value = -1.e+34f ;
WZ:_FillValue = -1.e+34f ;
WZ:long_name = "ZBOXHI[GZ=ZAX]-ZBOXLO[GZ=ZAX]" ;
double TAX(TAX) ;
TAX:units = "DAYS" ;
TAX:axis = "T" ;
TAX:bounds = "TAX_bnds" ;
double TAX_bnds(TAX, bnds) ;
float MXZT(TAX, ZAX, XAX) ;
MXZT:missing_value = -1.e+34f ;
MXZT:_FillValue = -1.e+34f ;
MXZT:long_name = "IF MODULO(VXZT,SBAD) GT NBAD THEN
VXZT" ;
MXZT:long_name_mod = "X=-9:36, Z=-9:36, T=-9:36" ;
To compute a running average alone X, Z of MXZT, the weight from the
bounding dimensions are applied (XAX_bnds, ZAX_bnds). To simplify the
situation, let's first consider a one dimensional data called data(x),
x(x), x_bnds(x, 2)
wx = x_bnds(x,1) - x_bnds(x,0)
avg_data = sum(data(i)*wx(i))/sum(wx(i))
Now generalize this to a variable with arbitrary number of dimension.
Let's again take the example, MXZT(t,z,x), x(x),x_bnds(x,2), z(z),
z_bnds(z,2), t(t), t_bnds(t,2)
Clearly, we can use a 3 dimensional array to hold the weight data to do
weighted average, here is a way to compute the 3D weight
weight = 1.
weight(i,j,k) *= wt(i)*wz(j)*wx(k)
But now the problem is since the dimension and ordering of dimension
(it could be data(t, y, x), data(t, z, y), data(t, z,y,x),
data(y,x).......) are unknown before hand and have to be farmed from
the data file. Correspondingly a generic algorithm is needed to compute
this weight variable. I have the following algorithm, it works but it's
slow...
int find_index(data_op_param * param, const string & dimtype, int i,
int j, int k, int l){
int index = find_axis_dim(param->idim, param->ndim, param->dimids,
dimtype); /* find the index of the named dimension */
if(index == -1) return index; /* the named dimension does not exist
for this variable */
switch(param->ndim){ /* depending on the real number of
dimension the data has */
case 1: return l; break;
case 2:
switch(index){
case 0: return k; break;
case 1: return l; break;
}
break;
case 3:
switch(index){
case 0: return j; break;
case 1: return k; break;
case 2: return l; break;
}
break;
case 4:
switch(index){
case 0: return i; break;
case 1: return j; break;
case 2: return k; break;
case 3: return l; break;
}
break;
default: handle_error("can't handle data with dimension size > 4");
}
return index;
}
// bound is NDIM array
void compute_bound(array<double> & bound, size_t * idimlen, int ndim,
data_op_param * param){
int i, j, k, l;
assert(param->ndim >= 1);
map<string, int>::const_iterator end = param->axis->end();
for(i = 0; i < (int)idimlen[0]; i ++)
for(j = 0; j < (int)idimlen[1]; j ++)
for(k = 0; k < (int)idimlen[2]; k ++)
for(l = 0; l < (int)idimlen[3]; l ++){
map<string, int>::const_iterator iter;
if( (param->config->opcode & XAVG) && (iter =
param->axis->find("x")) != end){
int dimid = (*iter).second;
assert(dimid >= 0);
int index = find_index(param, "x", i, j, k, l);
if(index == -1) continue;
bound(i,j,k,l) *= param->idim[dimid].bound_data[index];
}
if( (param->config->opcode & YAVG) && (iter =
param->axis->find("y")) != end){
int dimid = (*iter).second;
assert(dimid >= 0);
int index = find_index(param, "y", i, j, k, l);
if(index == -1) continue;
bound(i,j,k,l) *= param->idim[dimid].bound_data[index];
}
if( (param->config->opcode & ZAVG) && (iter =
param->axis->find("z")) != end){
int dimid = (*iter).second;
assert(dimid >= 0);
int index = find_index(param, "z", i, j, k, l);
if(index == -1) continue;
bound(i,j,k,l) *= param->idim[dimid].bound_data[index];
}
if( (param->config->opcode & TIMEAVG) && (iter =
param->axis->find("time")) != end){
int dimid = (*iter).second;
assert(dimid >= 0);
int index = find_index(param, "time", i, j, k, l);
if(index == -1) continue;
bound(i,j,k,l) *= param->idim[dimid].bound_data[index];
}
}
}
Can you give me some idea how to do this efficiently?