You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
308 lines
11 KiB
308 lines
11 KiB
/* This is part of the netCDF package.
   Copyright 2018 University Corporation for Atmospheric Research/Unidata.
   See COPYRIGHT file for conditions of use.

   This is a very simple example which writes a netCDF file with
   Unicode names encoded with UTF-8. It is the NETCDF3 equivalent
   of tst_unicode.c

   Russ Rew, Ed Hartnett
*/
|
|
|
|
#include <config.h>
|
|
#include <stdlib.h>
|
|
#include <nc_tests.h>
|
|
#include "err_macros.h"
|
|
#include <netcdf.h>
|
|
#include <string.h>
|
|
|
|
/* The data file we will create. */
|
|
#define FILE_NAME "tst_utf8.nc"
|
|
#define NDIMS 1
|
|
#define NX 18
|
|
#define ENUM_VALUE 2
|
|
#define BORING_NAME "boring"
|
|
|
|
/* (unnormalized) UTF-8 encoding for Unicode 8-character "Hello" in Greek */
|
|
char name_utf8[] = "\xCE\x9A\xCE\xB1\xCE\xBB\xCE\xB7\xCE\xBC\xE1\xBD\xB3\xCF\x81\xCE\xB1";
|
|
|
|
/* NFC normalized UTF-8 for Unicode 8-character "Hello" in Greek */
|
|
char norm_utf8[] = "\xCE\x9A\xCE\xB1\xCE\xBB\xCE\xB7\xCE\xBC\xCE\xAD\xCF\x81\xCE\xB1";
|
|
|
|
/* This is the struct for the compound type. */
|
|
struct comp {
|
|
int i;
|
|
};
|
|
|
|
/* Given an ncid, check the file to make sure it has all the objects I
|
|
* expect. */
|
|
int
|
|
check_nc4_file(int ncid)
|
|
{
|
|
int varid, dimid, attnum, grpid, grpid2, grpid3, numgrps;
|
|
int numtypes, enum_typeid, comp_typeid;
|
|
int class_in;
|
|
size_t att_len, size_in, num_mem, nfields_in;
|
|
nc_type att_type, base_type_in;
|
|
char name_in[NC_MAX_NAME + 1], strings_in[NC_MAX_NAME + 1], value;
|
|
|
|
/* Check the group. */
|
|
if (nc_inq_grps(ncid, &numgrps, &grpid)) ERR;
|
|
if (numgrps != 1) ERR;
|
|
name_in[0] = 0;
|
|
if (nc_inq_grpname(grpid, name_in)) ERR;
|
|
if (strncmp(norm_utf8, name_in, sizeof(norm_utf8))) ERR;
|
|
|
|
/* Check the variable. */
|
|
if (nc_inq_varid(grpid, name_utf8, &varid)) ERR;
|
|
if (nc_inq_varname(grpid, varid, name_in)) ERR;
|
|
if (strncmp(norm_utf8, name_in, sizeof(norm_utf8))) ERR;
|
|
if (nc_inq_varid(grpid, norm_utf8, &varid)) ERR;
|
|
name_in[0] = 0;
|
|
if (nc_inq_varname(grpid, varid, name_in)) ERR;
|
|
if (strncmp(norm_utf8, name_in, sizeof(norm_utf8))) ERR;
|
|
if (nc_get_var(grpid, varid, strings_in)) ERR;
|
|
if (strncmp(name_utf8, strings_in, sizeof(name_utf8))) ERR;
|
|
strings_in[0] = '\0'; /* Reset my string buffer. */
|
|
|
|
/* Check the dimension. */
|
|
if (nc_inq_dimid(grpid, name_utf8, &dimid)) ERR;
|
|
if (nc_inq_dimname(grpid, dimid, name_in)) ERR;
|
|
if (strncmp(norm_utf8, name_in, sizeof(norm_utf8))) ERR;
|
|
if (nc_inq_dimid(grpid, norm_utf8, &dimid)) ERR;
|
|
if (nc_inq_dimname(grpid, dimid, name_in)) ERR;
|
|
if (strncmp(norm_utf8, name_in, sizeof(norm_utf8))) ERR;
|
|
|
|
/* Check the attribute. We don't normalize data or attribute
|
|
* values, so get exactly what was put for the value, but
|
|
* normalized values for names. */
|
|
if (nc_inq_attid(grpid, varid, norm_utf8, &attnum)) ERR;
|
|
if (attnum) ERR;
|
|
attnum = 99; /* Reset. */
|
|
if (nc_inq_attid(grpid, varid, name_utf8, &attnum)) ERR;
|
|
if (attnum) ERR;
|
|
if (nc_inq_att(grpid, varid, norm_utf8, &att_type, &att_len)) ERR;
|
|
if (att_type != NC_CHAR || att_len != sizeof(name_utf8)) ERR;
|
|
if (nc_get_att_text(grpid, varid, norm_utf8, strings_in)) ERR;
|
|
if (strncmp(name_utf8, strings_in, sizeof(name_utf8))) ERR;
|
|
|
|
/* Check the enum type. */
|
|
if (nc_inq_grps(grpid, &numgrps, &grpid2)) ERR;
|
|
if (numgrps != 1) ERR;
|
|
if (nc_inq_typeids(grpid2, &numtypes, &enum_typeid)) ERR;
|
|
if (numtypes != 1) ERR;
|
|
if (nc_inq_user_type(grpid2, enum_typeid, name_in, &size_in, &base_type_in,
|
|
&nfields_in, &class_in)) ERR;
|
|
if (strncmp(norm_utf8, name_in, strlen(norm_utf8)) || size_in != 1 ||
|
|
base_type_in != NC_BYTE || nfields_in != 1 || class_in != NC_ENUM) ERR;
|
|
name_in[0] = size_in = base_type_in = 0;
|
|
if (nc_inq_enum(grpid2, enum_typeid, name_in, &base_type_in, &size_in, &num_mem)) ERR;
|
|
if (strncmp(norm_utf8, name_in, strlen(norm_utf8)) || size_in != 1 ||
|
|
base_type_in != NC_BYTE || num_mem != 1) ERR;
|
|
if (nc_inq_enum_member(grpid2, enum_typeid, 0, name_in, &value)) ERR;
|
|
if (strncmp(norm_utf8, name_in, sizeof(norm_utf8)) || value != ENUM_VALUE) ERR;
|
|
|
|
/* Check the compound type. */
|
|
if (nc_inq_grps(grpid2, &numgrps, &grpid3)) ERR;
|
|
if (numgrps != 1) ERR;
|
|
if (nc_inq_typeids(grpid3, &numtypes, &comp_typeid)) ERR;
|
|
if (numtypes != 1) ERR;
|
|
name_in[0] = 0;
|
|
if (nc_inq_user_type(grpid3, comp_typeid, name_in, &size_in, &base_type_in,
|
|
&nfields_in, &class_in)) ERR;
|
|
if (strncmp(norm_utf8, name_in, sizeof(norm_utf8)) || size_in != sizeof(struct comp) ||
|
|
base_type_in != NC_NAT || nfields_in != 1 || class_in != NC_COMPOUND) ERR;
|
|
size_in = nfields_in = 999;
|
|
if (nc_inq_compound(grpid3, comp_typeid, name_in, &size_in, &nfields_in)) ERR;
|
|
if (strncmp(norm_utf8, name_in, sizeof(norm_utf8)) || size_in != sizeof(struct comp) ||
|
|
nfields_in != 1) ERR;
|
|
name_in[0] = 0;
|
|
if (nc_inq_compound_fieldname(grpid3, comp_typeid, 0, name_in)) ERR;
|
|
if (strncmp(norm_utf8, name_in, sizeof(norm_utf8))) ERR;
|
|
return NC_NOERR;
|
|
}
|
|
|
|
/* Given an ncid, check the file to make sure it has all the objects I
|
|
* expect. */
|
|
int
|
|
check_classic_file(int ncid)
|
|
{
|
|
int varid, dimid, attnum;
|
|
size_t att_len;
|
|
nc_type att_type;
|
|
char name_in[sizeof(name_utf8) + 1], strings_in[sizeof(name_utf8) + 1];
|
|
|
|
/* Check the variable. */
|
|
if (nc_inq_varid(ncid, name_utf8, &varid)) ERR;
|
|
if (nc_inq_varname(ncid, varid, name_in)) ERR;
|
|
if (strncmp(norm_utf8, name_in, sizeof(norm_utf8))) ERR;
|
|
if (nc_inq_varid(ncid, norm_utf8, &varid)) ERR;
|
|
name_in[0] = 0;
|
|
if (nc_inq_varname(ncid, varid, name_in)) ERR;
|
|
if (strncmp(norm_utf8, name_in, sizeof(norm_utf8))) ERR;
|
|
if (nc_get_var_text(ncid, varid, strings_in)) ERR;
|
|
if (strncmp(name_utf8, strings_in, sizeof(name_utf8))) ERR;
|
|
strings_in[0] = '\0'; /* Reset my string buffer. */
|
|
|
|
/* Check the dimension. */
|
|
if (nc_inq_dimid(ncid, name_utf8, &dimid)) ERR;
|
|
if (nc_inq_dimname(ncid, dimid, name_in)) ERR;
|
|
if (strncmp(norm_utf8, name_in, sizeof(norm_utf8))) ERR;
|
|
if (nc_inq_dimid(ncid, norm_utf8, &dimid)) ERR;
|
|
if (nc_inq_dimname(ncid, dimid, name_in)) ERR;
|
|
if (strncmp(norm_utf8, name_in, sizeof(norm_utf8))) ERR;
|
|
|
|
/* Check the attribute. We don't normalize data or attribute
|
|
* values, so get exactly what was put for the value, but
|
|
* normalized values for names. */
|
|
if (nc_inq_attid(ncid, varid, norm_utf8, &attnum)) ERR;
|
|
if (attnum) ERR;
|
|
attnum = 99; /* Reset. */
|
|
if (nc_inq_attid(ncid, varid, name_utf8, &attnum)) ERR;
|
|
if (attnum) ERR;
|
|
if (nc_inq_att(ncid, varid, norm_utf8, &att_type, &att_len)) ERR;
|
|
if (att_type != NC_CHAR || att_len != sizeof(name_utf8)) ERR;
|
|
if (nc_get_att_text(ncid, varid, norm_utf8, strings_in)) ERR;
|
|
if (strncmp(name_utf8, strings_in, sizeof(name_utf8))) ERR;
|
|
return NC_NOERR;
|
|
}
|
|
|
|
int
|
|
main(int argc, char **argv)
|
|
{
|
|
printf("\n*** Testing UTF-8 names.\n");
|
|
printf("*** creating UTF-8 names in classic model netcdf files...");
|
|
{
|
|
int ncid, varid, dimids[NDIMS];
|
|
int f;
|
|
|
|
for (f = NC_FORMAT_CLASSIC; f < NC_FORMAT_NETCDF4_CLASSIC; f++)
|
|
{
|
|
if (nc_set_default_format(f, NULL)) ERR;
|
|
if (nc_create(FILE_NAME, NC_CLOBBER, &ncid)) ERR;
|
|
|
|
/* Define various netcdf objects with a Unicode UTF-8 encoded name
|
|
* that must be normalized. Where possible, also use the utf8
|
|
* string as the value. The name will be normalized, but not the
|
|
* value. */
|
|
if (nc_def_dim(ncid, name_utf8, NX, &dimids[0])) ERR;
|
|
if (nc_def_var(ncid, name_utf8, NC_CHAR, NDIMS, dimids, &varid)) ERR;
|
|
if (nc_put_att_text(ncid, varid, name_utf8, sizeof(name_utf8), name_utf8)) ERR;
|
|
|
|
if (nc_enddef(ncid)) ERR;
|
|
|
|
/* Write var data. */
|
|
if (nc_put_var_text(ncid, varid, name_utf8)) ERR;
|
|
|
|
/* Check the file. */
|
|
check_classic_file(ncid);
|
|
|
|
if (nc_close(ncid)) ERR;
|
|
|
|
/* Reopen the file and check again. */
|
|
if (nc_open(FILE_NAME, NC_NOWRITE, &ncid)) ERR;
|
|
check_classic_file(ncid);
|
|
if (nc_close(ncid)) ERR;
|
|
} /* next format */
|
|
}
|
|
SUMMARIZE_ERR;
|
|
|
|
#define DIM1_NAME "d1"
|
|
#define VAR1_NAME "v1"
|
|
#define ATT1_NAME "a1"
|
|
|
|
printf("*** renaming to UTF-8 names in classic model netcdf files...");
|
|
{
|
|
int ncid, varid, dimids[NDIMS];
|
|
int f;
|
|
|
|
for (f = NC_FORMAT_CLASSIC; f < NC_FORMAT_NETCDF4_CLASSIC; f++)
|
|
{
|
|
if (nc_set_default_format(f, NULL)) ERR;
|
|
if (nc_create(FILE_NAME, NC_CLOBBER, &ncid)) ERR;
|
|
|
|
/* Create objects. */
|
|
if (nc_def_dim(ncid, DIM1_NAME, NX, &dimids[0])) ERR;
|
|
if (nc_rename_dim(ncid, 0, name_utf8)) ERR;
|
|
if (nc_def_var(ncid, name_utf8, NC_CHAR, NDIMS, dimids, &varid)) ERR;
|
|
if (nc_put_att_text(ncid, varid, ATT1_NAME, sizeof(name_utf8), name_utf8)) ERR;
|
|
if (nc_rename_att(ncid, 0, ATT1_NAME, name_utf8)) ERR;
|
|
|
|
if (nc_enddef(ncid)) ERR;
|
|
|
|
/* Write var data. */
|
|
if (nc_put_var_text(ncid, varid, name_utf8)) ERR;
|
|
|
|
/* Check the file. */
|
|
check_classic_file(ncid);
|
|
|
|
if (nc_close(ncid)) ERR;
|
|
|
|
/* Reopen the file and check again. */
|
|
if (nc_open(FILE_NAME, NC_NOWRITE, &ncid)) ERR;
|
|
check_classic_file(ncid);
|
|
if (nc_close(ncid)) ERR;
|
|
} /* next format */
|
|
}
|
|
SUMMARIZE_ERR;
|
|
|
|
printf("*** creating UTF-8 names in netcdf-4 file...");
|
|
{
|
|
int ncid, varid, grpid, comp_typeid, enum_typeid, grpid2, grpid3;
|
|
int dimids[NDIMS];
|
|
char my_int = ENUM_VALUE;
|
|
|
|
if (nc_create(FILE_NAME, NC_NETCDF4 | NC_CLOBBER, &ncid)) ERR;
|
|
|
|
/* Define various netcdf objects with a Unicode UTF-8 encoded name
|
|
* that must be normalized. Where possible, also use the utf8
|
|
* string as the value. The name will be normalized, but not the
|
|
* value. */
|
|
if (nc_def_grp(ncid, name_utf8, &grpid)) ERR;
|
|
if (nc_def_dim(grpid, name_utf8, NX, &dimids[0])) ERR;
|
|
if (nc_def_var(grpid, name_utf8, NC_CHAR, NDIMS, dimids, &varid)) ERR;
|
|
if (nc_put_att_text(grpid, varid, name_utf8, sizeof(name_utf8), name_utf8)) ERR;
|
|
|
|
if (nc_def_grp(grpid, "tmp", &grpid2)) ERR;
|
|
if (nc_def_enum(grpid2, NC_BYTE, name_utf8, &enum_typeid)) ERR;
|
|
if (nc_insert_enum(grpid2, enum_typeid, name_utf8, &my_int)) ERR;
|
|
|
|
if (nc_def_grp(grpid2, "tmp", &grpid3)) ERR;
|
|
if (nc_def_compound(grpid3, sizeof(struct comp), name_utf8, &comp_typeid)) ERR;
|
|
if (nc_insert_compound(grpid3, comp_typeid, name_utf8, offsetof(struct comp, i), NC_INT)) ERR;
|
|
|
|
/* Write var data. */
|
|
if (nc_put_var_text(grpid, varid, name_utf8)) ERR;
|
|
|
|
/* Check the file. */
|
|
check_nc4_file(ncid);
|
|
|
|
if (nc_close(ncid)) ERR;
|
|
|
|
/* Reopen the file and check again. */
|
|
if (nc_open(FILE_NAME, NC_NOWRITE, &ncid)) ERR;
|
|
check_nc4_file(ncid);
|
|
if (nc_close(ncid)) ERR;
|
|
}
|
|
SUMMARIZE_ERR;
|
|
printf("*** ensuring UTF-8 normaization is applied in rename...");
|
|
{
|
|
int ncid, varid;
|
|
char name_in[NC_MAX_NAME + 1];
|
|
|
|
if (nc_create(FILE_NAME, NC_NETCDF4 | NC_CLOBBER, &ncid)) ERR;
|
|
if (nc_def_var(ncid, BORING_NAME, NC_CHAR, 0, NULL, &varid)) ERR;
|
|
if (nc_rename_var(ncid, varid, name_utf8)) ERR;
|
|
if (nc_inq_varname(ncid, 0, name_in)) ERR;
|
|
if (!strcmp(name_in, norm_utf8)) ERR;
|
|
if (nc_close(ncid)) ERR;
|
|
|
|
/* Reopen the file and check again. */
|
|
if (nc_open(FILE_NAME, NC_NOWRITE, &ncid)) ERR;
|
|
if (nc_inq_varname(ncid, 0, name_in)) ERR;
|
|
if (!strcmp(name_in, norm_utf8)) ERR;
|
|
if (nc_close(ncid)) ERR;
|
|
}
|
|
SUMMARIZE_ERR;
|
|
FINAL_RESULTS;
|
|
}
|
|
|