Netcdf quantize (#32)

* Add the source code * Add readme file * Update README.md LaTeX syntax * Add yaml workflow file for NetCDF tests * Reframe tests to use the discoverable unit test approach * Use regular netcdf * Allow the make command to run in the correct directory * Run the quantize executable * Relocate run command * Check that the test is running in the correct place * Syntax error * Syntax error * import numpy properly
MetOffice · Jul 10, 2024 · e370d06 · e370d06
1 parent 1d7cfbb
commit e370d06
Show file tree

Hide file tree

Showing 15 changed files with 487 additions and 0 deletions.
diff --git a/.github/workflows/run_netcdf_test.yml b/.github/workflows/run_netcdf_test.yml
@@ -0,0 +1,23 @@
+# CI workflow: installs the NetCDF dependencies and runs the unit tests
+# discovered under netcdf_examples/.
+name: run-netcdf-test
+
+# Triggered by pushes to main and by every pull request
+on:
+  push:
+    branches:
+      - main
+  pull_request:
+
+jobs:
+  netcdf_test:
+    name: netcdf test
+    runs-on: ubuntu-24.04
+    steps:
+      - uses: actions/checkout@v4
+      # The apt package list is read from netcdf_examples/dependencies_ncdf;
+      # nc-config then reports the installed NetCDF version in the job log
+      - name: Install dependencies
+        run: |
+            sudo apt update
+            sudo apt -yq install $(<netcdf_examples/dependencies_ncdf)
+            nc-config --version
+      # Test discovery starts from the netcdf_examples/ directory
+      - name: Build and run quantize test
+        run: |
+            python3 -m unittest discover -v -s netcdf_examples/
+        shell: bash
diff --git a/netcdf_examples/__init__.py b/netcdf_examples/__init__.py
diff --git a/netcdf_examples/dependencies_ncdf b/netcdf_examples/dependencies_ncdf
@@ -0,0 +1,5 @@
+g++
+libnetcdf-dev
+netcdf-bin
+python3-numpy
+python3-netcdf4
diff --git a/netcdf_examples/quantize_test/Makefile b/netcdf_examples/quantize_test/Makefile
@@ -0,0 +1,16 @@
+# Makefile for the NetCDF quantize test.
+# quantize.c #includes quantize_params.c directly, so only quantize.c is
+# passed to the compiler; the other files are listed as prerequisites.
+
+CC = cc
+
+# None of these targets creates a file of the same name (the executable is
+# quantize.exe), so declare them phony.
+.PHONY: all quantize quantize_github clean
+
+all: quantize
+
+# -lm is required because quantize.c calls sin() from <math.h>
+quantize: quantize.c quantize.h quantize_params.c
+	$(CC) quantize.c -lnetcdf -lm -o quantize.exe
+
+# Retained for existing callers; builds the same executable as `quantize`
+quantize_github: quantize
+
+clean:
+	rm -rf *exe core *nc
diff --git a/netcdf_examples/quantize_test/README.md b/netcdf_examples/quantize_test/README.md
@@ -0,0 +1,29 @@
+# NetCDF quantize test
+
+The following test demonstrates the functionality of the NetCDF quantize feature, along with the deflate calls. This test requires NetCDF 4.9 or later. The choice of C rather than Fortran was made, as the C interface will be used for any applications within XIOS.
+
+The experiments are defined in the file `quantize_params.c`. The macro `NUM_PACKING_PARAMS`, defined at the top of that file, sets the number of individual test files to be written. This is also the size of the array of `struct` `PackingParams`, which is defined in the same file. The structure allows for a choice of different quantize methods and numbers of significant digits.
+
+```c
+struct PackingParams {
+  int compress; // perform compression, 1 for compression, 0 for not
+  int do_quantize; // perform quantization, 1 for quantization, 0 for not
+  int netcdf_quantize_mode; /* Choose netcdf quantization mode, choices:
+			       1) NC_QUANTIZE_BITGROOM
+			       2) NC_QUANTIZE_GRANULARBR
+			       3) NC_QUANTIZE_BITROUND */
+  int netcdf_nsd; /* Number of significant digits to preserve. For
+		     NC_QUANTIZE_BITGROOM and NC_QUANTIZE_GRANULARBR these are
+		     decimal significant figures, for NC_QUANTIZE_BITROUND
+		     these are binary significant figures.
+		     Note: 1 decimal sf requires ~3.32 bits. */
+  char filename[200]; // Name of file to write to
+  char fieldname[200]; // Name of field to write to file
+};
+```
+
+The actual test code is in the file `quantize.c`. This test uses an analytic function for data, in a 2d array of size `dim1_size` x `dim2_size` set in the `main()` function. The analytic function is $z\left(x,y\right) = x \sin\left( \frac{2 \pi y}{y_\text{max}} \right)$.
+
+We loop over all the items in the `packing_params` array, writing the same data to each file defined by this array, setting up the quantization (if required) using the call to `nc_def_var_quantize()`, and the compression (if required) using the call to `nc_def_var_deflate()`. We have hard-wired the deflate call to use no shuffle and a deflate level of one.
+
+In addition to the writing of data, there are two timer calls, around the functions `nc_put_var_float()`, and `nc_close()`, giving individual and combined times for these two functions to return.
diff --git a/netcdf_examples/quantize_test/__init__.py b/netcdf_examples/quantize_test/__init__.py
diff --git a/netcdf_examples/quantize_test/quant_bg_3_comp.cdl b/netcdf_examples/quantize_test/quant_bg_3_comp.cdl
@@ -0,0 +1,30 @@
+netcdf quant_bg_3_comp {
+dimensions:
+	x = 10 ;
+	y = 10 ;
+variables:
+	float field(x, y) ;
+		field:_QuantizeBitGroomNumberOfSignificantDigits = 3 ;
+data:
+
+ field =
+  0, 0, 0, 0, 0, 0, -0, -5.738317e-42, -0, -5.738317e-42,
+  0, 0.5878906, 0.9509277, 0.9511718, 0.5876465, 3.590685e-09, -0.5876465, 
+    -0.9511718, -0.9509277, -0.5878906,
+  0, 1.175781, 1.901855, 1.902344, 1.175293, 7.18137e-09, -1.175293, 
+    -1.902344, -1.901855, -1.175781,
+  0, 1.763672, 2.852539, 2.853515, 1.763184, 1.077205e-08, -1.763184, 
+    -2.853515, -2.852539, -1.763672,
+  0, 2.351562, 3.803711, 3.804687, 2.350586, 1.436274e-08, -2.350586, 
+    -3.804687, -3.803711, -2.351562,
+  0, 2.939453, 4.753906, 4.755859, 2.938477, 1.794979e-08, -2.938477, 
+    -4.755859, -4.753906, -2.939453,
+  0, 3.527344, 5.705078, 5.707031, 3.526367, 2.154411e-08, -3.526367, 
+    -5.707031, -5.705078, -3.527344,
+  0, 4.115234, 6.65625, 6.658203, 4.113281, 2.513116e-08, -4.113281, 
+    -6.658203, -6.65625, -4.115234,
+  0, 4.703125, 7.607422, 7.609375, 4.701172, 2.872548e-08, -4.701172, 
+    -7.609375, -7.607422, -4.703125,
+  0, 5.291015, 8.558594, 8.562499, 5.289062, 3.23198e-08, -5.289062, 
+    -8.562499, -8.558594, -5.291015 ;
+}
diff --git a/netcdf_examples/quantize_test/quant_br_10b_comp.cdl b/netcdf_examples/quantize_test/quant_br_10b_comp.cdl
@@ -0,0 +1,30 @@
+netcdf quant_br_10b_comp {
+dimensions:
+	x = 10 ;
+	y = 10 ;
+variables:
+	float field(x, y) ;
+		field:_QuantizeBitRoundNumberOfSignificantBits = 10 ;
+data:
+
+ field =
+  0, 0, 0, 0, 0, 0, -0, -0, -0, -0,
+  0, 0.5878906, 0.9511719, 0.9511719, 0.5878906, 3.590685e-09, -0.5878906, 
+    -0.9511719, -0.9511719, -0.5878906,
+  0, 1.175781, 1.902344, 1.902344, 1.175781, 7.18137e-09, -1.175781, 
+    -1.902344, -1.902344, -1.175781,
+  0, 1.763672, 2.853516, 2.853516, 1.763672, 1.076842e-08, -1.763672, 
+    -2.853516, -2.853516, -1.763672,
+  0, 2.351562, 3.804688, 3.804688, 2.351562, 1.436274e-08, -2.351562, 
+    -3.804688, -3.804688, -2.351562,
+  0, 2.939453, 4.753906, 4.753906, 2.939453, 1.794251e-08, -2.939453, 
+    -4.753906, -4.753906, -2.939453,
+  0, 3.527344, 5.707031, 5.707031, 3.527344, 2.153683e-08, -3.527344, 
+    -5.707031, -5.707031, -3.527344,
+  0, 4.113281, 6.65625, 6.65625, 4.113281, 2.513116e-08, -4.113281, -6.65625, 
+    -6.65625, -4.113281,
+  0, 4.703125, 7.609375, 7.609375, 4.703125, 2.872548e-08, -4.703125, 
+    -7.609375, -7.609375, -4.703125,
+  0, 5.289062, 8.5625, 8.5625, 5.289062, 3.230525e-08, -5.289062, -8.5625, 
+    -8.5625, -5.289062 ;
+}
diff --git a/netcdf_examples/quantize_test/quant_gran_3_comp.cdl b/netcdf_examples/quantize_test/quant_gran_3_comp.cdl
@@ -0,0 +1,30 @@
+netcdf quant_gran_3_comp {
+dimensions:
+	x = 10 ;
+	y = 10 ;
+variables:
+	float field(x, y) ;
+		field:_QuantizeGranularBitRoundNumberOfSignificantDigits = 3 ;
+data:
+
+ field =
+  0, 0, 0, 0, 0, 0, -0, -0, -0, -0,
+  0, 0.5878906, 0.9511719, 0.9511719, 0.5878906, 3.587047e-09, -0.5878906, 
+    -0.9511719, -0.9511719, -0.5878906,
+  0, 1.171875, 1.898438, 1.898438, 1.171875, 7.18137e-09, -1.171875, 
+    -1.898438, -1.898438, -1.171875,
+  0, 1.765625, 2.851562, 2.851562, 1.765625, 1.076842e-08, -1.765625, 
+    -2.851562, -2.851562, -1.765625,
+  0, 2.351562, 3.804688, 3.804688, 2.351562, 1.437729e-08, -2.351562, 
+    -3.804688, -3.804688, -2.351562,
+  0, 2.9375, 4.757812, 4.757812, 2.9375, 1.792796e-08, -2.9375, -4.757812, 
+    -4.757812, -2.9375,
+  0, 3.523438, 5.703125, 5.703125, 3.523438, 2.153683e-08, -3.523438, 
+    -5.703125, -5.703125, -3.523438,
+  0, 4.117188, 6.65625, 6.65625, 4.117188, 2.514571e-08, -4.117188, -6.65625, 
+    -6.65625, -4.117188,
+  0, 4.703125, 7.609375, 7.609375, 4.703125, 2.869638e-08, -4.703125, 
+    -7.609375, -7.609375, -4.703125,
+  0, 5.289062, 8.5625, 8.5625, 5.289062, 3.230525e-08, -5.289062, -8.5625, 
+    -8.5625, -5.289062 ;
+}
diff --git a/netcdf_examples/quantize_test/quantize.c b/netcdf_examples/quantize_test/quantize.c
@@ -0,0 +1,113 @@
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h> 
+#include <math.h>
+#include <netcdf.h>
+#include <time.h>
+#include "quantize.h"
+#include "quantize_params.c"
+
+double clock_to_second(clock_t t) {
+  /* Convert a clock_t tick count into seconds, returned as a double.
+     The cast happens before the division so the result keeps its
+     fractional part. */
+  return ((double) t) / CLOCKS_PER_SEC;
+}
+
+void populateData(float* data_ptr, int dim1_size, int dim2_size) {
+  /* Populate the dim1_size x dim2_size array at address data_ptr with a
+     2d analytic function: a sine wave along the j dimension (one full
+     period over dim2_size points) that scales linearly along the i
+     direction, i.e. z(i, j) = i * sin(2*PI*j / dim2_size).
+
+     data_ptr must point to at least dim1_size*dim2_size floats. Element
+     (i, j) is stored at offset i*dim2_size + j, with j varying fastest. */
+  int ii, jj;
+  float flt_dim2_size;
+  flt_dim2_size = (float) dim2_size;
+  for (ii = 0; ii < dim1_size; ii++) {
+    /* The stride between consecutive rows is the length of the j
+       dimension (dim2_size), not dim1_size; using dim1_size is only
+       correct when the array happens to be square. */
+    float* row_ptr = data_ptr + ((size_t) ii * (size_t) dim2_size);
+    for (jj = 0; jj < dim2_size; jj++) {
+      row_ptr[jj] = sin(((2.0*PI)/flt_dim2_size)*jj) *ii;
+    }
+  }
+}
+
+
+/* Abort with a diagnostic if a netCDF call did not return NC_NOERR.
+   call is the name of the failing function, filename the file it was
+   operating on; both are only used in the error message. */
+static void check_nc(int status, const char* call, const char* filename) {
+  if (status != NC_NOERR) {
+    fprintf(stderr, "%s failed for file %s: %s\n", call, filename,
+            nc_strerror(status));
+    exit(EXIT_FAILURE);
+  }
+}
+
+int main() {
+  /* Write the same analytic 2d field to one netCDF-4 file per entry of
+     the packing parameter array, optionally quantizing and/or deflating
+     it, and report how long nc_put_var_float() and nc_close() take for
+     each file. Returns 0 on success; exits with EXIT_FAILURE if an
+     allocation or any netCDF call fails. */
+  int i;
+
+  // Define the number of dimensions and size of data
+  int ndims = 2;
+  int dim1_size = 10;
+  int dim2_size = 10;
+
+  float* data_ptr;
+  struct PackingParams* packing_params;
+  struct PackingParams my_param;
+
+  /* NetCDF related variables */
+  int ncid;
+  int x_dimid, y_dimid, varid;
+  int dimids[ndims];
+  int status;
+
+  // Time variable
+  clock_t t;
+  double write_time, close_time;
+
+  // Define our packing params from quantize_params.c
+  packing_params = define_params();
+  if (packing_params == NULL) {
+    fprintf(stderr, "Failed to allocate the packing parameters\n");
+    return EXIT_FAILURE;
+  }
+
+  // Create data only once
+  data_ptr = (float*) calloc(dim1_size*dim2_size, sizeof(float));
+  if (data_ptr == NULL) {
+    fprintf(stderr, "Failed to allocate the data array\n");
+    free(packing_params);
+    return EXIT_FAILURE;
+  }
+  populateData(data_ptr, dim1_size, dim2_size);
+
+
+  /* Loop over the array of packing_params defined in quantize_params.c,
+     setting up and writing the variable in data_ptr depending on the
+     individual parameter setting */
+  for (i = 0; i<NUM_PACKING_PARAMS; i++) {
+    my_param = *(packing_params + i);
+
+    // set up the netcdf file
+    check_nc(nc_create(my_param.filename, NC_NETCDF4, &ncid),
+             "nc_create()", my_param.filename);
+
+    check_nc(nc_def_dim(ncid, "x", dim1_size, &x_dimid),
+             "nc_def_dim()", my_param.filename);
+    check_nc(nc_def_dim(ncid, "y", dim2_size, &y_dimid),
+             "nc_def_dim()", my_param.filename);
+    dimids[0] = x_dimid;
+    dimids[1] = y_dimid;
+
+    // Define the variable
+    check_nc(nc_def_var(ncid, my_param.fieldname, NC_FLOAT, ndims, dimids,
+                        &varid),
+             "nc_def_var()", my_param.filename);
+
+    // Set up quantization if appropriate
+    if (my_param.do_quantize > 0) {
+      check_nc(nc_def_var_quantize(ncid, varid,
+                                   my_param.netcdf_quantize_mode,
+                                   my_param.netcdf_nsd),
+               "nc_def_var_quantize()", my_param.filename);
+    }
+
+    // Set up compression if appropriate: no shuffle, deflate on, level 1
+    if (my_param.compress > 0) {
+      check_nc(nc_def_var_deflate(ncid, varid, 0, 1, 1),
+               "nc_def_var_deflate()", my_param.filename);
+    }
+
+    // End definition
+    check_nc(nc_enddef(ncid), "nc_enddef()", my_param.filename);
+
+    /* write the data into the file; the status is checked only after the
+       clock has been read so the check is not part of the timed region */
+    t = clock();
+    status = nc_put_var_float(ncid, varid, data_ptr);
+    write_time = clock_to_second(clock() - t);
+    check_nc(status, "nc_put_var_float()", my_param.filename);
+
+    printf("File %s nc_put_var_float() takes %.4f s\n",
+           my_param.filename, write_time);
+
+    //close the file
+    t = clock();
+    status = nc_close(ncid);
+    close_time = clock_to_second(clock() - t);
+    check_nc(status, "nc_close()", my_param.filename);
+    printf("File %s nc_close() takes %.4f s \n",
+           my_param.filename, close_time);
+
+    printf("File %s total time: %.4f\n\n", my_param.filename,
+           write_time + close_time);
+  }
+  free(data_ptr);
+  free(packing_params);
+  return 0;
+}
diff --git a/netcdf_examples/quantize_test/quantize.h b/netcdf_examples/quantize_test/quantize.h
@@ -0,0 +1,10 @@
+/* Declarations for the NetCDF quantize test (included by quantize.c) */
+#ifndef QUANTIZE_H
+#define QUANTIZE_H
+
+#include <time.h> /* clock_t, needed by the clock_to_second prototype */
+
+/* Function prototype for quantize_params.c */
+struct PackingParams *define_params();
+
+/* function prototypes for quantize.c */
+double clock_to_second(clock_t);
+void populateData(float*, int, int);
+int main();
+
+/* Definitions */
+#define PI 3.14159265
+
+#endif /* QUANTIZE_H */
diff --git a/netcdf_examples/quantize_test/quantize_params.c b/netcdf_examples/quantize_test/quantize_params.c
@@ -0,0 +1,69 @@
+//How many experiments are we running?
+#define NUM_PACKING_PARAMS 5
+
+/* Define our struct to hold the packing parameters. These are populated
+   in the define_params function below, allowing a neat way of iterating
+   over experiments */
+struct PackingParams {
+  int compress; // perform compression, 1 for compression, 0 for not
+  int do_quantize; // perform quantization, 1 for quantization, 0 for not
+  int netcdf_quantize_mode; /* Choose netcdf quantization mode, choices:
+			       1) NC_QUANTIZE_BITGROOM
+			       2) NC_QUANTIZE_GRANULARBR
+			       3) NC_QUANTIZE_BITROUND */
+  int netcdf_nsd; /* Number of significant digits to preserve. For
+		     NC_QUANTIZE_BITGROOM and NC_QUANTIZE_GRANULARBR these are
+		     decimal significant figures, for NC_QUANTIZE_BITROUND
+		     these are binary significant figures.
+		     Note: 1 decimal sf requires ~3.32 bits. */
+  char filename[200]; // Name of file to write to (at most 199 characters)
+  char fieldname[200]; // Name of field to write to file (at most 199 characters)
+};
+
+struct PackingParams *define_params() {
+  /* Build the array of NUM_PACKING_PARAMS experiment definitions.
+
+     Returns a heap-allocated array that the caller must free(), or NULL
+     if the allocation fails.
+
+     The experiments are held in a constant table and copied into the
+     returned array. Members not named in an initializer (for example
+     netcdf_quantize_mode and netcdf_nsd of the unquantized reference
+     cases) are zero rather than indeterminate, which matters because the
+     caller copies whole structs by value. If NUM_PACKING_PARAMS is raised
+     without adding entries here, the extra entries are all-zero; if it is
+     lowered below the number of entries, the compiler reports the excess
+     initializers. */
+  static const struct PackingParams experiments[NUM_PACKING_PARAMS] = {
+    // Reference
+    { .compress = 0,
+      .do_quantize = 0,
+      .filename = "reference.nc",
+      .fieldname = "field" },
+
+    // Reference compress
+    { .compress = 1,
+      .do_quantize = 0,
+      .filename = "reference_comp.nc",
+      .fieldname = "field" },
+
+    // Bitgroom pack and compress, nsd = 3
+    { .compress = 1,
+      .do_quantize = 1,
+      .netcdf_quantize_mode = NC_QUANTIZE_BITGROOM,
+      .netcdf_nsd = 3,
+      .filename = "quant_bg_3_comp.nc",
+      .fieldname = "field" },
+
+    // Granular pack and compress, nsd = 3
+    { .compress = 1,
+      .do_quantize = 1,
+      .netcdf_quantize_mode = NC_QUANTIZE_GRANULARBR,
+      .netcdf_nsd = 3,
+      .filename = "quant_gran_3_comp.nc",
+      .fieldname = "field" },
+
+    // Bitround pack and compress, nsd = 10bit (3dec)
+    { .compress = 1,
+      .do_quantize = 1,
+      .netcdf_quantize_mode = NC_QUANTIZE_BITROUND,
+      .netcdf_nsd = 10,
+      .filename = "quant_br_10b_comp.nc",
+      .fieldname = "field" },
+  };
+
+  //Allocate our array of parameters
+  struct PackingParams* param_array = malloc(sizeof experiments);
+  if (param_array == NULL) {
+    return NULL;
+  }
+
+  memcpy(param_array, experiments, sizeof experiments);
+  return param_array;
+}
diff --git a/netcdf_examples/quantize_test/reference.cdl b/netcdf_examples/quantize_test/reference.cdl
@@ -0,0 +1,29 @@
+netcdf reference {
+dimensions:
+	x = 10 ;
+	y = 10 ;
+variables:
+	float field(x, y) ;
+data:
+
+ field =
+  0, 0, 0, 0, 0, 0, -0, -0, -0, -0,
+  0, 0.5877852, 0.9510565, 0.9510565, 0.5877852, 3.589793e-09, -0.5877852, 
+    -0.9510565, -0.9510565, -0.5877852,
+  0, 1.17557, 1.902113, 1.902113, 1.17557, 7.179586e-09, -1.17557, -1.902113, 
+    -1.902113, -1.17557,
+  0, 1.763356, 2.853169, 2.853169, 1.763356, 1.076938e-08, -1.763356, 
+    -2.853169, -2.853169, -1.763356,
+  0, 2.351141, 3.804226, 3.804226, 2.351141, 1.435917e-08, -2.351141, 
+    -3.804226, -3.804226, -2.351141,
+  0, 2.938926, 4.755282, 4.755282, 2.938926, 1.794896e-08, -2.938926, 
+    -4.755282, -4.755282, -2.938926,
+  0, 3.526711, 5.706339, 5.706339, 3.526711, 2.153876e-08, -3.526711, 
+    -5.706339, -5.706339, -3.526711,
+  0, 4.114497, 6.657396, 6.657396, 4.114497, 2.512855e-08, -4.114497, 
+    -6.657396, -6.657396, -4.114497,
+  0, 4.702282, 7.608452, 7.608452, 4.702282, 2.871835e-08, -4.702282, 
+    -7.608452, -7.608452, -4.702282,
+  0, 5.290067, 8.559508, 8.559508, 5.290067, 3.230814e-08, -5.290067, 
+    -8.559508, -8.559508, -5.290067 ;
+}