# -*- coding: utf-8 -*-
"""
Contains objects for interfacing ``gridwxcomp`` with gdal command line tools
used for spatial interpolation of scattered point data.
"""
import os
import subprocess
# NOTE: xml.etree.cElementTree was deprecated in Python 3.3 and removed in
# 3.9; the plain module transparently uses the C accelerator when available
import xml.etree.ElementTree as ET
from copy import copy
from pathlib import Path
from xml.dom import minidom

import numpy as np
import pandas as pd

# allows for CL script usage if gridwxcomp not installed
try:
    from .spatial import get_subgrid_bounds, zonal_stats, calc_pt_error
    from .plot import station_bar_plot
except ImportError:
    # fall back to flat imports when the package is run as loose scripts;
    # only catch ImportError so real errors in those modules still surface
    from spatial import get_subgrid_bounds, zonal_stats, calc_pt_error
    from plot import station_bar_plot
class InterpGdal(object):
    """
    Usage of gdal command line tools within ``gridwxcomp``, currently
    utilizes the `gdal_grid <https://www.gdal.org/gdal_grid.html>`_
    command line tool.

    Arguments:
        summary_csv_path (str): path to [var]_summary_comp CSV file created
            by :mod:`gridwxcomp.calc_bias_ratios` containing point bias
            ratios, lat, and long.

    Attributes:
        GRIDMET_RES (float): resolution of gridMET dataset in decimal
            degrees.
        summary_csv_path (:obj:`pathlib.Path`): absolute path object to
            input ``summary_csv_path``.
        layers (list): list of layers in ``summary_csv_path`` to interpolate
            e.g. when using :meth:`InterpGdal.gdal_grid` with ``layer="all"``
            defaults to :attr:`InterpGdal.default_layers`.
        grid_bounds (tuple or None): default None. Extent for interpolation
            raster in order (min long, max long, min lat, max lat).
        interp_meth (str): default 'invdist'. gdal_grid interpolation method.
        interped_rasters (list): empty list that is appended with
            :obj:`pathlib.Path` objects to interpolated rasters after using
            :meth:`InterpGdal.gdal_grid`.
        params (dict or None): default None. After
            :meth:`InterpGdal.gdal_grid` ``params`` is updated with the last
            used interpolation parameters in the form of a dictionary with
            parameter names as keys.

    Example:
        The :class:`InterpGdal` class is useful for experimenting with
        multiple interpolation algorithms provided by gdal which are
        optimized and often sensitive to multiple parameters. We can use the
        object to loop over a range of parameter combinations to test how
        algorithms perform, we might pick a single layer to test, in this
        case the growing season bias ratios between station and gridMET
        reference evapotranspiration (etr_mm). Below is a routine to conduct
        a basic sensitivity analysis of the power and smooth parameters of
        the inverse distance to a power interpolation method,

        >>> from gridwxcomp import InterpGdal
        >>> import os
        >>> # create instance variable from summary csv
        >>> summary_file = 'PATH/TO/etr_mm_summary_comp.csv'
        >>> # create an InterpGdal instance
        >>> test = InterpGdal(summary_file)
        >>> layer = 'growseason_mean' # could be a list
        >>> # run inverse distance interpolation with different params
        >>> for p in range(1,10):
        >>>     for s in [0, 0.5, 1, 1.5, 2]:
        >>>         # build output directory based on parameter sets
        >>>         out_dir = os.path.join('spatial', 'invdist',
        >>>             'power={}_smooth={}'.format(p, s))
        >>>         params = {'power': p, 'smooth': s}
        >>>         test.gdal_grid(out_dir=out_dir, layer=layer,
        >>>             params=params)

        Note, we did not assign the interpolation method 'invdist' because it
        is the default. To use another interpolation method we would
        assign the ``interp_meth`` kwarg to :meth:`InterpGdal.gdal_grid`.
        Similarly, we could experiment with other parameters which all can be
        found in the class attribute :attr:`InterpGdal.default_params`. The
        instance variable :attr:`InterpGdal.params` can also be used to save
        metadata on parameters used for each run.
    """

    # constant gridMET cell size in degrees
    GRIDMET_RES = 0.041666666666666664

    # gdal_grid interpolation methods
    interp_methods = ('average',
                      'invdist',
                      'invdistnn',
                      'linear',
                      'nearest')
    """
    interp_methods (tuple): gdal_grid interpolation algorithms.
    """

    # default layers calculated by calc bias ratios module
    default_layers = ('Jan_mean',
                      'Feb_mean',
                      'Mar_mean',
                      'Apr_mean',
                      'May_mean',
                      'Jun_mean',
                      'Jul_mean',
                      'Aug_mean',
                      'Sep_mean',
                      'Oct_mean',
                      'Nov_mean',
                      'Dec_mean',
                      'growseason_mean',
                      'annual_mean',
                      'summer_mean')
    """
    default_layers (tuple): Layers to interpolate created by
        :mod:`gridwxcomp.calc_bias_ratios`, e.g. "Jan_mean", found in
        ``summary_csv_path``.
    """

    # interp params, method key, param dic as values
    default_params = {
        'invdist': {
            'power': 2,
            'smoothing': 0,
            'radius1': 0,
            'radius2': 0,
            'angle': 0,
            'max_points': 0,
            'min_points': 0,
            'nodata': -999
        },
        'invdistnn': {
            'power': 2,
            'smoothing': 0,
            'radius': 10,
            'max_points': 12,
            'min_points': 0,
            'nodata': -999
        },
        'average': {
            'radius1': 0,
            'radius2': 0,
            'angle': 0,
            'min_points': 0,
            'nodata': -999
        },
        'linear': {
            'radius': -1,
            'nodata': -999
        },
        'nearest': {
            'radius1': 0,
            'radius2': 0,
            'angle': 0,
            'nodata': -999
        }
    }
    """
    default_params (dict): Dictionary with default parameters for each
        interpolation algorithm, slightly modified from GDAL defaults.
        Keys are interpolation method names, values are dictionaries with
        parameter name keys and corresponding values.
    """

    # maps bias ratio variables names to residual names for point shapefile
    # which must be shorter (< 11 characters) to be stored as fields
    var_residual_names = {
        'annual_mean': 'annual_res',
        'growseason_mean': 'grow_res',
        'summer_mean': 'summer_res'
    }
    """
    var_residual_names (dict): Dictionary that maps names of bias ratios
        to the name of their intepolated residual if they are too long for
        storing as a field name in an ESRI shapefile (i.e. > 10 chars).
    """

    def __init__(self, summary_csv_path):
        # fail fast with an informative message if the input CSV is missing
        if not Path(summary_csv_path).is_file():
            raise FileNotFoundError(
                'Summary CSV file: {} not found!'.format(
                    Path(summary_csv_path).absolute())
            )
        self.summary_csv_path = Path(summary_csv_path).absolute()
        self.layers = list(self.default_layers)  # mutable instance attr.
        self.grid_bounds = None
        self.interp_meth = 'invdist'
        self.interped_rasters = []  # appended with Path objects
        self.params = None  # to hold last used interp. parameters as dict
def _make_pt_vrt(self, layer_name, out_dir):
"""
Make a vrt file for station point ratios in summary CSV for a
specific layer or field name. Save to out_dir. Used for gdal_grid
interpolation commands of scatter point data.
"""
if not Path(out_dir).is_dir():
os.makedirs(out_dir)
point_data = Path(self.summary_csv_path).name.replace(
'.csv', '_tmp.csv')
# make tmp point data csv for given layer, drop missing values
df = pd.read_csv(self.summary_csv_path)
df = df[['STATION_LAT', 'STATION_LON', layer_name]]
df = df[df[layer_name] != -999]
tmp_out_path = str(Path(self.summary_csv_path).parent / point_data)
df.to_csv(tmp_out_path, index=False)
# if out_dir adjust summary CSV path by prepending parent dirs
tmp = copy(Path(out_dir))
n_parent_dirs = 0
while len(Path(tmp).parents) > 0:
if tmp.name == self.summary_csv_path.parent.name:
break
tmp = Path(tmp).parent
n_parent_dirs+=1
path_to_data = str(
Path(
'..{}'.format(os.sep)*n_parent_dirs
).joinpath(point_data)
)
out_file = '{}.vrt'.format(layer_name) # keep it simple just layer name
# VRT format for reading CSV point data
root = ET.Element('OGRVRTDataSource')
OGRVRTLayer = ET.SubElement(root, 'OGRVRTLayer',
name=point_data.replace('.csv', ''))
# set all fields, SRS WGS84, point geom
ET.SubElement(OGRVRTLayer, 'SrcDataSource').text = path_to_data
ET.SubElement(OGRVRTLayer, 'LayerSRS').text = 'epsg:4326'
ET.SubElement(OGRVRTLayer, 'GeometryType').text = 'wkbPoint'
ET.SubElement(OGRVRTLayer, 'GeometryField', encoding='PointFromColumns',
x='STATION_LON', y='STATION_LAT', z=layer_name)
tree = ET.ElementTree(root)
# indent xml, save to out_dir
out_xml_str = _prettify(root)
out_path = os.path.join(out_dir, out_file)
with open(out_path, 'w') as outf:
outf.write(out_xml_str)
def _str_to_params(self, param_str):
"""
Convert parameter string for gdal interpolation arguments
into a dictionary.
Example:
>>> in_str = ":power=2:smoothing=.1:nodata=-999"
>>> _str_to_params(in_str)
{'power': '2', 'smoothing': '.1', 'nodata': '-999'}
"""
param_tmp = param_str.split(':')
param_dict = {}
for pair in param_tmp:
if pair and pair.count('=') == 1:
name, val = pair.split('=')[0], pair.split('=')[1]
param_dict[name] = val
elif not pair:
continue
else:
raise ValueError('{} is not a valid interpolation argument'\
.format(param_str))
return param_dict
[docs] def gdal_grid(self, layer='all', out_dir='', interp_meth='invdist',
params=None, bounds=None, nx_cells=None, ny_cells=None,
scale_factor=1, z_stats=True, res_plot=True, grid_res=None,
grid_id_name='GRIDMET_ID', options=None):
"""
Run gdal_grid command line tool to interpolate point ratios.
For further information on theinterpolation algorithms including
their function, parameters, and options see
`gdal_grid <https://www.gdal.org/gdal_grid.html>`_.
Keyword Arguments:
layer (str or list): default 'all'. Name of summary file column
to interpolate, e.g. 'Jan_mean', or list of names. If 'all'
use all variables in mutable instance attribute "layers".
out_dir (str): default ''. Output directory to save rasters and
zonal stats CSV, always appended to the root dir of the
input summary CSV parent path that contains point ratios.
interp_meth (str): default 'invdist'. gdal interpolation algorithm
params (dict, str, or None): default None. Parameters for
interpolation algorithm. See examples for format rules.
bounds (tuple or None): default None. Tuple of bounding coordinates
in the following order (min long, max long, min lat, max lat)
which need to be in decimal degrees. If None, get extent from
locations of climate stations in input summary CSV with 25 cell
buffer.
nx_cells (int): default None. Number of pixels in x dim. of raster,
if None calculated from ``bounds``.
ny_cells (int): default None. Number of pixels in y dim. of raster,
if None calculated from ``bounds``.
scale_factor (float, int): default 1. Scaling factor to apply to
original gridMET fishnet to create resampling resolution. If
scale_factor = 0.1, the resolution will be one tenth gridMET
resolution or 400 m.
z_stats (bool): default True. Calculate zonal means of
interpolated surface to gridMET cells in fishnet and save to a
CSV file. The CSV file will be saved to the same directory as
the interpolated raster file(s).
res_plot (bool): default True. Make bar plot for residual (error)
between interpolated and station value for ``layer``.
options (str or None): default None. Extra command line options for
gdal_grid spatial interpolation.
Returns:
None
Examples:
The default interpolation algorithm 'invdist' or inverse distance
weighting to a power to interpolate bias ratios in a summary CSV
file that was first created by :mod:`gridwxcomp.calc_bias_ratios`.
The default option will interpolate all layers in :obj:`InterpGdal.default_layers`
and calculate zonal statistics for all layers. It will also assume
the boundaries of the rasters are defined by the centroid locations
of the *outer* station locations in the input summary CSV plus a 25
gridMET cell buffer, the pixel size will be 400m (scale_factor=0.1). We also limit the interpolation to two layers, growing season and
annual mean bias ratios,
>>> from gridwxcomp import InterpGdal
>>> summary_file = 'PATH/TO/[var]_summary_comp.csv'
>>> out_dir = 'default_params'
>>> layers = ['growseason_mean', 'annual_mean']
>>> # create a InterpGdal instance
>>> test = InterpGdal(summary_file)
>>> # run inverse distance interpolation
>>> test.gdal_grid(out_dir=out_dir, layer=layers)
Note, zonal statistics to gridMET cells and interpolated residual
plots are computed by default. A gridMET fishnet must have been
previously created using :func:`gridwxcomp.spatial.make_grid`.
After running the code above the following files will be created
in the 'default_params' directory which will be build in the same
location as the input summary CSV::
default_params/
├── annual_mean.tiff
├── annual_mean.vrt
├── etr_mm_summary_comp_all_yrs.csv
├── etr_mm_summary_pts.cpg
├── etr_mm_summary_pts.dbf
├── etr_mm_summary_pts.prj
├── etr_mm_summary_pts.shp
├── etr_mm_summary_pts.shx
├── zonal_stats.csv
├── growseason_mean.tiff
├── growseason_mean.vrt
└── residual_plots/
├── annual_res.html
└── grow_res.html
GeoTiff interpolated raster files are now created for select layers
as well as VRT (virtual raster) meta files that store info
on each raster's data source. The file "gridMET_stats.csv" contains
gridMET ID as an index and each layer zonal mean as columns. For
example,
========== ================== ==================
GRIDMET_ID growseason_mean annual_mean
========== ================== ==================
511747 0.9650671287940088 0.9078723876243023
510361 0.9658465063428492 0.9097255715561022
508975 0.9667075970344162 0.9117676407214926
========== ================== ==================
There are several :class:`InterpGdal` instance attributes that
may be useful, for example to see the parameters that were used
for the last call to :meth:`InterpGdal.gdal_grid`
>>> test.params
{'power': 2,
'smoothing': 0,
'radius1': 0,
'radius2': 0,
'angle': 0,
'max_points': 0,
'min_points': 0,
'nodata': -999}
Or to find the paths to the interpolated raster files that
have been created by the instance (all), the "interped_rasters"
instance attribute is a list of all :obj:`pathlib.Path` objects
of absolute paths of raster files. To get them as strings,
>>> list(map(str, test.interped_rasters))
['PATH/TO/growseason_mean.tiff',
'PATH/TO/annual_mean.tiff']
Similary, the raster extent that was used and will be used again for
any subsequent calls of :meth:`InterpGdal.gdal_grid` can be
retrieved by
>>> test.grid_bounds
(-111.74583329966664,
-108.74583330033335,
38.21250000003333,
40.462499999966674)
Note:
The ``nx_cells`` and ``ny_cells`` arguments are an option for
defining the output raster's resolution. If these arguments are
passed then the ``scale_factor`` argument has no effect. The latter
assumes raster resolution is relative to gridMET (4 km).
Raises:
KeyError: if interp_meth is not a valid gdal_grid interpolation
algorithm name.
"""
cwd = os.getcwd()
# user provided uniform grid, cell size should be in dec. degrees
if grid_res is not None:
CS = grid_res
else: # assume gridMET
CS = self.GRIDMET_RES
self.grid_res = CS
# grid resolution in decimal degrees
res = scale_factor * CS
# file structure to match gridwxcomp.spatial.interpolate
grid_var = str(self.summary_csv_path).split('_summary')[0]
# out_dir as subdir from dir with summary CSV
out_dir = (Path(self.summary_csv_path).parent/Path(out_dir)).resolve()
if not out_dir.is_dir():
print('{}\nDoes not exist, creating directory'.format(str(out_dir)))
out_dir.mkdir(parents=True, exist_ok=True)
source_file = Path(self.summary_csv_path).name
source = source_file.replace('.csv', '_tmp')
if interp_meth not in InterpGdal.interp_methods:
err_msg = '{} not a valid interpolation method'.format(interp_meth)
raise KeyError(err_msg)
self.interp_meth = interp_meth
# look up default parameters for interpolation method
if not params:
params = InterpGdal.default_params.get(self.interp_meth)
# if run from command line, params is string to be parsed
if isinstance(params, str):
params = self._str_to_params(params)
# avoid zero NA values for zonal_stats, set default NA rep to -999
if not params.get('nodata'):
params['nodata'] = -999
# update instance parameters for later reference
self.params = params
# parameters to command line input str :name=value:name=value ...
param_str = ':'+':'.join('{}={}'.format(key,val)
for (key,val) in params.items())
# get boundary info and update instance attribute
if not bounds and not self.grid_bounds:
# use gridwxcomp.spatial function, assume 25 cell buffer
self.grid_bounds = get_subgrid_bounds(self.summary_csv_path,
buffer=25, grid_res=CS)
elif bounds:
self.grid_bounds = bounds
# raster extent
xmin,xmax,ymin,ymax = self.grid_bounds
# if not given get pixels in lon lat
# add one cell to avoid unfilled extent in case of large upscaling
if not nx_cells:
nx_cells = int(np.abs(xmin - xmax) / \
(CS * scale_factor)) + 1
if not ny_cells:
ny_cells = int(np.abs(ymin - ymax) / \
(CS * scale_factor)) + 1
# fix any minor adjustments to make raster fit gridMET fishnet extent
# if scale_factor=1 make sure raster pixels align exactly w/gridcells
xmax = xmin + nx_cells * CS * scale_factor
ymin = ymax - ny_cells * CS * scale_factor
# to parse options, like --config GDAL_NUM_THREADS update here
if not options:
options = ''
def _run_gdal_grid(layer):
"""reuse if running multiple layers"""
existing_layers = pd.read_csv(self.summary_csv_path).columns
if not layer in existing_layers:
print('\nError: {} does not exist in input CSV:\n {}'.format(
layer, self.summary_csv_path),
'\nSkipping interpolation.'
)
return
# move to out_dir to run gdal command
os.chdir(out_dir)
# build vrt files in out_dir
self._make_pt_vrt(layer, out_dir)
vrt_file = '{}.vrt'.format(layer)
tiff_file = '{}.tiff'.format(layer)
# add raster path to instance if not already there (overwritten)
out_file = out_dir.joinpath(tiff_file)
# print message to console/logging about interpolation
grid_var = Path(self.summary_csv_path).name.split('_summ')[0]
_interp_msg(grid_var, layer, self.interp_meth, res, out_file)
# build command line arguments
cmd = (r'gdal_grid -a {meth}{p} -txe {xmin} {xmax} -tye {ymax}'
' {ymin} -outsize {nx} {ny} -of GTiff -ot Float32 -l {source}'
' {vrt} {out} {options}'.format(meth=interp_meth, p=param_str,
xmin=xmin, xmax=xmax, ymin=ymin, ymax=ymax, nx=nx_cells,
ny=ny_cells, source=source, vrt=vrt_file, out=tiff_file,
options=options))
# run gdal_grid with arguments, x-platform
p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE,
stderr=subprocess.PIPE)
out, err = p.communicate()
if err:
print(err)
else:
if not out_file in self.interped_rasters:
self.interped_rasters.append(out_file)
p.stdout.close()
p.stderr.close()
os.chdir(cwd)
# calculate interpolated values and error at stations
# only calc residuals for mean bias ratios, i.e. not std dev, etc.
if layer in InterpGdal.default_layers:
calc_pt_error(self.summary_csv_path, out_dir, layer, grid_var,
grid_id_name=grid_id_name
)
else:
# delete tmp summary csv used in interpgdal _make_pt_vrt method
# normally deleted in calc_pt_error
tmp_csv = str(self.summary_csv_path).replace('.csv','_tmp.csv')
if Path(tmp_csv).resolve().is_file():
Path(tmp_csv).resolve().unlink()
# zonal means extracted to GRIDMET_ID (cell index)
if z_stats:
zonal_stats(self.summary_csv_path, out_file,
grid_id_name=grid_id_name
)
# residual (error) bar plot, only for mean bias ratios
if res_plot and layer in InterpGdal.default_layers:
layer = self.var_residual_names.get(
layer,
layer.replace('mean','res')
)
y_label = 'residual (interpolated minus station value)'
title = 'layer: {} algorithm: {} (gdal_grid) res: {} deg.'.\
format(layer, self.interp_meth, res)
res_plot_dir = Path(out_dir)/'residual_plots'
subtitle = 'parameters: {}'.format(params)
source_file = Path(out_dir)/Path(self.summary_csv_path).name
station_bar_plot(source_file, layer, out_dir=res_plot_dir,
y_label=y_label, title=title, subtitle=subtitle)
# run interpolation and zonal statistics depending on layer kwarg
if layer == 'all': # potential for multiprocessing
for l in self.layers:
_run_gdal_grid(l)
# run single field
elif isinstance(layer, str):
_run_gdal_grid(layer)
# run select list or tuple of layers
elif isinstance(layer, (list, tuple)):
for l in layer:
_run_gdal_grid(l)
def _prettify(elem):
"""
Return an indented, multiline XML string for a XML element tree.
"""
rough_string = ET.tostring(elem, 'utf-8', method='xml')
reparsed = minidom.parseString(rough_string)
return reparsed.toprettyxml(indent=' ')
def _interp_msg(grid_var, layer, function, res, out_file):
print(
'\nInterpolating {g} point bias ratios for: {t}\n'.\
format(g=grid_var, t=layer),
'Using the "{}" method\n'.format(function),
'Resolution (pixel size) of output raster: {} degrees'.format(res)
)
print(
'GeoTIFF raster will be saved to: \n',
os.path.abspath(out_file)
)