in libs/solaris/vector/mask.py [0:0]
def instance_mask(df, out_file=None, reference_im=None, geom_col='geometry',
                  do_transform=None, affine_obj=None, shape=(900, 900),
                  out_type='int', burn_value=255, burn_field=None,
                  nodata_value=0):
    """Convert a dataframe of geometries to a per-object pixel mask.

    Every geometry in `df` is rasterized separately into its own channel of
    the output array, so the result has one channel per row of `df`.

    Arguments
    ---------
    df : :class:`pandas.DataFrame` or :class:`geopandas.GeoDataFrame`
        A :class:`pandas.DataFrame` or :class:`geopandas.GeoDataFrame` instance
        with a column containing geometries (identified by `geom_col`). If the
        geometries in `df` are not in pixel coordinates, then `affine_obj` or
        `reference_im` must be passed to provide the transformation to
        convert. `df` is not modified by this function.
    out_file : str, optional
        Path to an image file to save the output to. Must be compatible with
        :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be
        provided (for metadata purposes).
    reference_im : :class:`rasterio.DatasetReader` or `str`, optional
        An image to extract necessary coordinate information from: the
        affine transformation matrix, the image extent, etc. If provided,
        `shape` is ignored, and `affine_obj` is replaced by the image's
        transform whenever a transform is applied. Pixels equal to the
        image's nodata value in any band are set to 0 in every channel of
        the mask.
    geom_col : str, optional
        The column containing geometries in `df`. Defaults to ``"geometry"``.
    do_transform : bool, optional
        Should the values in `df` be transformed from geospatial coordinates
        to pixel coordinates? Defaults to ``None``, in which case the function
        attempts to infer whether or not a transformation is required based on
        the presence or absence of a CRS in `df`. If ``True``, either
        `reference_im` or `affine_obj` must be provided as a source for the
        required affine transformation matrix. If falsy, an identity
        transform is used.
    affine_obj : `list` or :class:`affine.Affine`, optional
        Affine transformation to use to convert from geo coordinates to pixel
        space. Only provide this argument if `df` is a
        :class:`geopandas.GeoDataFrame` with coordinates in a georeferenced
        coordinate space. Ignored if `reference_im` is provided.
    shape : tuple, optional
        Two-element tuple defining the pixel extent of the output mask. It is
        passed unchanged as ``out_shape`` to ``features.rasterize`` and its
        first two entries size the first two axes of the output array.
        Ignored if `reference_im` is provided.
    out_type : 'float' or 'int'
        Selects the dtype of the mask: ``'int'`` produces ``uint8``, any other
        value produces ``float32``. Values taken from `burn_field` are cast
        to the same dtype.
    burn_value : `int` or `float`, optional
        The value to use for labeling objects in the mask. Defaults to 255 (the
        max value for ``uint8`` arrays). Ignored if `burn_field` is provided.
    burn_field : str, optional
        Name of a column in `df` that provides values for `burn_value` for each
        independent object. If provided, `burn_value` is ignored.
    nodata_value : `int` or `float`, optional
        Nodata value written to the metadata of `out_file`. Only used when
        `out_file` is written and the nodata value in the metadata of
        `reference_im` is a float; otherwise the reference image's nodata
        value is kept. Take care when visualizing these masks, the nodata
        value may cause labels to not be visualized if nodata values are
        automatically masked by the software.

    Returns
    -------
    mask : :class:`numpy.array`
        Array of shape ``(shape[0], shape[1], len(df))`` holding one rasterized
        geometry per channel, with dtype ``uint8`` or ``float32`` depending on
        `out_type`. If `df` is empty, a 2D ``uint8`` array of zeros is
        returned instead and no file is written.

    Raises
    ------
    ValueError
        If `out_file` is given without a `reference_im`.
    AttributeError
        Re-raised with a more descriptive message if building the nodata
        mask from `reference_im` raises an :class:`AttributeError`.
    """
    # TODO: Refactor to remove some duplicated code here and in other mask fxns
    if out_file and not reference_im:
        raise ValueError(
            'If saving output to file, `reference_im` must be provided.')
    df = _check_df_load(df)

    if len(df) == 0:
        # Nothing to rasterize: hand back an all-zero mask. Only consult the
        # reference image when one was actually supplied; otherwise fall back
        # to the caller-provided `shape`.
        if reference_im:
            reference_im = _check_rasterio_im_load(reference_im)
            shape = reference_im.shape
        return np.zeros(shape=shape, dtype='uint8')

    if do_transform is None:
        # determine whether or not transform should be done
        do_transform = _check_do_transform(df, reference_im, affine_obj)

    # Load in geoms if wkt. Kept in a local series so the caller's dataframe
    # is not modified as a side effect.
    geoms = df[geom_col].apply(_check_geom)

    if not do_transform:
        affine_obj = Affine(1, 0, 0, 0, 1, 0)  # identity transform

    if reference_im:
        # Load once here; the same object is reused for nodata masking and
        # for the output metadata further down.
        reference_im = _check_rasterio_im_load(reference_im)
        shape = reference_im.shape
        if do_transform:
            affine_obj = reference_im.transform

    # Burn values and the mask share one dtype, selected by `out_type`.
    out_dtype = 'uint8' if out_type == 'int' else 'float32'

    # extract geometries and pair them with burn values
    if burn_field:
        burn_values = df[burn_field].astype(out_dtype)
    else:
        burn_values = [burn_value] * len(df)
    feature_list = list(zip(geoms, burn_values))

    # Every channel is fully overwritten below, so `np.empty` is sufficient.
    output_arr = np.empty(shape=(shape[0], shape[1], len(feature_list)),
                          dtype=out_dtype)
    for idx, feat in enumerate(feature_list):
        output_arr[:, :, idx] = features.rasterize([feat], out_shape=shape,
                                                   transform=affine_obj)

    if reference_im:
        try:
            # Collapse the leading axis of the read array: a pixel is bad if
            # it equals the nodata value in any band.
            # NOTE(review): if `nodata` is None, recent NumPy versions appear
            # to yield an all-False mask here instead of raising - confirm
            # whether a missing nodata value should be an error.
            bad_data_mask = (
                reference_im.read() == reference_im.nodata).any(axis=0)
        except AttributeError as ae:  # raise a more verbose AttributeError
            raise AttributeError(
                "A nodata value is not defined for the source image. "
                "Make sure the reference_im has a nodata value defined."
            ) from ae
        # The mask is 2D while the output is 3D; boolean indexing on the
        # first two axes zeroes the flagged pixels in every channel at once
        # and leaves the array dtype unchanged.
        output_arr[bad_data_mask] = 0

    if out_file:
        meta = reference_im.meta.copy()
        meta.update(count=output_arr.shape[-1])
        if out_type == 'int':
            meta.update(dtype='uint8')
        if isinstance(meta['nodata'], float):
            meta.update(nodata=nodata_value)
        # The context manager closes the dataset; no explicit close needed.
        with rasterio.open(out_file, 'w', **meta) as dst:
            # Band indexes are 1-based, channels are 0-based.
            for channel in range(output_arr.shape[-1]):
                dst.write(output_arr[:, :, channel], indexes=channel + 1)

    return output_arr