def instance_mask()

in libs/solaris/vector/mask.py [0:0]


def instance_mask(df, out_file=None, reference_im=None, geom_col='geometry',
                  do_transform=None, affine_obj=None, shape=(900, 900),
                  out_type='int', burn_value=255, burn_field=None, nodata_value=0):
    """Convert a dataframe of geometries to a per-object pixel mask.

    Every geometry in `df` is rasterized into its own channel, so the result
    has one channel per row of `df`.

    Arguments
    ---------
    df : :class:`pandas.DataFrame` or :class:`geopandas.GeoDataFrame`
        A :class:`pandas.DataFrame` or :class:`geopandas.GeoDataFrame` instance
        with a column containing geometries (identified by `geom_col`). If the
        geometries in `df` are not in pixel coordinates, then `affine_obj` or
        `reference_im` must be passed to provide the transformation to convert.
    out_file : str, optional
        Path to an image file to save the output to. Must be compatible with
        :class:`rasterio.DatasetReader`. If provided, a `reference_im` must be
        provided (for metadata purposes).
    reference_im : :class:`rasterio.DatasetReader` or `str`, optional
        An image to extract necessary coordinate information from: the
        affine transformation matrix, the image extent, etc. If provided,
        `shape` is ignored, and `affine_obj` is ignored whenever a
        transformation is performed.
    geom_col : str, optional
        The column containing geometries in `df`. Defaults to ``"geometry"``.
    do_transform : bool, optional
        Should the values in `df` be transformed from geospatial coordinates
        to pixel coordinates? Defaults to ``None``, in which case the function
        attempts to infer whether or not a transformation is required from
        `df`, `reference_im` and `affine_obj`. If ``True``, either
        `reference_im` or `affine_obj` must be provided as a source for the
        required affine transformation matrix. If falsy, an identity
        transform is used.
    affine_obj : `list` or :class:`affine.Affine`, optional
        Affine transformation to use to convert from geo coordinates to pixel
        space. Only provide this argument if `df` is a
        :class:`geopandas.GeoDataFrame` with coordinates in a georeferenced
        coordinate space. Ignored if `reference_im` is provided.
    shape : tuple, optional
        Two-element tuple giving the first two dimensions of the output mask;
        it is passed to ``features.rasterize`` as ``out_shape``. Ignored if
        `reference_im` is provided.
    out_type : 'float' or 'int'
        Selects the dtype of the mask: ``'int'`` gives ``uint8``, any other
        value gives ``float32``. Defaults to ``'int'``.
    burn_value : `int` or `float`, optional
        The value to use for labeling objects in the mask. Defaults to 255 (the
        max value for ``uint8`` arrays). Ignored if `burn_field` is provided.
    burn_field : str, optional
        Name of a column in `df` that provides values for `burn_value` for each
        independent object. The column is cast to the mask dtype. If provided,
        `burn_value` is ignored.
    nodata_value : `int` or `float`, optional
        The nodata value written to the metadata of `out_file`. Defaults to 0.
        Only used when `out_file` is given, `out_type` is ``'int'`` and the
        nodata value of `reference_im` is a float; ignored otherwise.
        Take care when visualizing these masks, the nodata value may cause
        labels to not be visualized if nodata values are automatically masked
        by the software.

    Returns
    -------
    mask : :class:`numpy.array`
        An array of shape ``(shape[0], shape[1], len(df))`` whose dtype is
        ``uint8`` when `out_type` is ``'int'`` and ``float32`` otherwise. If
        `df` is empty, a 2D all-zero ``uint8`` array is returned instead.

    Raises
    ------
    ValueError
        If `out_file` is given without a `reference_im`.
    AttributeError
        If reading the nodata information from `reference_im` raises an
        ``AttributeError``; it is re-raised with a more verbose message.

    """
    # TODO: Refactor to remove some duplicated code here and in other mask fxns

    if out_file and not reference_im:
        raise ValueError(
            'If saving output to file, `reference_im` must be provided.')
    df = _check_df_load(df)

    if len(df) == 0:  # nothing to burn: hand back an empty mask
        # Only consult the reference image when one was actually supplied;
        # otherwise fall back to the `shape` argument.
        if reference_im:
            reference_im = _check_rasterio_im_load(reference_im)
            shape = reference_im.shape
        return np.zeros(shape=shape, dtype='uint8')

    if do_transform is None:
        # determine whether or not transform should be done
        do_transform = _check_do_transform(df, reference_im, affine_obj)

    # load in geoms if wkt. The result is kept in a local Series instead of
    # being assigned back into `df`, so a frame owned by the caller is not
    # modified as a side effect.
    geoms = df[geom_col].apply(_check_geom)
    if not do_transform:
        affine_obj = Affine(1, 0, 0, 0, 1, 0)  # identity transform

    if reference_im:
        reference_im = _check_rasterio_im_load(reference_im)
        shape = reference_im.shape
        if do_transform:
            affine_obj = reference_im.transform

    mask_dtype = 'uint8' if out_type == 'int' else 'float32'

    # extract geometries and pair them with burn values
    if burn_field:
        burn_values = df[burn_field].astype(mask_dtype)
    else:
        burn_values = [burn_value] * len(df)
    feature_list = list(zip(geoms, burn_values))

    # initialize the output array: one channel per feature
    output_arr = np.empty(shape=(shape[0], shape[1], len(feature_list)),
                          dtype=mask_dtype)

    for idx, feat in enumerate(feature_list):
        output_arr[:, :, idx] = features.rasterize([feat], out_shape=shape,
                                                   transform=affine_obj)

    # The nodata lookup needs a reference image; without one there is nothing
    # to read, so this step is skipped rather than failing on `None`.
    if reference_im:
        try:
            # True wherever any band equals the image's nodata value
            bad_data_mask = (
                reference_im.read() == reference_im.nodata).any(axis=0)
        except AttributeError as ae:  # raise another, more verbose AttributeError
            raise AttributeError("A nodata value is not defined for the source image. Make sure the reference_im has a nodata value defined.") from ae
        # NOTE(review): `.any(axis=0)` on a (bands, rows, cols) read yields a
        # 2D array, so this branch looks unreachable and nodata pixels are
        # left untouched -- confirm whether they were meant to be zeroed.
        if len(bad_data_mask.shape) > 2:
            bad_data_mask = np.dstack([bad_data_mask]*output_arr.shape[2])
            output_arr = np.where(bad_data_mask, 0, output_arr)

    if out_file:
        meta = reference_im.meta.copy()
        meta.update(count=output_arr.shape[-1])
        if out_type == 'int':
            meta.update(dtype='uint8')
            if isinstance(meta['nodata'], float):
                meta.update(nodata=nodata_value)
        # the context manager closes the dataset on exit
        with rasterio.open(out_file, 'w', **meta) as dst:
            for band, channel in enumerate(range(output_arr.shape[-1]), 1):
                dst.write(output_arr[:, :, channel], indexes=band)

    return output_arr