analysis/webservice/algorithms/doms/histogramplot.py (80 lines of code) (raw):

# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements.  See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import io
from multiprocessing import Process, Manager

import matplotlib
import matplotlib.mlab as mlab
import matplotlib.pyplot as plt
import numpy as np

from . import BaseDomsHandler
from . import ResultsStorage

if not matplotlib.get_backend():
    matplotlib.use('Agg')

# Maps the short request parameter name to the record field holding its value.
PARAMETER_TO_FIELD = {
    "sst": "sea_water_temperature",
    "sss": "sea_water_salinity"
}

# Axis-label unit suffix per parameter. The raw string keeps the mathtext
# backslash literal without relying on an invalid "\c" escape sequence.
PARAMETER_TO_UNITS = {
    "sst": r"($^\circ$C)",
    "sss": "(g/L)"
}


class DomsHistogramPlotQueryResults(BaseDomsHandler.DomsQueryResults):
    """Query results that additionally carry a rendered histogram plot."""

    def __init__(self, x, parameter, primary, secondary, args=None, bounds=None, count=None, details=None,
                 computeOptions=None, executionId=None, plot=None):
        """Store the histogram values, their labels and the rendered plot.

        ``x`` is forwarded to the base class as ``results``; the remaining
        keyword arguments are passed through to the base class unchanged.
        """
        BaseDomsHandler.DomsQueryResults.__init__(self, results=x, args=args, details=details, bounds=bounds,
                                                  count=count, computeOptions=computeOptions,
                                                  executionId=executionId)
        self.__primary = primary
        self.__secondary = secondary
        self.__x = x
        self.__parameter = parameter
        self.__plot = plot

    def toImage(self):
        """Return the plot object that was supplied at construction time."""
        return self.__plot


def render(d, x, primary, secondary, parameter, norm_and_curve=False):
    """Draw a 50-bin histogram of ``x`` and store it as PNG bytes in ``d['plot']``.

    :param d: mutable mapping that receives the PNG bytes under the key ``'plot'``
    :param x: sequence of numeric values to histogram
    :param primary: primary dataset name, used in the title and x-axis label
    :param secondary: secondary dataset name, used in the title and x-axis label
    :param parameter: key into ``PARAMETER_TO_UNITS``; unknown keys fall back to ``"sst"``
    :param norm_and_curve: when true, normalise the histogram to a probability
        density and overlay the normal distribution fitted to ``x``
    """
    fig, ax = plt.subplots()
    fig.suptitle(f'{primary} vs. {secondary}', fontsize=14, fontweight='bold')

    # The fitted curve is a probability density, so the bars have to be
    # normalised to the same scale. Normalising an empty sample would divide
    # by zero, hence the length guard.
    as_density = bool(norm_and_curve) and len(x) > 0
    _, bins, _ = ax.hist(x, 50, density=as_density, facecolor='green', alpha=0.75)

    if as_density:
        mean = np.mean(x)
        sigma = np.sqrt(np.var(x))
        # A Gaussian is undefined for zero spread; skip the overlay then.
        if sigma > 0:
            # Normal PDF computed directly: matplotlib.mlab.normpdf was
            # removed in matplotlib 3.1.
            y = np.exp(-0.5 * ((bins - mean) / sigma) ** 2) / (sigma * np.sqrt(2 * np.pi))
            ax.plot(bins, y, 'r--', linewidth=1)

    ax.set_title('n = %d' % len(x))

    units = PARAMETER_TO_UNITS.get(parameter, PARAMETER_TO_UNITS["sst"])
    ax.set_xlabel("%s - %s %s" % (primary, secondary, units))

    if norm_and_curve:
        ax.set_ylabel("Probability per unit difference")
    else:
        ax.set_ylabel("Frequency")

    ax.grid(True)

    buf = io.BytesIO()
    fig.savefig(buf, format='png')
    # Release the figure so repeated in-process calls do not accumulate figures.
    plt.close(fig)
    d['plot'] = buf.getvalue()


def renderAsync(x, primary, secondary, parameter, norm_and_curve):
    """Run :func:`render` in a child process and return the PNG bytes it produced.

    The call blocks until the child process has exited.

    :raises KeyError: if the child process finished without storing a plot
    """
    # The context manager shuts the manager's server process down on exit
    # instead of leaving it running after each request.
    with Manager() as manager:
        d = manager.dict()
        p = Process(target=render, args=(d, x, primary, secondary, parameter, norm_and_curve))
        p.start()
        p.join()
        # Read the value while the manager is still alive; the proxy is
        # unusable once the manager has been shut down.
        return d['plot']


def createHistogramPlot(id, parameter, norm_and_curve=False, config=None):
    """Build the histogram plot for the stored results of execution ``id``.

    :param id: execution identifier passed to ``storage.retrieveResults``
    :param parameter: parameter short name (e.g. ``"sst"`` or ``"sss"``)
    :param norm_and_curve: forwarded to the renderer (density + fitted normal curve)
    :param config: forwarded to ``ResultsStorage.ResultsRetrieval``
    :return: a :class:`DomsHistogramPlotQueryResults` holding the differences and the PNG plot
    """
    with ResultsStorage.ResultsRetrieval(config) as storage:
        params, stats, data = storage.retrieveResults(id)

    primary = params["primary"]
    # Only the first matchup dataset is plotted.
    secondary = params["matchup"][0]

    x = createHistTable(data, secondary, parameter)

    plot = renderAsync(x, primary, secondary, parameter, norm_and_curve)

    r = DomsHistogramPlotQueryResults(x=x, parameter=parameter, primary=primary, secondary=secondary,
                                      args=params, details=stats,
                                      bounds=None, count=None, computeOptions=None, executionId=id, plot=plot)
    return r


def createHistTable(results, secondary, parameter):
    """Collect primary-minus-secondary differences of one field across all matches.

    :param results: iterable of entries, each a mapping with a ``"matches"`` list
    :param secondary: only matches whose ``"source"`` equals this value are used
    :param parameter: key into ``PARAMETER_TO_FIELD``; unknown keys fall back to ``"sst"``
    :return: list of ``entry[field] - match[field]`` for every pair where both carry the field
    """
    field = PARAMETER_TO_FIELD.get(parameter, PARAMETER_TO_FIELD["sst"])

    x = []
    for entry in results:
        # The primary value is the same for every match of this entry.
        if field not in entry:
            continue
        a = entry[field]
        for match in entry["matches"]:
            if match["source"] == secondary and field in match:
                x.append(a - match[field])

    return x