python-package/lets_plot/plot/sampling.py (27 lines of code) (raw):

#
# Copyright (c) 2019. JetBrains s.r.o.
# Use of this source code is governed by the MIT license that can be found in the LICENSE file.
#
from .core import FeatureSpec

__all__ = [
    'sampling_random',
    'sampling_random_stratified',
    'sampling_pick',
    'sampling_systematic',
    'sampling_group_random',
    'sampling_group_systematic',
    'sampling_vertex_vw',
    'sampling_vertex_dp',
]


def sampling_random(n, seed=None):
    """
    Build a sampling specification that keeps a randomly selected subset of items.

    Parameters
    ----------
    n : int
        Number of items to return.
    seed : int, optional
        Number used to initialize a pseudo random number generator.

    Returns
    -------
    ``FeatureSpec``
        Random sample specification.

    Examples
    --------
    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 10

        import numpy as np
        from lets_plot import *
        LetsPlot.setup_html()
        np.random.seed(27)
        mean = np.zeros(2)
        cov = [[.9, -.6],
               [-.6, .9]]
        x, y = np.random.multivariate_normal(mean, cov, 10000).T
        ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + \\
            geom_point(sampling=sampling_random(1000, 35))

    """
    options = {'n': n, 'seed': seed}
    return _sampling('random', **options)


def sampling_random_stratified(n, seed=None, min_subsample=None):
    """
    Build a sampling specification that samples randomly from each stratum (subgroup).

    Parameters
    ----------
    n : int
        Number of items to return.
    seed : int, optional
        Number used to initialize a pseudo random number generator.
    min_subsample : int, optional
        Minimal number of items in sub sample.

    Returns
    -------
    ``FeatureSpec``
        Stratified random sample specification.

    Examples
    --------
    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 10

        import numpy as np
        from lets_plot import *
        LetsPlot.setup_html()
        np.random.seed(27)
        n = 1000
        x = np.random.normal(0, 1, n)
        y = np.random.normal(0, 1, n)
        cond = np.random.choice(['a', 'b'], n, p=[.9, .1])
        ggplot({'x': x, 'y': y, 'cond': cond}, aes('x', 'y', color='cond')) + \\
            geom_point(sampling=sampling_random_stratified(50, 35, min_subsample=10))

    """
    options = {'n': n, 'seed': seed, 'min_subsample': min_subsample}
    return _sampling('random_stratified', **options)


def sampling_pick(n):
    """
    Build a specification for the 'pick' sampling method.

    Parameters
    ----------
    n : int
        Number of items to return.

    Returns
    -------
    ``FeatureSpec``
        Sample specification.

    Examples
    --------
    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 7

        import numpy as np
        from lets_plot import *
        LetsPlot.setup_html()
        x = np.linspace(-2, 2, 30)
        y = x ** 2
        ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + \\
            geom_line(sampling=sampling_pick(20))

    """
    options = {'n': n}
    return _sampling('pick', **options)


def sampling_systematic(n):
    """
    Build a sampling specification that selects items at a regular interval.

    Parameters
    ----------
    n : int
        Number of items to return.

    Returns
    -------
    ``FeatureSpec``
        Systematic sample specification.

    Examples
    --------
    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 9

        import numpy as np
        from lets_plot import *
        LetsPlot.setup_html()
        n = 1000
        x = np.arange(n)
        np.random.seed(12)
        y = np.random.normal(0, 1, n)
        ggplot({'x': x, 'y': y}, aes(x='x', y='y')) + \\
            geom_line(sampling=sampling_systematic(50))

    """
    options = {'n': n}
    return _sampling('systematic', **options)


def sampling_group_systematic(n):
    """
    Build a sampling specification that selects groups at a regular interval.

    Parameters
    ----------
    n : int
        Number of groups to return.

    Returns
    -------
    ``FeatureSpec``
        Group systematic sample specification.

    Examples
    --------
    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 11

        import numpy as np
        from lets_plot import *
        LetsPlot.setup_html()
        waves_count = 100
        peak_amplitude = np.linspace(1, 2, waves_count)
        wave_x = np.linspace(-np.pi, np.pi, 30)
        x = np.tile(wave_x, waves_count)
        y = np.array([a * np.sin(wave_x) for a in peak_amplitude]).flatten()
        a = np.repeat(peak_amplitude, wave_x.size)
        ggplot({'x': x, 'y': y, 'a': a}, aes('x', 'y')) + \\
            geom_line(aes(group='a', color='a'), sampling=sampling_group_systematic(10))

    """
    options = {'n': n}
    return _sampling('group_systematic', **options)


def sampling_group_random(n, seed=None):
    """
    Build a sampling specification that keeps a randomly selected subset of groups.

    Parameters
    ----------
    n : int
        Number of groups to return.
    seed : int, optional
        Number used to initialize a pseudo random number generator.

    Returns
    -------
    ``FeatureSpec``
        Group sample specification.

    Examples
    --------
    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 11

        import numpy as np
        from lets_plot import *
        LetsPlot.setup_html()
        waves_count = 100
        peak_amplitude = np.linspace(1, 2, waves_count)
        wave_x = np.linspace(-np.pi, np.pi, 30)
        x = np.tile(wave_x, waves_count)
        y = np.array([a * np.sin(wave_x) for a in peak_amplitude]).flatten()
        a = np.repeat(peak_amplitude, wave_x.size)
        ggplot({'x': x, 'y': y, 'a': a}, aes('x', 'y')) + \\
            geom_line(aes(group='a', color='a'), sampling=sampling_group_random(10, 35))

    """
    options = {'n': n, 'seed': seed}
    return _sampling('group_random', **options)


def sampling_vertex_vw(n, polygon=None):
    """
    Build a sampling specification that simplifies a polyline
    using the Visvalingam-Whyatt algorithm.

    Parameters
    ----------
    n : int
        Number of items to return.
    polygon : bool, default=None
        True to treat the input data as polygon rings,
        False to treat it as a polyline.
        None for auto-detection.

    Returns
    -------
    ``FeatureSpec``
        Vertices sample specification.

    Notes
    -----
    Vertex sampling is designed for polygon simplification.

    Examples
    --------
    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 17

        import numpy as np
        from scipy.stats import multivariate_normal
        from lets_plot import *
        LetsPlot.setup_html()
        np.random.seed(42)
        n = 300
        x = np.linspace(-1, 1, n)
        y = np.linspace(-1, 1, n)
        X, Y = np.meshgrid(x, y)
        mean = np.zeros(2)
        cov = [[1, .5],
               [.5, 1]]
        rv = multivariate_normal(mean, cov)
        Z = rv.pdf(np.dstack((X, Y)))
        data = {'x': X.flatten(), 'y': Y.flatten(), 'z': Z.flatten()}
        ggplot(data, aes(x='x', y='y', z='z')) + \\
            geom_contour(sampling=sampling_vertex_vw(150))

    """
    options = {'n': n, 'polygon': polygon}
    return _sampling('vertex_vw', **options)


def sampling_vertex_dp(n, polygon=None):
    """
    Build a sampling specification that simplifies a polyline
    using the Douglas-Peucker algorithm.

    Parameters
    ----------
    n : int
        Number of items to return.
    polygon : bool, default=None
        True to treat the input data as polygon rings,
        False to treat it as a polyline.
        None for auto-detection.

    Returns
    -------
    ``FeatureSpec``
        Vertices sample specification.

    Notes
    -----
    Vertex sampling is designed for polygon simplification.

    Examples
    --------
    .. jupyter-execute::
        :linenos:
        :emphasize-lines: 17

        import numpy as np
        from scipy.stats import multivariate_normal
        from lets_plot import *
        LetsPlot.setup_html()
        np.random.seed(42)
        n = 300
        x = np.linspace(-1, 1, n)
        y = np.linspace(-1, 1, n)
        X, Y = np.meshgrid(x, y)
        mean = np.zeros(2)
        cov = [[1, .5],
               [.5, 1]]
        rv = multivariate_normal(mean, cov)
        Z = rv.pdf(np.dstack((X, Y)))
        data = {'x': X.flatten(), 'y': Y.flatten(), 'z': Z.flatten()}
        ggplot(data, aes(x='x', y='y', z='z')) + \\
            geom_contour(sampling=sampling_vertex_dp(100))

    """
    options = {'n': n, 'polygon': polygon}
    return _sampling('vertex_dp', **options)


def _sampling(name, **kwargs):
    # Shared constructor for every public helper above: wraps the sampling
    # method name and its keyword options into a 'sampling' FeatureSpec.
    kind = 'sampling'
    return FeatureSpec(kind, name, **kwargs)