research/active_learning/active_learning_methods/mixture_of_samplers.py [28:110]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Runs when the module is imported; the return value is discarded, so the call
# is made purely for its side effect. Presumably it registers the base
# samplers in AL_MAPPING, which MixtureOfSamplers.initialize looks up --
# TODO confirm against the module that defines get_base_AL_mapping.
get_base_AL_mapping()


class MixtureOfSamplers(SamplingMethod):
  """Samples according to mixture of base sampling methods.

  Each base sampler is asked for a share of the batch proportional to its
  mixture weight. If duplicate points are selected by the mixed strategies
  when forming the batch then the remaining slots are divided according to
  mixture weights and another partial batch is requested until the batch is
  full.

  Attributes:
    X, y, seed: stored as given and passed to every base sampler built by
      `initialize`.
    name: the string 'mixture_of_samplers'.
    sampling_methods: method names taken from mixture['methods'].
    sampling_weights: dict mapping method name -> mixture weight.
    samplers: dict mapping method name -> sampler object.
    selected_by_sampler: dict mapping method name -> indices that sampler
      proposed during the most recent `select_batch_` call.
    history: one deep-copied `selected_by_sampler` snapshot per
      `select_batch_` call.
  """

  def __init__(self,
               X,
               y,
               seed,
               mixture={'methods': ('margin', 'uniform'),
                        'weights': (0.5, 0.5)},
               samplers=None):
    """Stores the data and mixture spec and sets up the base samplers.

    Args:
      X: data, forwarded untouched to the base samplers.
      y: labels, forwarded untouched to the base samplers.
      seed: seed forwarded untouched to the base samplers.
      mixture: dict with key 'methods' (sequence of method names) and key
        'weights' (sequence of weights, paired positionally with 'methods').
        The default dict object is shared between calls; it is only read
        here, never mutated.
      samplers: optional dict mapping method name -> already initialized
        sampler. When None, the samplers are built via `initialize`.

    Raises:
      KeyError: if `mixture` lacks the 'methods' or 'weights' key.
    """
    self.X = X
    self.y = y
    self.name = 'mixture_of_samplers'
    self.sampling_methods = mixture['methods']
    # The default above previously used the key 'weight', which made this
    # lookup raise KeyError whenever the default mixture was used.
    self.sampling_weights = dict(zip(mixture['methods'], mixture['weights']))
    self.seed = seed
    # A dict of initialized samplers is allowed as an input because
    # for AL_methods that search over different mixtures, may want mixtures to
    # have shared AL_methods so that initialization is only performed once for
    # computation intensive methods like HierarchicalClusteringAL and
    # states are shared between mixtures.
    # If initialized samplers are not provided, initialize them ourselves.
    if samplers is None:
      self.samplers = {}
      self.initialize(self.sampling_methods)
    else:
      self.samplers = samplers
    self.history = []

  def initialize(self, samplers):
    """Builds a fresh sampler for each method name, replacing self.samplers.

    Args:
      samplers: iterable of method names; each must be a key of AL_MAPPING,
        whose value is called as AL_MAPPING[name](X, y, seed).
    """
    self.samplers = {}
    for s in samplers:
      self.samplers[s] = AL_MAPPING[s](self.X, self.y, self.seed)

  def select_batch_(self, already_selected, N, **kwargs):
    """Returns batch of datapoints selected according to mixture weights.

    Args:
      already_selected: index of datapoints already selected
      N: batch size
      **kwargs: extra keyword arguments passed on to every base sampler's
        select_batch call, along with 'already_selected' and 'N'.

    Returns:
      list of unique indices proposed by the base samplers. It holds N
      indices unless the base samplers cannot supply N distinct points, in
      which case the list is shorter.
    """
    kwargs['already_selected'] = copy.copy(already_selected)
    inds = set()
    self.selected_by_sampler = {s: [] for s in self.sampling_methods}
    effective_N = 0
    while len(inds) < N:
      # Every round enlarges the request budget by the number of slots still
      # empty, so samplers that keep proposing duplicates are asked for
      # progressively more points.
      effective_N += N - len(inds)
      size_before_round = len(inds)
      every_request_capped = True
      for s in self.sampling_methods:
        if len(inds) >= N:
          break
        # At least one point per sampler, never more than the full batch.
        batch_size = min(max(int(self.sampling_weights[s] * effective_N), 1), N)
        every_request_capped = every_request_capped and batch_size == N
        kwargs['N'] = batch_size
        s_inds = self.samplers[s].select_batch(**kwargs)
        # Record everything this sampler proposed (first-seen order), even
        # points that end up not being added to the batch.
        proposed = self.selected_by_sampler[s]
        for ind in s_inds:
          if ind not in proposed:
            proposed.append(ind)
        new_inds = [d for d in s_inds if d not in inds]
        # Only fill the slots that are still open.
        inds.update(new_inds[:N - len(inds)])
      # Requests cannot grow past N. If every sampler was already asked for N
      # points and none produced anything new, stop rather than spin forever.
      # NOTE(review): assumes such a sampler would also yield nothing new on
      # a retry (e.g. fewer than N unlabeled points remain) -- confirm.
      if len(inds) == size_before_round and every_request_capped:
        break
    self.history.append(copy.deepcopy(self.selected_by_sampler))
    return list(inds)

  def to_dict(self):
    """Returns a dict describing the mixture and its base samplers.

    Returns:
      dict with keys 'history', 'samplers' (the method names) and
      'mixture_weights', plus one entry per sampler name holding that
      sampler's own to_dict() output.
    """
    output = {}
    output['history'] = self.history
    output['samplers'] = self.sampling_methods
    output['mixture_weights'] = self.sampling_weights
    for s in self.samplers:
      s_output = self.samplers[s].to_dict()
      output[s] = s_output
    return output
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


research/active_learning/sampling_methods/mixture_of_samplers.py [28:110]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
# Runs when the module is imported; the return value is discarded, so the call
# is made purely for its side effect. Presumably it registers the base
# samplers in AL_MAPPING, which MixtureOfSamplers.initialize looks up --
# TODO confirm against the module that defines get_base_AL_mapping.
get_base_AL_mapping()


class MixtureOfSamplers(SamplingMethod):
  """Samples according to mixture of base sampling methods.

  Each base sampler is asked for a share of the batch proportional to its
  mixture weight. If duplicate points are selected by the mixed strategies
  when forming the batch then the remaining slots are divided according to
  mixture weights and another partial batch is requested until the batch is
  full.

  Attributes:
    X, y, seed: stored as given and passed to every base sampler built by
      `initialize`.
    name: the string 'mixture_of_samplers'.
    sampling_methods: method names taken from mixture['methods'].
    sampling_weights: dict mapping method name -> mixture weight.
    samplers: dict mapping method name -> sampler object.
    selected_by_sampler: dict mapping method name -> indices that sampler
      proposed during the most recent `select_batch_` call.
    history: one deep-copied `selected_by_sampler` snapshot per
      `select_batch_` call.
  """

  def __init__(self,
               X,
               y,
               seed,
               mixture={'methods': ('margin', 'uniform'),
                        'weights': (0.5, 0.5)},
               samplers=None):
    """Stores the data and mixture spec and sets up the base samplers.

    Args:
      X: data, forwarded untouched to the base samplers.
      y: labels, forwarded untouched to the base samplers.
      seed: seed forwarded untouched to the base samplers.
      mixture: dict with key 'methods' (sequence of method names) and key
        'weights' (sequence of weights, paired positionally with 'methods').
        The default dict object is shared between calls; it is only read
        here, never mutated.
      samplers: optional dict mapping method name -> already initialized
        sampler. When None, the samplers are built via `initialize`.

    Raises:
      KeyError: if `mixture` lacks the 'methods' or 'weights' key.
    """
    self.X = X
    self.y = y
    self.name = 'mixture_of_samplers'
    self.sampling_methods = mixture['methods']
    # The default above previously used the key 'weight', which made this
    # lookup raise KeyError whenever the default mixture was used.
    self.sampling_weights = dict(zip(mixture['methods'], mixture['weights']))
    self.seed = seed
    # A dict of initialized samplers is allowed as an input because
    # for AL_methods that search over different mixtures, may want mixtures to
    # have shared AL_methods so that initialization is only performed once for
    # computation intensive methods like HierarchicalClusteringAL and
    # states are shared between mixtures.
    # If initialized samplers are not provided, initialize them ourselves.
    if samplers is None:
      self.samplers = {}
      self.initialize(self.sampling_methods)
    else:
      self.samplers = samplers
    self.history = []

  def initialize(self, samplers):
    """Builds a fresh sampler for each method name, replacing self.samplers.

    Args:
      samplers: iterable of method names; each must be a key of AL_MAPPING,
        whose value is called as AL_MAPPING[name](X, y, seed).
    """
    self.samplers = {}
    for s in samplers:
      self.samplers[s] = AL_MAPPING[s](self.X, self.y, self.seed)

  def select_batch_(self, already_selected, N, **kwargs):
    """Returns batch of datapoints selected according to mixture weights.

    Args:
      already_selected: index of datapoints already selected
      N: batch size
      **kwargs: extra keyword arguments passed on to every base sampler's
        select_batch call, along with 'already_selected' and 'N'.

    Returns:
      list of unique indices proposed by the base samplers. It holds N
      indices unless the base samplers cannot supply N distinct points, in
      which case the list is shorter.
    """
    kwargs['already_selected'] = copy.copy(already_selected)
    inds = set()
    self.selected_by_sampler = {s: [] for s in self.sampling_methods}
    effective_N = 0
    while len(inds) < N:
      # Every round enlarges the request budget by the number of slots still
      # empty, so samplers that keep proposing duplicates are asked for
      # progressively more points.
      effective_N += N - len(inds)
      size_before_round = len(inds)
      every_request_capped = True
      for s in self.sampling_methods:
        if len(inds) >= N:
          break
        # At least one point per sampler, never more than the full batch.
        batch_size = min(max(int(self.sampling_weights[s] * effective_N), 1), N)
        every_request_capped = every_request_capped and batch_size == N
        kwargs['N'] = batch_size
        s_inds = self.samplers[s].select_batch(**kwargs)
        # Record everything this sampler proposed (first-seen order), even
        # points that end up not being added to the batch.
        proposed = self.selected_by_sampler[s]
        for ind in s_inds:
          if ind not in proposed:
            proposed.append(ind)
        new_inds = [d for d in s_inds if d not in inds]
        # Only fill the slots that are still open.
        inds.update(new_inds[:N - len(inds)])
      # Requests cannot grow past N. If every sampler was already asked for N
      # points and none produced anything new, stop rather than spin forever.
      # NOTE(review): assumes such a sampler would also yield nothing new on
      # a retry (e.g. fewer than N unlabeled points remain) -- confirm.
      if len(inds) == size_before_round and every_request_capped:
        break
    self.history.append(copy.deepcopy(self.selected_by_sampler))
    return list(inds)

  def to_dict(self):
    """Returns a dict describing the mixture and its base samplers.

    Returns:
      dict with keys 'history', 'samplers' (the method names) and
      'mixture_weights', plus one entry per sampler name holding that
      sampler's own to_dict() output.
    """
    output = {}
    output['history'] = self.history
    output['samplers'] = self.sampling_methods
    output['mixture_weights'] = self.sampling_weights
    for s in self.samplers:
      s_output = self.samplers[s].to_dict()
      output[s] = s_output
    return output
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -