def musicxml_similarity()

in moonlight/evaluation/musicxml.py [0:0]


def musicxml_similarity(a, b):
  """Determines the similarity of two scores represented as musicXML strings.

  We currently assume the following:
  - The durations of each representation represent the same tempo.
    - i.e. the unit of measure tempo divisions is the same for both pieces.
  - Scores have an equal number of measures. They are dissimilar otherwise.
  - Corresponding parts have an equal number of staves.

  This currently accounts for:
  - Intra-measure note-to-note pitch and tempo distance, as an edit distance.
    - This can easily be generalized to weight differences based on distance,
      such as penalizing larger gaps in pitch or tempo.

  Args:
    a: a musicXML string. Text is encoded to UTF-8 and bytes are parsed with
      `etree.fromstring`; any other value is handed to `PartStaves` as is.
    b: a musicXML string, handled the same way as `a`.

  Returns:
    A pd.DataFrame with scores on similarity between two scores. 1.0 means
    exactly the same, 0.0 means not similar at all. If the measure counts of
    the two scores differ, the result of `_not_similar()` is returned instead.
    Example:
                        overall_score
      staff  measure
      0      0          0.750
             1          1.000
      total             0.875
  """
  # Normalize both inputs: text -> UTF-8 bytes -> parsed XML.
  if isinstance(a, six.text_type):
    a = a.encode('utf-8')
  if isinstance(a, bytes):
    a = etree.fromstring(a)
  if isinstance(b, six.text_type):
    b = b.encode('utf-8')
  if isinstance(b, bytes):
    b = etree.fromstring(b)

  a = PartStaves(a)
  b = PartStaves(b)

  # TODO(larryruili): Implement dissimilar measure count edit distance.
  if a.all_measure_counts() != b.all_measure_counts():
    return _not_similar()

  measure_similarities = []
  index = []
  # The measure counts matched above, so `a` drives the iteration for both.
  for part_staff in moves.xrange(a.num_partstaves()):
    for measure_num in moves.xrange(a.num_measures(part_staff)):
      measure_similarities.append(
          measure_similarity(
              a.get_measure(part_staff, measure_num),
              b.get_measure(part_staff, measure_num)))
      index.append((part_staff, measure_num))
  index_names = ['staff', 'measure']
  df = pd.DataFrame(
      measure_similarities,
      columns=[OVERALL_SCORE],
      index=pd.MultiIndex.from_tuples(index, names=index_names))
  # Summary row holding the mean of all per-measure scores.
  total = pd.DataFrame(
      [df[OVERALL_SCORE].mean()],
      columns=[OVERALL_SCORE],
      index=pd.MultiIndex.from_tuples([('total', '')], names=index_names))
  # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent
  # and is available in every pandas version.
  return pd.concat([df, total])