in moonlight/evaluation/musicxml.py [0:0]
def musicxml_similarity(a, b):
    """Determines the similarity of two scores represented as musicXML strings.

    We currently assume the following:
      - The durations of each representation represent the same tempo.
        - i.e. the unit of measure tempo divisions is the same for both pieces.
      - Scores have an equal number of measures. They are dissimilar otherwise.
      - Corresponding parts have an equal number of staves.

    This currently accounts for:
      - Intra-measure note-to-note pitch and tempo distance, as an edit
        distance.
        - This can easily be generalized to weight differences based on
          distance, such as penalizing larger gaps in pitch or tempo.

    Args:
      a: a musicXML string (text or bytes). Any other object is handed to
        `PartStaves` unchanged.
      b: a musicXML string (text or bytes). Any other object is handed to
        `PartStaves` unchanged.

    Returns:
      A pd.DataFrame with scores on similarity between two scores. 1.0 means
      exactly the same, 0.0 means not similar at all. The frame is indexed by
      (staff, measure) and ends with a ('total', '') row holding the mean of
      the per-measure scores. If the measure counts of the two scores differ,
      the result of `_not_similar()` is returned instead.

      Example:

                       overall_score
        staff measure
        0     0                0.750
              1                1.000
        total                  0.875
    """
    # Normalize both inputs: text -> UTF-8 bytes -> parsed XML tree.
    if isinstance(a, six.text_type):
        a = a.encode('utf-8')
    if isinstance(a, bytes):
        a = etree.fromstring(a)
    if isinstance(b, six.text_type):
        b = b.encode('utf-8')
    if isinstance(b, bytes):
        b = etree.fromstring(b)

    a = PartStaves(a)
    b = PartStaves(b)

    # TODO(larryruili): Implement dissimilar measure count edit distance.
    if a.all_measure_counts() != b.all_measure_counts():
        return _not_similar()

    # Score every measure of `a` against the measure at the same
    # (part staff, measure number) position in `b`.
    measure_similarities = []
    index = []
    for part_staff in moves.xrange(a.num_partstaves()):
        for measure_num in moves.xrange(a.num_measures(part_staff)):
            measure_similarities.append(
                measure_similarity(
                    a.get_measure(part_staff, measure_num),
                    b.get_measure(part_staff, measure_num)))
            index.append((part_staff, measure_num))

    index_names = ['staff', 'measure']
    df = pd.DataFrame(
        measure_similarities,
        columns=[OVERALL_SCORE],
        index=pd.MultiIndex.from_tuples(index, names=index_names))

    # Summary row: the mean of all per-measure scores.
    total = pd.DataFrame(
        [df[OVERALL_SCORE].mean()],
        columns=[OVERALL_SCORE],
        index=pd.MultiIndex.from_tuples([('total', '')], names=index_names))

    # DataFrame.append was removed in pandas 2.0; pd.concat is the supported
    # equivalent and is available in older pandas releases as well.
    return pd.concat([df, total])