def _elem_sequence_match_tag_path_pattern()

in python/de-identifier/research_pacs/de_identifier/dicom_tag_path_pattern.py [0:0]


def _elem_sequence_match_tag_path_pattern(elem_sequence, tag_path_pattern):
  """
  Tell whether a chain of nested data elements satisfies a tag path pattern.

  `elem_sequence` holds one data element per nesting level, ordered from the top level data 
  element down to the data element being tested. For example the tag Sequence1.Sequence2.Tag 
  is represented by `[elem for Sequence1, elem for Sequence2, elem for Tag]`.

  The pattern is split by `_split_tag_path_pattern` into a list of per-level tag patterns and 
  a prefix. The prefix constrains how deep the data element may be nested; the per-level tag 
  patterns are then compared with the innermost data elements of `elem_sequence`.

  Args:
    elem_sequence (list): List of nested data elements, outermost first
    tag_path_pattern (str): Tag path pattern to test the data elements against

  Returns:
    bool: `True` if the nesting depth is allowed by the prefix and every per-level tag pattern 
    matches its data element, `False` otherwise

  """
  tag_patterns, prefix = _split_tag_path_pattern(tag_path_pattern)
  depth = len(elem_sequence)
  nb_patterns = len(tag_patterns)

  if prefix == '':
    # No prefix: the path is anchored at the top level, so there must be exactly one data 
    # element per tag pattern.
    if depth != nb_patterns:
      return False
  elif prefix == '+/':
    # '+/' prefix: the path must start below the top level, so there must be strictly more 
    # data elements than tag patterns.
    if depth <= nb_patterns:
      return False
  elif prefix == '*/':
    # '*/' prefix: the path may start at any level, so there must be at least as many data 
    # elements as tag patterns.
    if depth < nb_patterns:
      return False

  # Align both lists on their last item and compare pairwise, walking backwards. For example:
  # - The tag is Sequence1.Sequence2.Sequence3.00100010
  # - The tag path pattern is */Sequence*.Seq*.0010XXXX
  # - 00100010 is checked against 0010XXXX, then Sequence3 against Seq*, then Sequence2 
  #   against Sequence*
  return all(
    _elem_match_tag_pattern(elem_sequence[offset], tag_patterns[offset])
    for offset in range(-1, -nb_patterns - 1, -1)
  )