override protected def remap()

in daffodil-lib/src/main/scala/org/apache/daffodil/lib/xml/PUARemappers.scala [46:104]


  /**
   * Computes the replacement code for a single character so that characters which are
   * not legal in XML are preserved by moving them into the Unicode Private Use Area (PUA).
   *
   * Mapping performed on `curr`:
   *  - TAB (0x9) and LF (0xA) are returned unchanged.
   *  - CR (0xD): when `replaceCRWithLF` is false it is remapped to U+E00D. When it is true,
   *    an isolated CR becomes LF (0xA), and a CR immediately followed by LF yields the
   *    negated value -0xA.
   *  - Any other character below 0x20 is shifted by 0xE000 (U+E000..U+E01F).
   *  - A surrogate that forms a well-formed pair with its neighbor is returned unchanged;
   *    an isolated surrogate is shifted by 0x1000 (U+E800..U+EFFF).
   *  - A character already in the PUA (U+E000..U+F8FF) is returned unchanged, unless
   *    `checkForExistingPUA` is true, in which case an exception is thrown.
   *  - U+FFFE and U+FFFF are mapped to U+F0FE and U+F0FF respectively.
   *  - Every other character is returned unchanged.
   *
   * NOTE(review): the negative result for CRLF presumably tells the caller to also consume
   * the following LF so that the pair collapses to one LF -- confirm against the base class.
   *
   * @param prev the character before `curr`; only consulted to detect a surrogate pair
   * @param curr the character being remapped
   * @param next the character after `curr`; consulted for CRLF and surrogate-pair detection
   * @return the replacement character code, negated in the CRLF => LF case
   * @throws RemapPUACharDetected if `curr` is already a PUA character and
   *                              `checkForExistingPUA` is set
   */
  override protected def remap(prev: Char, curr: Char, next: Char): Int = {
    curr match {
      case 0x9 => curr
      case 0xa => curr
      case 0xd =>
        if (!replaceCRWithLF)
          0xe00d // remap CR (isolated or part of CRLF) to preserve it. Leave any LF alone.
        else if (next == 0xa)
          -0xa // CRLF => LF, standard XML behavior. Note negated.
        else
          0xa // isolated CR => LF, standard XML behavior. Note NOT negated.
      case _ if (curr < 0x20) => curr + 0xe000 // ascii c0 controls
      // no remapping for the so called C1 controls (0x80-0x9F) Those are not XML illegal.
      case _ if Character.isSurrogate(curr) => {
        val startsPair = Character.isHighSurrogate(curr) && Character.isLowSurrogate(next)
        val endsPair = Character.isLowSurrogate(curr) && Character.isHighSurrogate(prev)
        if (startsPair || endsPair) {
          // well formed surrogate pairs are preserved
          curr
        } else {
          // curr is an isolated surrogate, so to preserve we must remap to PUA
          curr + 0x1000
        }
      }
      case _ if (curr >= 0xe000 && curr <= 0xf8ff) => { // Unicode PUA is E000 to F8FF.
        if (checkForExistingPUA)
          throw new RemapPUACharDetected(curr)
        else curr
      }
      case _ if (curr < 0xfffe) => curr
      // 0xFFFE and 0xFFFF are regular Unicode chars, but XML illegal.
      // (XML only allows up to 0xFFFD)
      // They can't remap into the PUA by the basic techniques of adding
      // 0xE000 or 0x1000 like with control chars or unpaired surrogate code points.
      // So we just pick two adhoc, but recognizable, PUA code points to use by subtracting
      // 0x0F00 from them.
      case 0xfffe =>
        0xf0fe // U+FFFE is not a legal XML char. Can't remap to PUA the regular way.
      case 0xffff => 0xf0ff // U+FFFF is not a legal XML char
      case bad =>
        // $COVERAGE-OFF$
        // This is a final class, so this only gets called with characters by the
        // base class remap(s: String) method. Those chars are only
        // taken from Scala/Java strings, hence, the char codes cannot be beyond 0xFFFF
        //
        // The value is converted to Int because the %X conversion rejects a boxed
        // Character, and %04X gives a zero-padded 4-digit hex code.
        Assert.impossibleCase(
          "Scala/Java character code cannot be beyond 0xFFFF but was 0x%04X".format(bad.toInt)
        )
      // $COVERAGE-ON$
    }
  }