public static function newBase85Data()

in src/parser/ArcanistBundle.php [827:1044]


  /**
   * Encode binary data as lines of git-style base85 text.
   *
   * The input is split into chunks of up to 52 bytes. Each chunk becomes one
   * output line: a single character encoding the chunk length ("A"-"Z" for
   * 1-26 bytes, "a"-"z" for 27-52 bytes), followed by 5 printable characters
   * for every 4 input bytes (a short trailing group is zero-padded and still
   * emits 5 characters), followed by `$eol`.
   *
   * @param string $data Raw data to encode.
   * @param string $eol String appended after every output line.
   * @param string|null $mode Use `null` for normal operation: the data is
   *   compressed with `gzcompress()` first and the integer or float code path
   *   is selected from `PHP_INT_SIZE`. Use "32bit" or "64bit" to skip
   *   compression and force the float or integer code path, respectively.
   * @return string Encoded text. Empty if there is nothing to encode.
   * @throws Exception If `$mode` is `null` and `gzcompress()` is unavailable,
   *   or if `$mode` is not a supported mode.
   */
  public static function newBase85Data($data, $eol, $mode = null) {
    // The "32bit" and "64bit" modes are used by unit tests to verify that all
    // of the encoding pathways here work identically. In these modes, we skip
    // compression because `gzcompress()` may not be stable and we just want
    // to test that the output matches some expected result.

    if ($mode === null) {
      if (!function_exists('gzcompress')) {
        throw new Exception(
          pht(
            'This patch has binary data. The PHP zlib extension is required '.
            'to apply patches with binary data to git. Install the PHP zlib '.
            'extension to continue.'));
      }

      $input = gzcompress($data);
      $is_64bit = (PHP_INT_SIZE >= 8);
    } else {
      switch ($mode) {
        case '32bit':
          $input = $data;
          $is_64bit = false;
          break;
        case '64bit':
          $input = $data;
          $is_64bit = true;
          break;
        default:
          throw new Exception(
            pht(
              'Unsupported base85 encoding mode "%s".',
              $mode));
      }
    }

    // See emit_binary_diff_body() in diff.c for git's implementation.

    // This is implemented awkwardly in order to closely mirror git's
    // implementation in base85.c

    // It is also implemented awkwardly to work correctly on 32-bit machines.
    // Broadly, this algorithm converts the binary input to printable output
    // by transforming each 4 binary bytes of input to 5 printable bytes of
    // output, one piece at a time.
    //
    // To do this, we convert the 4 bytes into a 32-bit integer, then use
    // modulus and division by 85 to pick out printable bytes (85^5 is slightly
    // larger than 2^32). In C, this algorithm is fairly easy to implement
    // because the accumulator can be made unsigned.
    //
    // In PHP, there are no unsigned integers, so values larger than 2^31 break
    // on 32-bit systems under modulus:
    //
    //   $ php -r 'print (1 << 31) % 13;' # On a 32-bit machine.
    //   -11
    //
    // However, PHP's float type is an IEEE 754 64-bit double precision float,
    // so we can safely store integers up to around 2^53 without loss of
    // precision. To work around the lack of an unsigned type, we just use a
    // double and perform the modulus with fmod().
    //
    // (Since PHP overflows integer operations into floats, we don't need much
    // additional casting.)

    // On 64 bit systems, we skip all this fanfare and just use integers. This
    // is significantly faster.

    static $map = array(
      '0',
      '1',
      '2',
      '3',
      '4',
      '5',
      '6',
      '7',
      '8',
      '9',
      'A',
      'B',
      'C',
      'D',
      'E',
      'F',
      'G',
      'H',
      'I',
      'J',
      'K',
      'L',
      'M',
      'N',
      'O',
      'P',
      'Q',
      'R',
      'S',
      'T',
      'U',
      'V',
      'W',
      'X',
      'Y',
      'Z',
      'a',
      'b',
      'c',
      'd',
      'e',
      'f',
      'g',
      'h',
      'i',
      'j',
      'k',
      'l',
      'm',
      'n',
      'o',
      'p',
      'q',
      'r',
      's',
      't',
      'u',
      'v',
      'w',
      'x',
      'y',
      'z',
      '!',
      '#',
      '$',
      '%',
      '&',
      '(',
      ')',
      '*',
      '+',
      '-',
      ';',
      '<',
      '=',
      '>',
      '?',
      '@',
      '^',
      '_',
      '`',
      '{',
      '|',
      '}',
      '~',
    );

    // Map a chunk length to its prefix character: 1-26 become "A"-"Z" and
    // 27-52 become "a"-"z". (Index 0 is populated by the loop but never used,
    // because empty input returns early below.)
    $len_map = array();
    for ($ii = 0; $ii <= 52; $ii++) {
      if ($ii <= 26) {
        $len_map[$ii] = chr($ii + ord('A') - 1);
      } else {
        $len_map[$ii] = chr($ii - 26 + ord('a') - 1);
      }
    }

    $buf = '';

    // Nothing to encode. Handle this explicitly: str_split() on an empty
    // string returns array('') before PHP 8.2 (which would emit a bogus
    // zero-length line) and array() from PHP 8.2 onward.
    if ($input === '') {
      return $buf;
    }

    // Every chunk except possibly the last is exactly 52 bytes long.
    $lines = str_split($input, 52);

    foreach ($lines as $line) {
      $len = strlen($line);

      // The first character encodes the line length.
      $buf .= $len_map[$len];

      $pos = 0;
      while ($len) {
        // Pack up to 4 bytes, most significant first, into the accumulator.
        // A short trailing group leaves the low-order bytes as zero.
        $accum = 0;
        for ($count = 24; $count >= 0; $count -= 8) {
          $val = ord($line[$pos++]);
          $val = $val * (1 << $count);
          $accum = $accum + $val;
          if (--$len == 0) {
            break;
          }
        }

        // Digits are produced least significant first, so the slice is
        // reversed before it is appended to the output.
        $slice = '';

        // If we're in 64bit mode, we can just use integers. Otherwise, we
        // need to use floating point math to avoid overflows.

        if ($is_64bit) {
          for ($count = 4; $count >= 0; $count--) {
            $val = $accum % 85;
            // Truncate explicitly so the accumulator stays an integer.
            // Feeding a fractional float to "%" raises an "implicit
            // conversion from float to int" deprecation on PHP 8.1+.
            $accum = (int)($accum / 85);
            $slice .= $map[$val];
          }
        } else {
          for ($count = 4; $count >= 0; $count--) {
            $val = (int)fmod($accum, 85.0);
            $accum = floor($accum / 85.0);
            $slice .= $map[$val];
          }
        }

        $buf .= strrev($slice);
      }

      $buf .= $eol;
    }

    return $buf;
  }