// preview/git.js

/** * @license * Licensed to Elasticsearch under one or more contributor * license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright * ownership. Elasticsearch licenses this file to you under * the Apache License, Version 2.0 (the "License"); you may * not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * KIND, either express or implied. See the License for the * specific language governing permissions and limitations * under the License. */ 'use strict'; const child_process = require("child_process"); const { Transform } = require("stream"); /** * Git operations used by the preview. We'd love to use NodeGit but it doesn't * looks like it is asynchronous and/or streaming in the places where we need * it to be. */ module.exports = dir => {return { /** * Returns a promise that will contain the type of the object. Will be one of * `blob`, `commit`, `tag`, `tree`, or `missing`. The first four are defined * [here]{@link http://shafiul.github.io/gitbook/1_the_git_object_model.html}. * `missing` is for missing objects. */ objectType: object => { return new Promise((resolve, reject) => { const opts = { cwd: dir, max_buffer: 64, }; child_process.execFile( "git", ["cat-file", "-t", object], opts, toStringHandler(resolve, reject, resolve) ); }); }, /** * Returns a promise containing the contents of an object. 
* @param {int} sizeLimit maximum size of the buffer for the object */ catBlobToString: (object, sizeLimit) => { return new Promise((resolve, reject) => { const opts = { cwd: dir, max_buffer: sizeLimit, }; child_process.execFile( "git", ["cat-file", "blob", object], opts, toStringHandler(resolve, reject, reject) ); }); }, /** * Returns a stream containing the contents of the object. */ catBlob: object => { return streamChild(child_process.spawn( "git", ["cat-file", "blob", object], {cwd: dir} )); }, diffLastCommit: branch => { const stream = streamChild(child_process.spawn( "git", ["diff-tree", "-z", "--find-renames", "--numstat", branch, "--"], { cwd: dir, /* * We use the magic 'buffer' encoding so we don't have to build a * a string out of the whole thing at once. We have convenient nuls * in the parsing process that we can use to "chunk" this. */ encoding: 'buffer', } )); return parseDiffTreeZ(stream[Symbol.asyncIterator]()); }, /** * Turn a spawned child process into a stream containing its stdout and * emitting an error if it fails. Exported for testing only. */ _streamChild: streamChild, /** * Parse the output of `git diff-tree -z --find-renames --numstat` as an * async generator. Exported for testing only. */ _parseDiffTreeZ: parseDiffTreeZ, }}; const streamChild = (child) => { // Error should be fairly short so we can safely spool them into a variable. let stderrBuffer = ''; child.stderr.setEncoding('utf8'); child.stderr.addListener('data', chunk => { stderrBuffer += chunk; }); let closed = false; let flushCallback; let childCloseState; const flushIfReady = () => { if (!flushCallback || !childCloseState) { // Not ready. return; } /* * We can get this call multiple times for some reason. Lets just ignore * the second one..... */ if (closed) { return; } closed = true; /* * Since we've closed stdout we can be sure that our transform stream has * received its `flush` callback. 
So we delegate to that now to close * the transform stream with the results of the subprocess. */ if (childCloseState.code) { /* * Normalize some "not found" style errors from git so the caller can * 404 on them. */ let missing = stderrBuffer.includes("Not a valid object name"); missing |= stderrBuffer.includes("fatal: bad revision"); if (missing) { flushCallback("missing"); } else { flushCallback(failureMessage(`Child failed with code ${childCloseState.code}`, stderrBuffer)); } } else if (childCloseState.signal) { flushCallback(failureMessage(`Child died with signal ${childCloseState.signal}`, stderrBuffer)); } else { flushCallback(); } }; const out = child.stdout.pipe(new Transform({ transform(chunk, _encoding, callback) { callback(null, chunk); }, flush(callback) { // Wait to emit the end until the process closes. flushCallback = callback; flushIfReady(); } })); child.addListener('close', (code, signal) => { childCloseState = {code: code, signal: signal}; flushIfReady(); }); return out; } const failureMessage = (firstPart, stderr) => { if (stderr) { return `${firstPart} and stderr:\n${stderr}`; } return firstPart; } const parseDiffTreeZ = async function* (itr) { const loadFirstChunk = await itr.next(); if (loadFirstChunk.done) { // Empty diff! return; } let chunk = loadFirstChunk.value; const sliceOffNul = async from => { while (true) { const nextNul = chunk.indexOf("\0", from); if (nextNul === -1) { const load = await itr.next(); if (load.done) { if (chunk.length === 0) { return null; } // The iterator is done here so we don't call itr.throw. throw new Error(`Trailing garbage after diff: ${chunk}`); } else { /* * Concat *is* a copying operations which is important because this * is the operation that releases the memory from the last chunk. */ chunk = Buffer.concat([chunk, load.value]); } } else { const result = chunk.toString('utf8', from, nextNul); /* * Slice off the chunk working part that we're returning. 
Buffer * slicing in nodejs is a non-copying operation so this is quick. */ chunk = chunk.slice(nextNul + 1); return result; } } }; /* * Parses output from `git diff-tree -z` which is in * one of two formats: * * added lines<tab>removed lines<tab>path<nul> * * added lines<tab>removed lines<nul>source path<nul>destination path<nul> * The second one is only used when git detects a rename. */ while (true) { let work = await sliceOffNul(); if (work === null) { // Done! return; } if (work[work.length - 1] === '\t') { work = work.slice(work, -1); } const parts = work.split('\t'); if (parts.length === 3) { const [added, removed, path] = parts; yield { path: path, added: added, removed: removed, }; } else if (parts.length === 2) { const [added, removed] = parts; const path = await sliceOffNul(); const movedToPath = await sliceOffNul(); yield { path: path, movedToPath: movedToPath, added: added, removed: removed, }; } else if (parts.length === 1) { // The commit hash. Ignore it. } else { // Prematurely end the iterator because we've encountered a parsing error. itr.throw(new Error(`Strange entry fom git: ${work}`)); } } } const toStringHandler = (resolve, reject, onMissing) => (err, stdout) => { if (err) { if (err.message.includes("Not a valid object name")) { onMissing("missing"); } else { reject(err); } } else { resolve(stdout.trim()); } };