in pdfbox/src/main/java/org/apache/pdfbox/multipdf/PDFMergerUtility.java [510:847]
public void appendDocument(PDDocument destination, PDDocument source) throws IOException
{
PDFCloneUtility cloner = new PDFCloneUtility(destination);
if (source.getDocument().isClosed())
{
throw new IOException("Error: source PDF is closed.");
}
if (destination.getDocument().isClosed())
{
throw new IOException("Error: destination PDF is closed.");
}
PDDocumentCatalog srcCatalog = source.getDocumentCatalog();
if (isDynamicXfa(srcCatalog.getAcroForm()))
{
throw new IOException("Error: can't merge source document containing dynamic XFA form content.");
}
PDDocumentInformation destInfo = destination.getDocumentInformation();
PDDocumentInformation srcInfo = source.getDocumentInformation();
mergeInto(srcInfo.getCOSObject(), destInfo.getCOSObject(), cloner, Collections.emptySet());
// use the highest version number for the resulting pdf
float destVersion = destination.getVersion();
float srcVersion = source.getVersion();
if (destVersion < srcVersion)
{
destination.setVersion(srcVersion);
}
PDDocumentCatalog destCatalog = destination.getDocumentCatalog();
mergeAcroForm(cloner, destCatalog, srcCatalog);
COSArray destThreads = destCatalog.getCOSObject().getCOSArray(COSName.THREADS);
COSArray srcThreads = (COSArray) cloner.cloneForNewDocument(destCatalog.getCOSObject().getDictionaryObject(
COSName.THREADS));
if (destThreads == null)
{
destCatalog.getCOSObject().setItem(COSName.THREADS, srcThreads);
}
else
{
destThreads.addAll(srcThreads);
}
PDDocumentNameDictionary destNames = destCatalog.getNames();
PDDocumentNameDictionary srcNames = srcCatalog.getNames();
if (srcNames != null)
{
if (destNames == null)
{
destCatalog.getCOSObject().setItem(COSName.NAMES,
cloner.cloneForNewDocument(srcNames.getCOSObject()));
}
else
{
cloner.cloneMerge(srcNames, destNames);
}
}
if (destNames != null && destNames.getCOSObject().containsKey(COSName.ID_TREE))
{
// found in 001031.pdf from PDFBOX-4417 and doesn't belong there
destNames.getCOSObject().removeItem(COSName.ID_TREE);
LOG.warn("Removed /IDTree from /Names dictionary, doesn't belong there");
}
PDDocumentNameDestinationDictionary srcDests = srcCatalog.getDests();
if (srcDests != null)
{
PDDocumentNameDestinationDictionary destDests = destCatalog.getDests();
if (destDests == null)
{
destCatalog.getCOSObject().setItem(COSName.DESTS,
cloner.cloneForNewDocument(srcDests.getCOSObject()));
}
else
{
cloner.cloneMerge(srcDests, destDests);
}
}
PDDocumentOutline srcOutline = srcCatalog.getDocumentOutline();
if (srcOutline != null)
{
PDDocumentOutline destOutline = destCatalog.getDocumentOutline();
if (destOutline == null || destOutline.getFirstChild() == null)
{
PDDocumentOutline cloned = new PDDocumentOutline(
cloner.cloneForNewDocument(srcOutline.getCOSObject()));
destCatalog.setDocumentOutline(cloned);
}
else
{
// search last sibling for dest, because /Last entry is sometimes wrong
PDOutlineItem destLastOutlineItem = destOutline.getFirstChild();
while (true)
{
PDOutlineItem outlineItem = destLastOutlineItem.getNextSibling();
if (outlineItem == null)
{
break;
}
destLastOutlineItem = outlineItem;
}
for (PDOutlineItem item : srcOutline.children())
{
// get each child, clone its dictionary, remove siblings info,
// append outline item created from there
COSDictionary clonedDict = cloner.cloneForNewDocument(item.getCOSObject());
clonedDict.removeItem(COSName.PREV);
clonedDict.removeItem(COSName.NEXT);
PDOutlineItem clonedItem = new PDOutlineItem(clonedDict);
destLastOutlineItem.insertSiblingAfter(clonedItem);
destLastOutlineItem = destLastOutlineItem.getNextSibling();
}
}
}
PageMode destPageMode = destCatalog.getPageMode();
if (destPageMode == null)
{
PageMode srcPageMode = srcCatalog.getPageMode();
destCatalog.setPageMode(srcPageMode);
}
COSDictionary srcLabels = srcCatalog.getCOSObject().getCOSDictionary(COSName.PAGE_LABELS);
if (srcLabels != null)
{
int destPageCount = destination.getNumberOfPages();
COSArray destNums;
COSDictionary destLabels = destCatalog.getCOSObject().getCOSDictionary(COSName.PAGE_LABELS);
if (destLabels == null)
{
destLabels = new COSDictionary();
destNums = new COSArray();
destLabels.setItem(COSName.NUMS, destNums);
destCatalog.getCOSObject().setItem(COSName.PAGE_LABELS, destLabels);
}
else
{
destNums = (COSArray) destLabels.getDictionaryObject(COSName.NUMS);
}
COSArray srcNums = (COSArray) srcLabels.getDictionaryObject(COSName.NUMS);
if (srcNums != null)
{
int startSize = destNums.size();
for (int i = 0; i < srcNums.size(); i += 2)
{
COSBase base = srcNums.getObject(i);
if (!(base instanceof COSNumber))
{
LOG.error("page labels ignored, index {} should be a number, but is {}", i,
base);
// remove what we added
while (destNums.size() > startSize)
{
destNums.remove(startSize);
}
break;
}
COSNumber labelIndex = (COSNumber) base;
long labelIndexValue = labelIndex.intValue();
destNums.add(COSInteger.get(labelIndexValue + destPageCount));
destNums.add(cloner.cloneForNewDocument(srcNums.getObject(i + 1)));
}
}
}
COSStream destMetadata = destCatalog.getCOSObject().getCOSStream(COSName.METADATA);
COSStream srcMetadata = srcCatalog.getCOSObject().getCOSStream(COSName.METADATA);
if (destMetadata == null && srcMetadata != null)
{
try
{
PDStream newStream = new PDStream(destination, srcMetadata.createInputStream(), (COSName) null);
mergeInto(srcMetadata, newStream.getCOSObject(), cloner,
new HashSet<>(Arrays.asList(COSName.FILTER, COSName.LENGTH)));
destCatalog.getCOSObject().setItem(COSName.METADATA, newStream);
}
catch (IOException ex)
{
// PDFBOX-4227 cleartext XMP stream with /Flate
LOG.error("Metadata skipped because it could not be read", ex);
}
}
COSDictionary destOCP = destCatalog.getCOSObject().getCOSDictionary(COSName.OCPROPERTIES);
COSDictionary srcOCP = srcCatalog.getCOSObject().getCOSDictionary(COSName.OCPROPERTIES);
if (destOCP == null && srcOCP != null)
{
destCatalog.getCOSObject().setItem(COSName.OCPROPERTIES, cloner.cloneForNewDocument(srcOCP));
}
else if (destOCP != null && srcOCP != null)
{
cloner.cloneMerge(srcOCP, destOCP);
}
mergeOutputIntents(srcCatalog, destCatalog, cloner);
// merge logical structure hierarchy
boolean mergeStructTree = false;
int destParentTreeNextKey = -1;
Map<Integer, COSObjectable> srcNumberTreeAsMap = null;
Map<Integer, COSObjectable> destNumberTreeAsMap = null;
PDStructureTreeRoot srcStructTree = srcCatalog.getStructureTreeRoot();
PDStructureTreeRoot destStructTree = destCatalog.getStructureTreeRoot();
if (destStructTree == null && srcStructTree != null)
{
// create a dummy structure tree in the destination, so that the source
// tree is cloned. (We can't just copy the tree reference due to PDFBOX-3999)
destStructTree = new PDStructureTreeRoot();
destCatalog.setStructureTreeRoot(destStructTree);
destStructTree.setParentTree(new PDNumberTreeNode(PDParentTreeValue.class));
// PDFBOX-4429: remove bogus StructParent(s)
for (PDPage page : destCatalog.getPages())
{
page.getCOSObject().removeItem(COSName.STRUCT_PARENTS);
for (PDAnnotation ann : page.getAnnotations())
{
ann.getCOSObject().removeItem(COSName.STRUCT_PARENT);
}
}
}
if (destStructTree != null)
{
PDNumberTreeNode destParentTree = destStructTree.getParentTree();
destParentTreeNextKey = destStructTree.getParentTreeNextKey();
if (destParentTree != null)
{
destNumberTreeAsMap = getNumberTreeAsMap(destParentTree);
if (destParentTreeNextKey < 0)
{
if (destNumberTreeAsMap.isEmpty())
{
destParentTreeNextKey = 0;
}
else
{
destParentTreeNextKey = Collections.max(destNumberTreeAsMap.keySet()) + 1;
}
}
if (destParentTreeNextKey >= 0 && srcStructTree != null)
{
PDNumberTreeNode srcParentTree = srcStructTree.getParentTree();
if (srcParentTree != null)
{
srcNumberTreeAsMap = getNumberTreeAsMap(srcParentTree);
if (!srcNumberTreeAsMap.isEmpty())
{
mergeStructTree = true;
}
}
}
}
}
Map<COSDictionary, COSDictionary> objMapping = new HashMap<>();
PDPageTree destinationPageTree = destination.getPages(); // cache PageTree
for (PDPage page : srcCatalog.getPages())
{
PDPage newPage = new PDPage(cloner.cloneForNewDocument(page.getCOSObject()));
if (!mergeStructTree)
{
// PDFBOX-4429: remove bogus StructParent(s)
newPage.getCOSObject().removeItem(COSName.STRUCT_PARENTS);
for (PDAnnotation ann : newPage.getAnnotations())
{
ann.getCOSObject().removeItem(COSName.STRUCT_PARENT);
}
}
newPage.setCropBox(page.getCropBox());
newPage.setMediaBox(page.getMediaBox());
newPage.setRotation(page.getRotation());
PDResources resources = page.getResources();
if (resources != null)
{
// this is smart enough to just create references for resources that are used on multiple pages
newPage.setResources(new PDResources(
cloner.cloneForNewDocument(resources.getCOSObject())));
}
else
{
newPage.setResources(new PDResources());
}
if (mergeStructTree)
{
// add the value of the destination ParentTreeNextKey to every source element
// StructParent(s) value so that these don't overlap with the existing values
updateStructParentEntries(newPage, destParentTreeNextKey);
objMapping.put(page.getCOSObject(), newPage.getCOSObject());
List<PDAnnotation> oldAnnots = page.getAnnotations();
List<PDAnnotation> newAnnots = newPage.getAnnotations();
for (int i = 0; i < oldAnnots.size(); i++)
{
objMapping.put(oldAnnots.get(i).getCOSObject(), newAnnots.get(i).getCOSObject());
}
// TODO update mapping for XObjects
}
destinationPageTree.add(newPage);
}
mergeOpenAction(srcCatalog, destCatalog, cloner);
if (mergeStructTree)
{
updatePageReferences(cloner, srcNumberTreeAsMap, objMapping);
int maxSrcKey = -1;
for (Map.Entry<Integer, COSObjectable> entry : srcNumberTreeAsMap.entrySet())
{
int srcKey = entry.getKey();
maxSrcKey = Math.max(srcKey, maxSrcKey);
COSObjectable value = entry.getValue();
if (value != null)
{
value = cloner.cloneForNewDocument(value.getCOSObject());
destNumberTreeAsMap.put(destParentTreeNextKey + srcKey, value);
}
}
destParentTreeNextKey += maxSrcKey + 1;
PDNumberTreeNode newParentTreeNode = new PDNumberTreeNode(PDParentTreeValue.class);
// Note that all elements are stored flatly. This could become a problem for large files
// when these are opened in a viewer that uses the tagging information.
// If this happens, then PDNumberTreeNode should be improved with a convenience method that
// stores the map into a B+Tree, see https://en.wikipedia.org/wiki/B+_tree
newParentTreeNode.setNumbers(destNumberTreeAsMap);
destStructTree.setParentTree(newParentTreeNode);
destStructTree.setParentTreeNextKey(destParentTreeNextKey);
mergeKEntries(cloner, srcStructTree, destStructTree);
mergeRoleMap(srcStructTree, destStructTree, cloner);
mergeIDTree(cloner, srcStructTree, destStructTree);
mergeMarkInfo(destCatalog, srcCatalog);
mergeLanguage(destCatalog, srcCatalog);
mergeViewerPreferences(destCatalog, srcCatalog, cloner);
}
}