in app/redact/PdfRedactor.scala [49:101]
/**
 * Redacts text and images in `document`, then writes a rasterised copy of the result to
 * `destination`.
 *
 * Steps, in order:
 *  1. Collect text matches for every entry of `names` and `commonNames`, plus (when the
 *     corresponding flags are enabled) regex matches for `names` and exact matches for
 *     `redactStringsList`, together with e-mail, URL, github.com and linkedin.com matches.
 *  2. Pass all matches to `redactFoundText`, then call `ImageRedactor.redactImages` and
 *     `removeFirstPage` on the same document.
 *  3. Render each remaining page to an RGB image at 300 DPI, and draw it onto a new A4 page of
 *     a fresh document which is saved to `destination`.
 *
 * This method mutates `document` but does not close it, nor does it close `destination`.
 *
 * @param document    the PDF to redact; modified in place
 * @param destination stream that receives the rasterised PDF
 * @param names       names to search for, in addition to `commonNames`
 */
def redact(document: PDDocument, destination: OutputStream, names: List[String]): Unit = {
  val foundNames = for {
    name <- names ++ commonNames
    found <- TextFinder.findString(document, name)
  } yield found

  // Regex-based matching is opt-in and only applies to the caller-supplied names.
  val regexedNames: List[FoundText] =
    if (enableGreedyNameMatching) {
      names.flatMap(word => TextFinder.findStringsMatchingRegex(document, word))
    } else {
      List.empty[FoundText]
    }

  // Exact matching against the configured list of strings is opt-in as well.
  val redactedWords: List[FoundText] =
    if (enableExactStringMatching) {
      redactStringsList.flatMap(word => TextFinder.findString(document, word))
    } else {
      List.empty[FoundText]
    }

  redactFoundText(
    document = document,
    redactions = List(
      foundNames,
      redactedWords,
      TextFinder.findEmail(document),
      TextFinder.findUrl(document),
      TextFinder.findWebsite(document, "github.com"),
      TextFinder.findWebsite(document, "linkedin.com"),
      regexedNames
    ).flatten
  )
  ImageRedactor.redactImages(document)
  removeFirstPage(document)

  val rasterisedDoc = new PDDocument()
  // try/finally guarantees the new document is released even when rendering or saving fails.
  try {
    val renderer = new PDFRenderer(document)
    for (page <- 0 until document.getNumberOfPages) {
      val image = renderer.renderImageWithDPI(page, 300, ImageType.RGB)
      val pdPage = new PDPage(PDRectangle.A4)
      val pdImage = JPEGFactory.createFromImage(rasterisedDoc, image)
      val contentStream = new PDPageContentStream(rasterisedDoc, pdPage)
      try {
        // The image is stretched to the full A4 page regardless of the source page's size.
        // NOTE(review): non-A4 source pages will be distorted — confirm this is intended.
        contentStream.drawImage(pdImage, 0, 0, PDRectangle.A4.getWidth, PDRectangle.A4.getHeight)
      } finally {
        // Close the stream even if drawing fails so it is not leaked.
        contentStream.close()
      }
      rasterisedDoc.addPage(pdPage)
    }
    rasterisedDoc.save(destination)
  } finally {
    rasterisedDoc.close()
  }
}