in src/og.py [0:0]
def _generateInsightsPerDocument(self, page, p, insights, medicalInsights,
                                 translate, ta, tma, tt):
    """Analyze one page's text in fixed-size chunks and write the results.

    The page text is cut into consecutive slices of at most 2000
    characters. Each slice is handed to the enabled analyses, and the
    accumulated results are written to files named
    ``<self.fileName>-page-<p>-...``.

    Args:
        page: Object exposing the page text as ``page.text``.
        p: Page identifier embedded in the output file names.
        insights: When truthy, run ``self._insights`` on every chunk and
            write the sentiment, entities, syntax and key-phrase CSV files.
        medicalInsights: When truthy, run ``self._medicalInsights`` on
            every chunk and write the medical entities CSV and PHI JSON.
        translate: When truthy, translate every chunk with
            ``tt.getTranslation`` and write the concatenated result.
        ta: Passed through unchanged to ``self._insights``.
        tma: Passed through unchanged to ``self._medicalInsights``.
        tt: Object providing ``getTranslation(text)``.

    Returns:
        None. All results are written through ``FileHelper``.
    """
    maxLen = 2000
    text = page.text
    sl = len(text)

    # Result accumulators; the helper methods receive these lists and are
    # expected to fill them in place (their contents are written below).
    sentiment = []
    syntax = []
    entities = []
    keyPhrases = []
    medicalEntities = []
    phi = []
    translatedChunks = []

    for start in range(0, sl, maxLen):
        subText = text[start:min(start + maxLen, sl)]

        # Hand each helper only the current chunk. Passing the whole page
        # text here would re-analyze the full page once per chunk and
        # defeat the maxLen limit. `start` travels with the chunk;
        # presumably the helpers use it to rebase offsets onto the page --
        # TODO confirm against _insights / _medicalInsights.
        if insights:
            self._insights(start, subText, sentiment, syntax, entities,
                           keyPhrases, ta)
        if medicalInsights:
            self._medicalInsights(start, subText, medicalEntities, phi, tma)
        if translate:
            translatedChunks.append(tt.getTranslation(subText) + "\n")

    # Output files are written even when the page text is empty, in which
    # case they hold only headers / empty collections.
    prefix = "{}-page-{}".format(self.fileName, p)

    if insights:
        FileHelper.writeCSV(
            prefix + "-insights-sentiment.csv",
            ["Sentiment"], sentiment)
        FileHelper.writeCSV(
            prefix + "-insights-entities.csv",
            ["Type", "Text", "Score", "BeginOffset", "EndOffset"],
            entities)
        FileHelper.writeCSV(
            prefix + "-insights-syntax.csv",
            ["PartOfSpeech-Tag", "PartOfSpeech-Score", "Text",
             "BeginOffset", "EndOffset"],
            syntax)
        FileHelper.writeCSV(
            prefix + "-insights-keyPhrases.csv",
            ["Text", "Score", "BeginOffset", "EndOffset"], keyPhrases)

    if medicalInsights:
        FileHelper.writeCSV(
            prefix + "-medical-insights-entities.csv",
            ["Text", "Type", "Category", "Score", "BeginOffset",
             "EndOffset"],
            medicalEntities)
        FileHelper.writeToFile(
            prefix + "-medical-insights-phi.json", json.dumps(phi))

    if translate:
        FileHelper.writeToFile(
            prefix + "-text-translation.txt", "".join(translatedChunks))