parseAndTranslate : async function()

in packages/Ludown/lib/translate-helpers.js [32:259]


    parseAndTranslate : async function(fileContent, subscriptionKey, to_lang, src_lang, translate_comments, translate_link_text, log, batch_translate) {
        let batch_translate_size = batch_translate ? parseInt(batch_translate) : MAX_TRANSLATE_BATCH_SIZE;
        fileContent = helpers.sanitizeNewLines(fileContent);
        let linesInFile = fileContent.split(NEWLINE);
        let linesToTranslate = [];
        let localizedContent = '';
        let currentSectionType = '';
        let inAnswer = false;
        let lineCtr = 0;
        for(let lineIndex in linesInFile) {
            lineCtr++;
            let currentLine = linesInFile[lineIndex].trim();
            // is current line a comment? 
            if(currentLine.indexOf(PARSERCONSTS.COMMENT) === 0) {
                if (inAnswer) {
                    this.addSegment(linesToTranslate, currentLine, true);
                    this.addSegment(linesToTranslate, NEWLINE, false);
                    continue;
                }
                if(translate_comments) {
                    this.addSegment(linesToTranslate, currentLine, true);
                } else {
                    this.addSegment(linesToTranslate, currentLine, false);
                }
            } else if (currentLine.indexOf(PARSERCONSTS.FILTER) === 0) {
                this.addSegment(linesToTranslate, currentLine, false);
                currentSectionType = PARSERCONSTS.FILTER;
            } else if (currentLine.indexOf(PARSERCONSTS.INTENT) === 0) {
                if (inAnswer) {
                    this.addSegment(linesToTranslate, currentLine, true);
                    this.addSegment(linesToTranslate, NEWLINE, false);
                    continue;
                }
                let intentName = currentLine.substring(currentLine.indexOf(' ') + 1).trim();
                //is this a QnA? 
                if(intentName.indexOf(PARSERCONSTS.QNA) === 0) {
                    let beforeQuestion = currentLine.substring(0, currentLine.indexOf(' ') + 1);
                    let question = intentName.slice(1).trim();
                    this.addSegment(linesToTranslate, beforeQuestion + '? ', false);
                    this.addSegment(linesToTranslate, question, true);
                    currentSectionType = PARSERCONSTS.QNA;
                } else {
                    // we would not localize intent name but remember we are under intent section
                    currentSectionType = PARSERCONSTS.INTENT;
                    this.addSegment(linesToTranslate, currentLine, false);
                }
            } else if(currentLine.indexOf('-') === 0 || 
                    currentLine.indexOf('*') === 0 || 
                    currentLine.indexOf('+') === 0 ) {
                if (inAnswer) {
                    this.addSegment(linesToTranslate, currentLine, true);
                    this.addSegment(linesToTranslate, NEWLINE, false);
                    continue;
                }
                // Fix for #1191. Do not localize meta-data filters for QnA.
                if (currentSectionType === PARSERCONSTS.FILTER) {
                    this.addSegment(linesToTranslate, currentLine, false);
                    this.addSegment(linesToTranslate, NEWLINE, false);
                    continue;
                }
                let listSeparator = '';
                let content = '';
                switch (currentSectionType) {
                case PARSERCONSTS.INTENT: {
                    // strip line of the list separator
                    listSeparator = currentLine.charAt(0);
                    this.addSegment(linesToTranslate, listSeparator + ' ', false);
                    content = currentLine.slice(1).trim();
                    let entitiesList = [];
                    // strip line off labelled entity values,mark pattern any entities as not to localize
                    if (content.includes('{')) {
                        const entityRegex = new RegExp(/\{(.*?)\}/g);
                        let entitiesFound = content.match(entityRegex);
                        let eStartIndex = -1;
                        let eEndIndex = -1;
                        let entity;
                        for (var entityIdx in entitiesFound) {
                            entity = entitiesFound[entityIdx];
                            let lEntity = entity.replace('{', '').replace('}', '');
                            let labelledValue = '';
                            let updatedUtteranceLeft = content.substring(0, content.indexOf(entity));
                            let updatedUtteranceRight = content.substring(content.indexOf(entity) + entity.length);
                            // is this a labelled value? 
                            if (lEntity.includes('=')) {
                                let entitySplit = lEntity.split('=');
                                if (entitySplit.length > 2) {
                                    throw (new exception(retCode.errorCode.INVALID_INPUT, '[ERROR]: Nested entity references are not supported in utterance: ' + content));
                                }
                                lEntity = entitySplit[0].trim();
                                labelledValue = entitySplit[1].trim();
                                eStartIndex = content.indexOf(entity);
                                eEndIndex = eStartIndex + labelledValue.length - 1;
                                content = updatedUtteranceLeft + labelledValue + updatedUtteranceRight;
                                entitiesList.push(new helperClasses.entity(lEntity, labelledValue, eStartIndex, eEndIndex));
                            } else {
                                // This is a pattern entity without a labelled value. Do not localize this.
                                eStartIndex = content.indexOf(lEntity) - 1;
                                eEndIndex = eStartIndex + lEntity.length - 1;
                                content = updatedUtteranceLeft + lEntity + updatedUtteranceRight;
                                entitiesList.push(new helperClasses.entity(lEntity, null, eStartIndex, eEndIndex));
                            }
                        }
                    }
                    let offset = 0;
                    let candidateText = '';
                    // Tokenize the input utterance.
                    for (var idx in entitiesList) {
                        let entity = entitiesList[idx];
                        if (entity.start < 0) entity.start = 0;
                        if (entity.start !== offset) {
                            candidateText = content.substring(offset, entity.start);
                            if (candidateText.trim() !== '') {
                                this.addSegment(linesToTranslate, candidateText, true);
                            } else {
                                this.addSegment(linesToTranslate, candidateText, false);
                            }
                        }
                        if (entity.value !== '') {
                            this.addSegment(linesToTranslate, ' {' + entity.entity + '=', false);
                            this.addSegment(linesToTranslate, content.substring(entity.start, entity.end + 1).trim(), true);
                            this.addSegment(linesToTranslate, '} ', false);
                        } else {
                            this.addSegment(linesToTranslate, ' {' + entity.entity + '} ', false);
                        }
                        offset = entity.end + 1;
                    }
                    if (offset !== content.length) {
                        candidateText = content.substring(offset);
                        if (candidateText.trim() !== '') {
                            this.addSegment(linesToTranslate, candidateText.trim(), true);
                        } else {
                            this.addSegment(linesToTranslate, candidateText, false);
                        }
                    }
                }
                break;
                case PARSERCONSTS.ENTITY:
                case PARSERCONSTS.QNA:
                default:
                    // strip line of the list separator
                    listSeparator = currentLine.charAt(0);
                    content = currentLine.slice(1).trim();
                    this.addSegment(linesToTranslate, listSeparator + ' ', false);
                    this.addSegment(linesToTranslate, content, true);
                    break;
                }
            } else if(currentLine.indexOf(PARSERCONSTS.ENTITY) === 0) {
                if (inAnswer) {
                    this.addSegment(linesToTranslate, currentLine, true);
                    this.addSegment(linesToTranslate, NEWLINE, false);
                    continue;
                }
                // we need to localize qna alterations if specified.
                let entityDef = currentLine.replace(PARSERCONSTS.ENTITY, '').split(':');
                let entityName = entityDef[0];
                let entityType = entityDef[1];
                if(entityType.includes(PARSERCONSTS.QNAALTERATIONS)) {
                    this.addSegment(linesToTranslate, '$', false);
                    this.addSegment(linesToTranslate, entityName.trim(), true);
                    this.addSegment(linesToTranslate, ' : ' + PARSERCONSTS.QNAALTERATIONS + ' = ', false);
                    currentSectionType = PARSERCONSTS.ENTITY;
                } else {
                    // we would not localize entity line but remember we are under entity section for list entities
                    this.addSegment(linesToTranslate, currentLine, false);
                }
            } else if(currentLine.indexOf(PARSERCONSTS.ANSWER) === 0) {
                if (inAnswer) {
                    answerData = '';
                }
                this.addSegment(linesToTranslate, currentLine, false);
                inAnswer = !inAnswer;
                currentSectionType = PARSERCONSTS.ANSWER;
            } else if (currentLine.indexOf(PARSERCONSTS.URLORFILEREF) ===0) {
                if (inAnswer) {
                    this.addSegment(linesToTranslate, currentLine, true);
                    this.addSegment(linesToTranslate, NEWLINE, false);
                    continue;
                }
                currentSectionType = PARSERCONSTS.URLORFILEREF;
                if(translate_link_text) {
                    const linkValueRegEx = new RegExp(/\(.*?\)/g);
                    let linkValueList = currentLine.trim().match(linkValueRegEx);
                    let linkValue = linkValueList[0].replace('(','').replace(')','');
                    const linkTextRegEx = new RegExp(/\[.*\]/g);
                    let linkTextList = currentLine.trim().match(linkTextRegEx);
                    let linkTextValue = linkTextList[0].replace('[','').replace(']','');
                    this.addSegment(linesToTranslate, '[', false);
                    this.addSegment(linesToTranslate, linkTextValue, true);
                    this.addSegment(linesToTranslate, ']', false);
                    this.addSegment(linesToTranslate, '(' + linkValue + ')', false);
                } else {
                    this.addSegment(linesToTranslate, currentLine, false);
                }
            } else if(currentLine === '') {
                if (inAnswer) {
                    this.addSegment(linesToTranslate, NEWLINE, false);
                    continue;
                }
            } else {
                if (inAnswer) {
                    this.addSegment(linesToTranslate, currentLine, true);
                    this.addSegment(linesToTranslate, NEWLINE, false);
                    continue;
                } else {
                    throw(new exception(retCode.errorCode.INVALID_INPUT_FILE, 'Error: Unexpected line encountered when parsing \n' + '[' + lineIndex + ']:' + currentLine));
                }
            }
            this.addSegment(linesToTranslate, NEWLINE, false);
            // do we have any payload to localize? and have we hit the batch size limit?
            if ((linesToTranslate.length !== 0) && (lineCtr % batch_translate_size === 0)) {
                try {
                    localizedContent += await this.batchTranslateText(linesToTranslate, subscriptionKey, to_lang, src_lang, log);
                    linesToTranslate = [];
                } catch (err) {
                    throw (err)
                }
            }
        }
        if (linesToTranslate.length !== 0) {
            try {
                localizedContent += await this.batchTranslateText(linesToTranslate, subscriptionKey, to_lang, src_lang, log);
                linesToTranslate = [];
            } catch (err) {
                throw (err)
            }
        }
        return localizedContent;
    },