public JSMDecision buildLearningModel()

in opennlp-similarity/src/main/java/opennlp/tools/jsmlearning/JSMLearnerOnLatticeBase.java [41:141]


	/**
	 * Builds the positive and negative pattern structures from the training texts
	 * and decides which class the {@code unknown} text falls into.
	 *
	 * <p>Steps, in order:
	 * <ol>
	 *   <li>reset {@code psPos} / {@code psNeg} and, if {@code separationKeywords} is
	 *       non-null, regroup the training texts via
	 *       {@code reGroupByOccurrenceOfSeparationKeyword};</li>
	 *   <li>parse every training text with {@code chunk_maker} and add it to the
	 *       corresponding pattern structure, using the text's position in its list
	 *       as the object id;</li>
	 *   <li>intersect the parsed {@code unknown} text with every non-empty concept
	 *       intent of each structure;</li>
	 *   <li>drop inconsistencies via {@code removeInconsistenciesFromPosNegIntersections};</li>
	 *   <li>re-intersect the surviving intersections with the concept intents,
	 *       flatten, subtract one side from the other and compare the sizes.</li>
	 * </ol>
	 *
	 * @param posTexts           training texts of the positive class
	 * @param negTexts           training texts of the negative class
	 * @param unknown            text to be classified
	 * @param separationKeywords keywords used to regroup the training texts; may be
	 *                           {@code null}, in which case no regrouping happens
	 * @return the decision object holding the class flag and all intermediate
	 *         intersection lists
	 */
	public JSMDecision buildLearningModel(List<String> posTexts, List<String> negTexts,
			String unknown, String[] separationKeywords){
		psPos = new LinguisticPatternStructure(0,0);
		psNeg = new LinguisticPatternStructure(0,0);

		if (separationKeywords != null) { // re-sort by occurrence of separation keyword
			Pair<List<String>, List<String>> regrouped =
					reGroupByOccurrenceOfSeparationKeyword(posTexts, negTexts, separationKeywords);
			posTexts = regrouped.getFirst();
			negTexts = regrouped.getSecond();
		}

		// Parse every training text up front (positive first, then negative).
		List<List<List<ParseTreeChunk>>> lingRepsPos = new ArrayList<>();
		for (String text : posTexts) {
			lingRepsPos.add(chunk_maker.formGroupedPhrasesFromChunksForPara(text));
		}
		List<List<List<ParseTreeChunk>>> lingRepsNeg = new ArrayList<>();
		for (String text : negTexts) {
			lingRepsNeg.add(chunk_maker.formGroupedPhrasesFromChunksForPara(text));
		}

		// Each text becomes one object of its lattice; its id is its list position.
		for (int objectId = 0; objectId < lingRepsPos.size(); objectId++) {
			LinkedHashSet<Integer> extent = new LinkedHashSet<>();
			extent.add(objectId);
			psPos.AddIntent(lingRepsPos.get(objectId), extent, 0);
		}
		for (int objectId = 0; objectId < lingRepsNeg.size(); objectId++) {
			LinkedHashSet<Integer> extent = new LinkedHashSet<>();
			extent.add(objectId);
			psNeg.AddIntent(lingRepsNeg.get(objectId), extent, 0);
		}

		List<List<ParseTreeChunk>> chunksUnknown = chunk_maker.formGroupedPhrasesFromChunksForPara(unknown);
		List<List<List<ParseTreeChunk>>> posIntersections = new ArrayList<>();
		List<List<List<ParseTreeChunk>>> negIntersections = new ArrayList<>();

		// The two lattices generally have a different number of concepts, so each
		// one is walked over its own size. (Indexing psNeg with a psPos-bounded
		// counter either overruns psNeg or skips its trailing concepts.)
		for (int iConcept = 0; iConcept < psPos.conceptList.size(); iConcept++) {
			List<List<ParseTreeChunk>> intent = psPos.conceptList.get(iConcept).intent;
			if (intent == null || intent.isEmpty()) {
				continue;
			}
			List<List<ParseTreeChunk>> intersection =
					computeIntersectionWithIntentExtendedByDeduction(psPos, iConcept, chunksUnknown);
			List<List<ParseTreeChunk>> reduced = reduceList(intersection);
			if (!reduced.isEmpty()) {
				posIntersections.add(reduced);
			}
		}
		for (int iConcept = 0; iConcept < psNeg.conceptList.size(); iConcept++) {
			List<List<ParseTreeChunk>> intent = psNeg.conceptList.get(iConcept).intent;
			if (intent == null || intent.isEmpty()) {
				continue;
			}
			List<List<ParseTreeChunk>> intersection =
					md.matchTwoSentencesGroupedChunksDeterministic(intent, chunksUnknown);
			List<List<ParseTreeChunk>> reduced = reduceList(intersection);
			if (!reduced.isEmpty()) {
				negIntersections.add(reduced);
			}
		}

		Pair<List<List<List<ParseTreeChunk>>>, List<List<List<ParseTreeChunk>>>> consistent =
				removeInconsistenciesFromPosNegIntersections(posIntersections, negIntersections);
		posIntersections = consistent.getFirst();
		negIntersections = consistent.getSecond();

		List<List<List<ParseTreeChunk>>> posIntersectionsUnderNeg = new ArrayList<>();
		List<List<List<ParseTreeChunk>>> negIntersectionsUnderPos = new ArrayList<>();

		// NOTE(review): "posIntersectionsUnderNeg" is filled from the NEGATIVE lattice
		// and the NEGATIVE intersections (and vice versa below). The pairing is kept
		// exactly as it was; confirm whether the names or the operands are intended.
		for (int iConcept = 0; iConcept < psNeg.conceptList.size(); iConcept++) {
			List<List<ParseTreeChunk>> intent = psNeg.conceptList.get(iConcept).intent;
			if (intent == null || intent.isEmpty()) {
				continue; // same guard as for the first-level intersections
			}
			for (List<List<ParseTreeChunk>> negIntersection : negIntersections) {
				List<List<ParseTreeChunk>> reduced = reduceList(
						md.matchTwoSentencesGroupedChunksDeterministic(intent, negIntersection));
				if (!reduced.isEmpty()) {
					posIntersectionsUnderNeg.add(reduced);
				}
			}
		}

		for (int iConcept = 0; iConcept < psPos.conceptList.size(); iConcept++) {
			List<List<ParseTreeChunk>> intent = psPos.conceptList.get(iConcept).intent;
			if (intent == null || intent.isEmpty()) {
				continue; // same guard as for the first-level intersections
			}
			for (List<List<ParseTreeChunk>> posIntersection : posIntersections) {
				List<List<ParseTreeChunk>> reduced = reduceList(
						md.matchTwoSentencesGroupedChunksDeterministic(intent, posIntersection));
				if (!reduced.isEmpty()) {
					negIntersectionsUnderPos.add(reduced);
				}
			}
		}

		List<ParseTreeChunk> posFlat = flattenParseTreeChunkLst(posIntersectionsUnderNeg);
		List<ParseTreeChunk> negFlat = flattenParseTreeChunkLst(negIntersectionsUnderPos);

		// Both differences must be taken against the ORIGINAL flattened lists.
		// Subtracting the already-reduced positive list from the negative one would
		// not remove the shared elements. The snapshot protects against subtract()
		// modifying its first argument in place (its implementation is not visible here).
		List<ParseTreeChunk> posFlatSnapshot = new ArrayList<>(posFlat);
		List<ParseTreeChunk> posIntersectionsUnderNegLst = subtract(posFlat, negFlat);
		List<ParseTreeChunk> negIntersectionsUnderPosLst = subtract(negFlat, posFlatSnapshot);

		System.out.println("Pos - neg inters = "+posIntersectionsUnderNegLst);
		System.out.println("Neg - pos inters = "+negIntersectionsUnderPosLst);

		// Equivalent to "pos/neg > 1" on floats, including the neg == 0 cases
		// (x/0 -> Infinity -> true for x > 0; 0/0 -> NaN -> false), without relying
		// on NaN/Infinity semantics.
		boolean bPositiveClass = posIntersectionsUnderNegLst.size() > negIntersectionsUnderPosLst.size();

		return new JSMDecision("keywordClassName", bPositiveClass,
				posIntersections, negIntersections,
				posIntersectionsUnderNeg,
				negIntersectionsUnderPos, separationKeywords);
	}