public void process()

in ctakes-coreference/src/main/java/org/apache/ctakes/coreference/cc/ODIEVectorFileWriter.java [187:366]


	/**
	 * Emits training instances for every anaphor/antecedent candidate pair indexed in the CAS.
	 * <p>
	 * For each {@code MarkablePairSet}, every antecedent candidate is labeled by asking the
	 * {@code GoldStandardLabeler} whether the (anaphor, antecedent) pair is a gold pair. The
	 * per-type positive/negative instance counters are updated, and, depending on the
	 * {@code printVectors} / {@code printTrees} flags, a libsvm-formatted feature vector line
	 * and/or a tree-kernel line is written to the writer matching the anaphor's type
	 * (NE, DEM or PRON).
	 * <p>
	 * Does nothing if the writer has not been initialized. If the per-document vector files
	 * cannot be opened, the failure is reported and vector output is skipped for this document;
	 * counting and tree output still proceed.
	 *
	 * @param jcas the CAS view holding the markables and markable pair sets for one document
	 */
	public void process(JCas jcas) {
		if(!initialized) return;

		String docId = DocIdUtil.getDocumentID( jcas );
		if(docId == null){
			// The null check must precede any dereference; fall back to the historical default id.
			docId = "141471681_1";
		}else{
			// Keep only the part after the last '/' (the whole id if there is no '/').
			docId = docId.substring(docId.lastIndexOf('/')+1);
		}
		System.out.println("creating vectors for "+docId);

		FSIterator markIter = jcas.getAnnotationIndex(Markable.type).iterator();
		LinkedList<Annotation> lm = FSIteratorToList.convert(markIter);

		labeler = new GoldStandardLabeler(goldStandardDir, docId, lm);

		// Vector output is only attempted when all three per-type files could be opened.
		boolean writeVectors = printVectors;
		if(writeVectors){
			PrintWriter ne = null;
			PrintWriter dem = null;
			PrintWriter pron = null;
			try {
				ne = new PrintWriter(outputDir + "/" + CorefConsts.NE + "/vectors/" + docId + ".libsvm");
				dem = new PrintWriter(outputDir + "/" + CorefConsts.DEM + "/vectors/" + docId + ".libsvm");
				pron = new PrintWriter(outputDir + "/" + CorefConsts.PRON + "/vectors/"+ docId + ".libsvm");
			} catch (FileNotFoundException e) {
				e.printStackTrace();
				System.err.println("Could not open vector output files for " + docId + "; skipping vector output for this document.");
				// Release whichever writers were opened before the failure.
				if(ne != null) ne.close();
				if(dem != null) dem.close();
				writeVectors = false;
			}
			if(writeVectors){
				neOut = ne;
				demOut = dem;
				pronOut = pron;
			}
		}

		try {
			FSIterator iter = jcas.getJFSIndexRepository().getAllIndexedFS(MarkablePairSet.type);
			while(iter.hasNext()){
				MarkablePairSet pair = (MarkablePairSet) iter.next();
				Markable anaphor = pair.getAnaphor();
				String corefType = (anaphor instanceof NEMarkable ? CorefConsts.NE : (anaphor instanceof DemMarkable ? CorefConsts.DEM : CorefConsts.PRON));

				// Walk the linked list of antecedent candidates for this anaphor.
				FSList pairList = pair.getAntecedentList();
				while(pairList instanceof NonEmptyFSList){
					NonEmptyFSList node = (NonEmptyFSList) pairList;
					BooleanLabeledFS labeledProb = (BooleanLabeledFS) node.getHead();
					Markable antecedent = (Markable) labeledProb.getFeature();

					// The label comes from the gold standard, not from the label stored on the candidate.
					int label = (labeler.isGoldPair(anaphor, antecedent) ? 1 : 0);

					// corefType is always one of NE, DEM or PRON (see its assignment above).
					if(label == 1){
						if(corefType.equals(CorefConsts.NE)){
							posNeInst++;
						}else if(corefType.equals(CorefConsts.DEM)){
							posDemInst++;
						}else{
							posPronInst++;
						}
					}else{
						if(corefType.equals(CorefConsts.NE)){
							negNeInst++;
						}else if(corefType.equals(CorefConsts.DEM)){
							negDemInst++;
						}else{
							negPronInst++;
						}
					}

					if(writeVectors){
						svm_node[] nodes = vecCreator.getNodeFeatures(anaphor, antecedent, jcas);
						PrintWriter writer;
						if(corefType.equals(CorefConsts.NE)){
							writer = neOut;
						}else if(corefType.equals(CorefConsts.PRON)){
							writer = pronOut;
						}else{
							writer = demOut;
						}
						// libsvm line: "<label> <index>:<value> <index>:<value> ..."
						writer.print(label);
						for(svm_node inst : nodes){
							writer.print(" ");
							writer.print(inst.index);
							writer.print(":");
							writer.print(inst.value);
						}
						writer.println();
						writer.flush();
					}

					if(printTrees){
						TreebankNode antecedentNode = MarkableTreeUtils.markableNode(jcas, antecedent.getBegin(), antecedent.getEnd());
						TreebankNode anaphorNode = MarkableTreeUtils.markableNode(jcas, anaphor.getBegin(), anaphor.getEnd());
						debug.println(TreeUtils.tree2str(antecedentNode));
						debug.println(TreeUtils.tree2str(anaphorNode));
						SimpleTree pathTree = TreeExtractor.extractPathTree(antecedentNode, anaphorNode);
						// NOTE(review): the path-enclosed tree result is not used; the call is retained
						// in case it has side effects on the CAS - confirm and remove if it does not.
						TreeExtractor.extractPathEnclosedTree(antecedentNode, anaphorNode, jcas);
						String treeStr = pathTree.toString();
						PrintWriter writer;
						if(corefType.equals(CorefConsts.NE)){
							writer = neTreeOut;
						}else if(corefType.equals(CorefConsts.PRON)){
							writer = pronTreeOut;
						}else{
							writer = demTreeOut;
						}
						// Tree line: "+1|-1 |BT| <tree> |ET|", with the space between adjacent
						// ") (" sibling brackets removed.
						writer.print(label == 1 ? "+1" : "-1");
						writer.print(" |BT| ");
						writer.print(treeStr.replaceAll("\\) \\(", ")("));
						writer.println(" |ET|");
					}

					pairList = node.getTail();
					// NOTE: Stopping at the first positive pair (if(label == 1) break;) is deliberately
					// not done here.  With it, negative examples would only be output backwards until the
					// actual coreferent entity is reached, which may suggest that further away markables
					// are _more_ likely to be coreferent - an assumption that probably does not hold in
					// the test set configuration.
				}
			}
		} finally {
			// Close the per-document vector files even if processing a pair failed.
			if(writeVectors){
				neOut.close();
				demOut.close();
				pronOut.close();
			}
		}
	}