private List getFilesFor()

in ctakes-temporal/src/main/java/org/apache/ctakes/temporal/eval/Evaluation_ImplBase.java [284:400]


	/**
	 * Collects the note files that belong to the given patient sets, using the
	 * lookup strategy that matches {@code xmlFormat}:
	 * <ul>
	 * <li>Anafora: note directories whose name starts with a patient id are
	 * searched directly under {@code xmlDirectory} for the DeepPhe subcorpus, or
	 * under each {@code THYMEData.SECTIONS} subdirectory otherwise. The note
	 * file is expected to have the same name as its directory.</li>
	 * <li>AnaforaCoref: walks {@code xmlDirectory/<split>/<patient id>/<note dir>}
	 * and adds {@code <note dir>/<note dir>} for every note directory. Only the
	 * Colon subcorpus is supported; any other subcorpus yields no files.</li>
	 * <li>I2B2: looks for {@code <set>.xml.txt} in the {@code training} and
	 * {@code test} subdirectories of {@code xmlDirectory}.</li>
	 * <li>Knowtator: every non-hidden file in {@code rawTextDirectory} whose
	 * name contains {@code ID%03d} for one of the sets.</li>
	 * </ul>
	 * Directories that cannot be listed (missing, not a directory, I/O error)
	 * are reported with a warning and contribute no files.
	 *
	 * @param patientSets numeric patient/document set identifiers to look up
	 * @return the matching files; empty when nothing matched or the format is unknown
	 * @throws FileNotFoundException for I2B2 data when a test xml file exists
	 *                               but its text file does not
	 */
	private List<File> getFilesFor( List<Integer> patientSets ) throws FileNotFoundException {
		List<File> files = new ArrayList<>();
		if ( this.xmlFormat == XMLFormat.Anafora ) {
			Set<String> ids = new HashSet<>();
			for ( Integer set : patientSets ) {
				if ( this.subcorpus == Subcorpus.Colon ) {
					ids.add( String.format( "ID%03d", set ) );
				} else if ( this.subcorpus == Subcorpus.DeepPhe ) {
					ids.add( String.format( "patient%02d", set ) );
				} else {
					ids.add( String.format( "doc%04d", set ) );
				}
			}
			int filePrefixLen = 5; // Colon: "ID\d{3}"
			if ( this.subcorpus == Subcorpus.Brain ) {
				filePrefixLen = 7; // Brain: "doc\d{4}"
			} else if ( this.subcorpus == Subcorpus.DeepPhe ) {
				filePrefixLen = 9; // deepPhe: "patient\d{2}"
			}
			if ( this.subcorpus == Subcorpus.DeepPhe ) {
				// DeepPhe note directories sit directly under the xml directory
				addAnaforaNotes( this.xmlDirectory, ids, filePrefixLen, files );
			} else {
				// other subcorpora are split into one subdirectory per section
				for ( String section : THYMEData.SECTIONS ) {
					addAnaforaNotes( new File( this.xmlDirectory, section ), ids, filePrefixLen, files );
				}
			}
		} else if ( this.xmlFormat == XMLFormat.AnaforaCoref ) {
			Set<String> ids = new HashSet<>();
			if ( this.subcorpus == Subcorpus.Colon ) {
				for ( Integer set : patientSets ) {
					ids.add( String.format( "ID%03d", set ) );
				}
			} else {
				// warn once instead of once per requested set
				LOGGER.warn( "No coreference annotations exist for this corpus!" );
			}
			if ( !ids.isEmpty() ) {
				for ( File splitDir : listChildrenOrWarn( this.xmlDirectory ) ) {
					// this gets us into train/dev/test subdirectory; skip stray plain files
					if ( !splitDir.isDirectory() ) {
						continue;
					}
					for ( File ptDir : listChildrenOrWarn( splitDir ) ) {
						if ( !ptDir.isDirectory() || !ids.contains( ptDir.getName() ) ) {
							continue;
						}
						for ( File subDir : listChildrenOrWarn( ptDir ) ) {
							if ( subDir.isDirectory() ) {
								// for document 001 for patient 001, directory is ID001/ID001_clinic_001
								// and text file within is ID001_clinic_001
								files.add( new File( subDir, subDir.getName() ) );
							}
						}
					}
				}
			}
		} else if ( this.xmlFormat == XMLFormat.I2B2 ) {
			File trainDir = new File( this.xmlDirectory, "training" );
			File testDir = new File( this.xmlDirectory, "test" );
			for ( Integer pt : patientSets ) {
				File xmlTrain = new File( trainDir, pt + ".xml" );
				File train = new File( trainDir, pt + ".xml.txt" );
				if ( train.exists() ) {
					if ( xmlTrain.exists() ) {
						files.add( train );
					} else {
						// routed through the logger like every other diagnostic in this method
						LOGGER.warn( "Text file in training has no corresponding xml -- skipping: " + train );
					}
				}
				File xmlTest = new File( testDir, pt + ".xml" );
				File test = new File( testDir, pt + ".xml.txt" );
				if ( xmlTest.exists() ) {
					if ( test.exists() ) {
						files.add( test );
					} else {
						throw new FileNotFoundException( "Could not find the test text file -- for cTAKES usage you must copy the text files into the xml directory for the test set." );
					}
				}
				// a set id is expected to live in either training or test, not both
				assert !(train.exists() && test.exists());
			}
		} else if ( xmlFormat == XMLFormat.Knowtator ) {
			LOGGER.warn( "This is an old annotation format -- please upgrade to using anafora files." );
			for ( Integer set : patientSets ) {
				// format once per set rather than once per candidate file name
				final String idToken = String.format( "ID%03d", set );
				File[] matches = rawTextDirectory.listFiles( new FilenameFilter() {
					@Override
					public boolean accept( File dir, String name ) {
						return name.contains( idToken );
					}
				} );
				if ( matches == null ) {
					// listFiles returns null when the path is not a listable directory;
					// no later set can succeed either, so stop here
					LOGGER.warn( "Could not list directory: " + rawTextDirectory );
					break;
				}
				for ( File file : matches ) {
					// skip hidden files like .svn
					if ( !file.isHidden() ) {
						files.add( file );
					}
				}
			}
		} else {
			LOGGER.error( "Unknown data format -- please specify Anafora, i2b2, or Knowtator format." );
		}
		return files;
	}

	/**
	 * Adds the Anafora note file of every subdirectory of {@code parentDir}
	 * whose name starts with one of {@code ids}. The note file is looked up as
	 * {@code <dir>/<dir name>}; a warning is logged when it does not exist.
	 *
	 * @param parentDir     directory whose immediate subdirectories are inspected
	 * @param ids           accepted patient id prefixes
	 * @param filePrefixLen number of leading characters of the directory name
	 *                      that form the patient id
	 * @param files         list that receives the note files that exist
	 */
	private void addAnaforaNotes( File parentDir, Set<String> ids, int filePrefixLen, List<File> files ) {
		for ( File dir : listChildrenOrWarn( parentDir ) ) {
			if ( !dir.isDirectory() ) {
				continue;
			}
			String name = dir.getName();
			// names shorter than the id prefix cannot match and would break substring
			if ( name.length() < filePrefixLen || !ids.contains( name.substring( 0, filePrefixLen ) ) ) {
				continue;
			}
			File file = new File( dir, name );
			if ( file.exists() ) {
				files.add( file );
			} else {
				LOGGER.warn( "Missing note: " + file );
			}
		}
	}

	/**
	 * Lists the children of {@code dir}, logging a warning and returning an
	 * empty array when the listing is unavailable ({@code File.listFiles()}
	 * returns {@code null} for a non-directory or on an I/O error).
	 *
	 * @param dir directory to list; may be {@code null}
	 * @return the children of {@code dir}, never {@code null}
	 */
	private File[] listChildrenOrWarn( File dir ) {
		File[] children = dir == null ? null : dir.listFiles();
		if ( children == null ) {
			LOGGER.warn( "Could not list directory: " + dir );
			return new File[0];
		}
		return children;
	}