in tokenizers/java/src/main/java/javatokenizer/Extractor.java [89:129]
/**
 * Tokenizes one Java source file and returns its significant tokens.
 *
 * <p>The file is read, parsed with JavaParser, and its token range is walked
 * in order. Whitespace, end-of-line, end-of-file and comment tokens are
 * always dropped. Of the remaining tokens, either all are kept or, when
 * {@code onlyIdentifiers} is set, only those of kind {@code IDENTIFIER}.
 *
 * @param sourceFile      the Java file to tokenize
 * @param onlyIdentifiers if {@code true}, keep only identifier tokens;
 *                        otherwise keep every non-layout, non-comment token
 * @param baseDirectory   directory against which the stored filename is
 *                        relativized (via URI relativization)
 * @return the collected tokens together with the relativized filename, or
 *         {@code null} if the file could not be read, parsed or processed
 */
public static SerializableTokens TokenizeFile(File sourceFile, boolean onlyIdentifiers, File baseDirectory) {
    System.out.println("Tokenizing " + sourceFile + "...");
    try {
        // NOTE(review): the file is decoded with the platform default charset, so
        // results may differ between machines for non-ASCII sources - confirm
        // whether a fixed encoding (e.g. UTF-8) is intended.
        CompilationUnit cu = JavaParser.parse(
                FileUtils.readFileToString(sourceFile, Charset.defaultCharset()));

        List<String> allTokens = new ArrayList<>();
        // An absent token range makes get() throw; that is handled below like
        // any other failure for this file.
        for (JavaToken token : cu.getTokenRange().get()) {
            JavaToken.Kind tokenKind = JavaToken.Kind.valueOf(token.getKind());
            if (isLayoutOrComment(tokenKind)) {
                continue;
            }
            if (!onlyIdentifiers || tokenKind == JavaToken.Kind.IDENTIFIER) {
                allTokens.add(token.getText());
            }
        }

        SerializableTokens serializableObject = new SerializableTokens();
        serializableObject.filename = baseDirectory.toURI().relativize(sourceFile.toURI()).toString();
        serializableObject.tokens = allTokens;
        return serializableObject;
    } catch (Exception | StackOverflowError e) {
        // Best-effort: a single bad file must not abort the whole run, but the
        // cause is reported so skipped files can be diagnosed. StackOverflowError
        // is caught explicitly (presumably triggered by very deeply nested input -
        // TODO confirm) and is now reported instead of being dropped silently.
        System.err.println("Failed to parse " + sourceFile + ": " + e);
        return null;
    }
}

/**
 * Tells whether a token kind is layout (whitespace, line ending, end of
 * file) or part of a comment, i.e. a kind that is never emitted.
 */
private static boolean isLayoutOrComment(JavaToken.Kind kind) {
    switch (kind) {
        case SPACE:
        case EOF:
        case WINDOWS_EOL:
        case UNIX_EOL:
        case OLD_MAC_EOL:
        case SINGLE_LINE_COMMENT:
        case ENTER_JAVADOC_COMMENT:
        case JAVADOC_COMMENT:
        case ENTER_MULTILINE_COMMENT:
        case MULTI_LINE_COMMENT:
        case COMMENT_CONTENT:
            return true;
        default:
            return false;
    }
}