public void populateCASfromURL()

in TikaAnnotator/src/main/java/org/apache/uima/tika/TIKAWrapper.java [61:130]


	/**
	 * Parses the document found at {@code url} and stores the result in
	 * {@code cas}.
	 * <p>
	 * The content is read through the parser supplied by the configuration,
	 * or through an {@code AutoDetectParser} when the configuration does not
	 * provide one. After parsing, the handler adds the text and markup to the
	 * CAS, and a {@code SourceDocumentAnnotation} is indexed whose features
	 * hold one name/value pair per metadata entry plus a final {@code "url"}
	 * entry containing {@code url.toString()}.
	 *
	 * @param cas      the CAS to populate
	 * @param url      location of the document to fetch and parse
	 * @param mime     MIME type of the document; currently not used by this
	 *                 method
	 * @param language document language to set on the CAS, or {@code null}
	 *                 to leave the CAS language untouched; it is applied even
	 *                 when fetching or parsing fails
	 * @throws CASException if the URL cannot be opened, if parsing fails, or
	 *                      if the JCas view of {@code cas} cannot be obtained
	 */
	public void populateCASfromURL(CAS cas, URL url, String mime, String language) throws CASException {

		// use the custom parser, or rely on autodetect if none is configured;
		// resolved before the stream is opened so a failure here cannot leak it
		Parser parser = config.getParser();
		if (parser == null) {
			parser = new AutoDetectParser(config);
		}

		Metadata md = new Metadata();
		MarkupHandler handler = new MarkupHandler();

		InputStream originalStream = null;
		try {
			originalStream = new BufferedInputStream(url.openStream());
			parser.parse(originalStream, handler, md);
		} catch (Exception e) {
			// covers both a failure to open the URL and a failure to parse it
			throw new CASException(e);
		} finally {
			// the stream is null when url.openStream() itself failed
			if (originalStream != null) {
				try {
					originalStream.close();
				} catch (IOException ignored) {
					// best effort: a failed close must not mask the parse outcome
				}
			}
			// set language if it was explicitly specified as a configuration
			// parameter
			if (language != null) {
				cas.setDocumentLanguage(language);
			}
		}

		// add text and markup to CAS
		handler.populateCAS(cas);

		JCas jcas = cas.getJCas();

		SourceDocumentAnnotation docAnnotation = new SourceDocumentAnnotation(jcas);

		// snapshot the metadata names once instead of rebuilding them per iteration
		String[] names = md.names();

		// one slot per metadata entry plus one for the trailing "url" feature
		if (docAnnotation.getFeatures() == null) {
			docAnnotation.setFeatures((FSArray) cas.createArrayFS(names.length + 1));
		}

		// copy every metadata entry found by Tika into the annotation
		int i = 0;
		for (; i < names.length; i++) {
			String name = names[i];
			FeatureValue fv = new FeatureValue(jcas);
			fv.setName(name);
			fv.setValue(md.get(name));
			docAnnotation.setFeatures(i, fv);
		}

		// the last slot records where the document came from
		FeatureValue urlFeature = new FeatureValue(jcas);
		urlFeature.setName("url");
		urlFeature.setValue(url.toString());
		docAnnotation.setFeatures(i, urlFeature);

		docAnnotation.addToIndexes();
	}