geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/CountryProximityScorer.java [73:148]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  public List<LinkedSpan> score(List<LinkedSpan> linkedData, Map<String, Set<Integer>> countryHits, Map<String, Set<String>> nameCodesMap, String docText, Span[] sentences, Integer maxAllowedDist) {
    this.nameCodesMap = nameCodesMap;
    setDominantCode(countryHits);
    for (LinkedSpan<BaseLink> linkedspan : linkedData) {

      linkedspan = simpleProximityAnalysis(sentences, countryHits, linkedspan, maxAllowedDist);
    }
    return linkedData;
  }

  /**
   * sets class level variable to a code based on the number of mentions
   *
   * @param countryHits
   */
  private void setDominantCode(Map<String, Set<Integer>> countryHits) {
    int hits = -1;
    for (String code : countryHits.keySet()) {
      if (countryHits.get(code).size() > hits) {
        hits = countryHits.get(code).size();
        dominantCode = code;
      }
    }
  }

  /**
   * Generates distances from each country mention to the span's location in the
   * doc text. Ultimately an attempt to ensure that ambiguously named toponyms
   * are resolved to the correct country and coordinate.
   *
   * @param sentences
   * @param countryHits
   * @param span
   * @return
   */
  private LinkedSpan<BaseLink> simpleProximityAnalysis(Span[] sentences, Map<String, Set<Integer>> countryHits, LinkedSpan<BaseLink> span, Integer maxAllowedDistance) {
    Double score = 0.0;
    /*
     * get the index of the actual span, beginning of sentence //should generate
     * tokens from sentence and create a char offset... //could have large
     * sentences due to poor sentence detection or wonky doc text
     */
    int sentenceIdx = span.getSentenceid();
    int sentIndexInDoc = sentences[sentenceIdx].getStart();
    /*
     * create a map of all the span's proximal country mentions in the document
     * Map< countrycode, set of <distances from this NamedEntity>>
     */
    Map<String, Set<Integer>> distancesFromCodeMap = new HashMap<>();
    //map = Map<countrycode, Set <of distances this span is from all the mentions of the code>>
    for (String cCode : countryHits.keySet()) {
      // iterate over all the regex start values and calculate an offset
      for (Integer cHit : countryHits.get(cCode)) {
        Integer absDist = Math.abs(sentIndexInDoc - cHit);
        //only include near mentions based on a heuristic
        //TODO make this a property
        //  if (absDist < maxAllowedDistance) {
        if (distancesFromCodeMap.containsKey(cCode)) {
          distancesFromCodeMap.get(cCode).add(absDist);
        } else {
          HashSet<Integer> newset = new HashSet<>();
          newset.add(absDist);
          distancesFromCodeMap.put(cCode, newset);
        }
      }
    }
    //we now know how far this named entity is from every country mention in the document

    /*
     * the gaz matches that have a country code that have mentions in the doc
     * that are closest to the Named Entity should return the best score.
     * Analyzemap generates a likelihood score that the toponym from the gaz is
     * referring to one of the countries, i.e, Map<countrycode, prob that this
     * span is referring to the toponym form this code key>
     */
    Map<String, Double> scoreMap = analyzeMap(distancesFromCodeMap, sentences, span);
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



geoentitylinker-addon/src/main/java/opennlp/addons/geoentitylinker/scoring/ProvinceProximityScorer.java [84:161]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
  public List<LinkedSpan> score(List<LinkedSpan> linkedData, Map<String, Set<Integer>> countryHits, Map<String, Set<String>> nameCodesMap, String docText, Span[] sentences, Integer maxAllowedDist) {
    this.nameCodesMap = nameCodesMap;
    setDominantCode(countryHits);
    for (LinkedSpan<BaseLink> linkedspan : linkedData) {

      linkedspan = simpleProximityAnalysis(sentences, countryHits, linkedspan, maxAllowedDist);
    }
    return linkedData;
  }

  /**
   * sets class level variable to a code based on the number of mentions
   *
   * @param countryHits
   */
  private void setDominantCode(Map<String, Set<Integer>> countryHits) {
    int hits = -1;
    for (String code : countryHits.keySet()) {
      if (countryHits.get(code).size() > hits) {
        hits = countryHits.get(code).size();
        dominantCode = code;
      }
    }
  }

  /**
   * Generates distances from each country mention to the span's location in the
   * doc text. Ultimately an attempt to ensure that ambiguously named toponyms
   * are resolved to the correct country and coordinate.
   *
   * @param sentences
   * @param countryHits
   * @param span
   * @return
   */
  private LinkedSpan<BaseLink> simpleProximityAnalysis(Span[] sentences, Map<String, Set<Integer>> countryHits, LinkedSpan<BaseLink> span, Integer maxAllowedDistance) {
    Double score = 0.0;
    /*
     * get the index of the actual span, begining of sentence //should generate
     * tokens from sentence and create a char offset... //could have large
     * sentences due to poor sentence detection or wonky doc text
     */
    int sentenceIdx = span.getSentenceid();
    int sentIndexInDoc = sentences[sentenceIdx].getStart();
    /**
     * create a map of all the span's proximal country mentions in the document
     * Map< countrycode, set of <distances from this NamedEntity>>
     */
    Map<String, Set<Integer>> distancesFromCodeMap = new HashMap<>();
    //map = Map<countrycode, Set <of distances this span is from all the mentions of the code>>
    for (String cCode : countryHits.keySet()) {
//iterate over all the regex start values and calculate an offset
      for (Integer cHit : countryHits.get(cCode)) {
        Integer absDist = Math.abs(sentIndexInDoc - cHit);
        //only include near mentions based on a heuristic
        //TODO make this a property
        //  if (absDist < maxAllowedDistance) {
        if (distancesFromCodeMap.containsKey(cCode)) {
          distancesFromCodeMap.get(cCode).add(absDist);
        } else {
          HashSet<Integer> newset = new HashSet<>();
          newset.add(absDist);
          distancesFromCodeMap.put(cCode, newset);
        }
      }

      //}
    }
    //we now know how far this named entity is from every country mention in the document

    /**
     * the gaz matches that have a country code that have mentions in the doc
     * that are closest to the Named Entity should return the best score.
     * Analyzemap generates a likelihood score that the toponym from the gaz is
     * referring to one of the countries, i.e, Map<countrycode, prob that this
     * span is referring to the toponym form this code key>
     */
    Map<String, Double> scoreMap = analyzeMap(distancesFromCodeMap, sentences, span);
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



