protected void processLine()

in community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java [394:539]


  /**
   * Parses one line of input and applies it to {@code data} (and {@code timestamps}).
   *
   * <p>Empty lines and lines whose first character is {@code COMMENT_CHAR} are ignored. Otherwise the
   * line is split with {@code delimiterPattern}; the first three tokens are read as user ID, item ID
   * and preference value, and an optional fourth token is read as a timestamp. When {@code transpose}
   * is set, the user and item IDs are swapped before use.</p>
   *
   * <p>A line with an empty preference value and no timestamp token (form {@code "userID,itemID,"})
   * removes the preference and its timestamp. Any other line adds the preference, or overwrites the
   * value of an existing preference for the same user and item, and then records the timestamp via
   * {@code addTimestamp}.</p>
   *
   * @param line the raw input line
   * @param data per-user preference storage; values are {@code PreferenceArray} when
   *     {@code fromPriorData} is true and {@code Collection<Preference>} otherwise
   * @param timestamps per-user, per-item timestamps, passed through to {@code addTimestamp} and
   *     {@code removeTimestamp}
   * @param fromPriorData selects which of the two storage types {@code data} holds
   * @throws NumberFormatException if a non-removal line has a preference value that
   *     {@link Float#parseFloat(String)} rejects
   * @throws java.util.NoSuchElementException presumably, if the line has fewer than three tokens
   *     (the token iterator is advanced three times unconditionally)
   */
  protected void processLine(String line,
                             FastByIDMap<?> data,
                             FastByIDMap<FastByIDMap<Long>> timestamps,
                             boolean fromPriorData) {

    // Ignore empty lines and comments
    if (line.isEmpty() || line.charAt(0) == COMMENT_CHAR) {
      return;
    }

    Iterator<String> tokens = delimiterPattern.split(line).iterator();
    String userIDString = tokens.next();
    String itemIDString = tokens.next();
    String preferenceValueString = tokens.next();
    boolean hasTimestamp = tokens.hasNext();
    String timestampString = hasTimestamp ? tokens.next() : null;

    long userID = readUserIDFromString(userIDString);
    long itemID = readItemIDFromString(itemIDString);

    if (transpose) {
      long tmp = userID;
      userID = itemID;
      itemID = tmp;
    }

    // A line of the form "userID,itemID," (empty value, no timestamp token) means "remove"
    boolean isRemoval = !hasTimestamp && preferenceValueString.isEmpty();

    // This is kind of gross but need to handle two types of storage
    Object maybePrefs = data.get(userID);
    if (fromPriorData) {
      // Data are PreferenceArray

      PreferenceArray prefs = (PreferenceArray) maybePrefs;
      if (isRemoval) {
        if (prefs != null) {
          boolean exists = false;
          int length = prefs.length();
          for (int i = 0; i < length; i++) {
            if (prefs.getItemID(i) == itemID) {
              exists = true;
              break;
            }
          }
          if (exists) {
            if (length == 1) {
              // Removing the user's only preference: drop the user entry entirely
              data.remove(userID);
            } else {
              // Build a replacement one element shorter, skipping the removed item
              PreferenceArray newPrefs = new GenericUserPreferenceArray(length - 1);
              for (int i = 0, j = 0; i < length; i++, j++) {
                if (prefs.getItemID(i) == itemID) {
                  // Undo this iteration's j++ so the next kept entry fills the same slot
                  j--;
                } else {
                  newPrefs.set(j, prefs.get(i));
                }
              }
              ((FastByIDMap<PreferenceArray>) data).put(userID, newPrefs);
            }
          }
        }

        removeTimestamp(userID, itemID, timestamps);

      } else {

        float preferenceValue = Float.parseFloat(preferenceValueString);

        // Overwrite in place when the user already has a preference for this item
        boolean exists = false;
        if (prefs != null) {
          for (int i = 0; i < prefs.length(); i++) {
            if (prefs.getItemID(i) == itemID) {
              exists = true;
              prefs.setValue(i, preferenceValue);
              break;
            }
          }
        }

        if (!exists) {
          if (prefs == null) {
            prefs = new GenericUserPreferenceArray(1);
          } else {
            // Build a replacement one element longer; existing entries shift up by one so that
            // slot 0 is free for the new preference
            PreferenceArray newPrefs = new GenericUserPreferenceArray(prefs.length() + 1);
            for (int i = 0, j = 1; i < prefs.length(); i++, j++) {
              newPrefs.set(j, prefs.get(i));
            }
            prefs = newPrefs;
          }
          prefs.setUserID(0, userID);
          prefs.setItemID(0, itemID);
          prefs.setValue(0, preferenceValue);
          ((FastByIDMap<PreferenceArray>) data).put(userID, prefs);
        }

        // Only add/update lines record a timestamp; a removal line has no timestamp token, so the
        // removal path above must not call addTimestamp after removeTimestamp. This matches the
        // Collection<Preference> path below.
        addTimestamp(userID, itemID, timestampString, timestamps);

      }

    } else {
      // Data are Collection<Preference>

      Collection<Preference> prefs = (Collection<Preference>) maybePrefs;

      if (isRemoval) {
        if (prefs != null) {
          // Remove the first preference for this item, via the iterator so removal is safe
          Iterator<Preference> prefsIterator = prefs.iterator();
          while (prefsIterator.hasNext()) {
            Preference pref = prefsIterator.next();
            if (pref.getItemID() == itemID) {
              prefsIterator.remove();
              break;
            }
          }
        }

        removeTimestamp(userID, itemID, timestamps);

      } else {

        float preferenceValue = Float.parseFloat(preferenceValueString);

        // Overwrite in place when the user already has a preference for this item
        boolean exists = false;
        if (prefs != null) {
          for (Preference pref : prefs) {
            if (pref.getItemID() == itemID) {
              exists = true;
              pref.setValue(preferenceValue);
              break;
            }
          }
        }

        if (!exists) {
          if (prefs == null) {
            // First preference seen for this user: create and register the collection
            prefs = new ArrayList<>(2);
            ((FastByIDMap<Collection<Preference>>) data).put(userID, prefs);
          }
          prefs.add(new GenericPreference(userID, itemID, preferenceValue));
        }

        addTimestamp(userID, itemID, timestampString, timestamps);

      }

    }
  }