in community/mahout-mr/mr/src/main/java/org/apache/mahout/cf/taste/impl/model/file/FileDataModel.java [394:539]
/**
 * Applies one line of input to the preference data being built.
 *
 * <p>The line is split with {@code delimiterPattern} into a user ID, an item ID, a preference value and an
 * optional fourth field that is treated as a timestamp. Empty lines and lines starting with
 * {@code COMMENT_CHAR} are ignored. When {@code transpose} is set, the parsed user and item IDs are swapped
 * before anything else happens.</p>
 *
 * <p>A line with an empty preference value and no timestamp field (for example {@code "userID,itemID,"})
 * removes the preference and its timestamp. Any other line sets the preference value, adding the preference
 * if the user does not already have one for the item, and then records the timestamp via
 * {@code addTimestamp}. A user whose last preference is removed is dropped from {@code data}, regardless of
 * which storage type is in use.</p>
 *
 * @param line the raw line to process
 * @param data per-user preferences keyed by user ID; values are {@code PreferenceArray} when
 *     {@code fromPriorData} is true and {@code Collection<Preference>} otherwise
 * @param timestamps timestamp storage handed to {@code addTimestamp} / {@code removeTimestamp}
 * @param fromPriorData selects which of the two storage types {@code data} holds
 * @throws java.util.NoSuchElementException if the line has fewer than three fields (raised by the token
 *     iterator's {@code next()})
 * @throws NumberFormatException if the preference value cannot be parsed as a float; this includes an empty
 *     value on a line that does carry a timestamp field
 */
protected void processLine(String line,
                           FastByIDMap<?> data,
                           FastByIDMap<FastByIDMap<Long>> timestamps,
                           boolean fromPriorData) {

  // Ignore empty lines and comments
  if (line.isEmpty() || line.charAt(0) == COMMENT_CHAR) {
    return;
  }

  Iterator<String> tokens = delimiterPattern.split(line).iterator();
  String userIDString = tokens.next();
  String itemIDString = tokens.next();
  String preferenceValueString = tokens.next();
  boolean hasTimestamp = tokens.hasNext();
  String timestampString = hasTimestamp ? tokens.next() : null;

  long userID = readUserIDFromString(userIDString);
  long itemID = readItemIDFromString(itemIDString);

  if (transpose) {
    long tmp = userID;
    userID = itemID;
    itemID = tmp;
  }

  // A line of the form "userID,itemID," (empty value, no timestamp field) means remove
  boolean removal = !hasTimestamp && preferenceValueString.isEmpty();

  if (removal) {
    // This is kind of gross but need to handle two types of storage
    if (fromPriorData) {
      @SuppressWarnings("unchecked") // fromPriorData means the values are PreferenceArray
      FastByIDMap<PreferenceArray> arrayData = (FastByIDMap<PreferenceArray>) data;
      removeFromPreferenceArray(arrayData, userID, itemID);
    } else {
      @SuppressWarnings("unchecked") // !fromPriorData means the values are Collection<Preference>
      FastByIDMap<Collection<Preference>> collectionData = (FastByIDMap<Collection<Preference>>) data;
      removeFromPreferenceCollection(collectionData, userID, itemID);
    }
    removeTimestamp(userID, itemID, timestamps);
  } else {
    // Parse before touching any storage so a malformed value leaves data unchanged
    float preferenceValue = Float.parseFloat(preferenceValueString);
    if (fromPriorData) {
      @SuppressWarnings("unchecked") // fromPriorData means the values are PreferenceArray
      FastByIDMap<PreferenceArray> arrayData = (FastByIDMap<PreferenceArray>) data;
      setInPreferenceArray(arrayData, userID, itemID, preferenceValue);
    } else {
      @SuppressWarnings("unchecked") // !fromPriorData means the values are Collection<Preference>
      FastByIDMap<Collection<Preference>> collectionData = (FastByIDMap<Collection<Preference>>) data;
      setInPreferenceCollection(collectionData, userID, itemID, preferenceValue);
    }
    // Timestamps are only recorded for add/update lines, for both storage types
    addTimestamp(userID, itemID, timestampString, timestamps);
  }
}

/**
 * Removes the user's preference(s) for the item from array-backed data, dropping the user entirely when
 * nothing is left.
 */
private void removeFromPreferenceArray(FastByIDMap<PreferenceArray> data, long userID, long itemID) {
  PreferenceArray prefs = data.get(userID);
  if (prefs == null) {
    return;
  }
  int length = prefs.length();
  // Count matches first so the replacement array is sized exactly, even if the item occurs more than once
  int matches = 0;
  for (int i = 0; i < length; i++) {
    if (prefs.getItemID(i) == itemID) {
      matches++;
    }
  }
  if (matches == 0) {
    return;
  }
  if (matches == length) {
    data.remove(userID);
    return;
  }
  PreferenceArray newPrefs = new GenericUserPreferenceArray(length - matches);
  int target = 0;
  for (int i = 0; i < length; i++) {
    if (prefs.getItemID(i) != itemID) {
      newPrefs.set(target++, prefs.get(i));
    }
  }
  data.put(userID, newPrefs);
}

/**
 * Sets the user's preference value for the item in array-backed data, inserting a new preference at index 0
 * of a freshly allocated array when the user has none for that item.
 */
private void setInPreferenceArray(FastByIDMap<PreferenceArray> data,
                                  long userID,
                                  long itemID,
                                  float preferenceValue) {
  PreferenceArray prefs = data.get(userID);
  int length = prefs == null ? 0 : prefs.length();
  for (int i = 0; i < length; i++) {
    if (prefs.getItemID(i) == itemID) {
      // Already present: update in place, no reallocation needed
      prefs.setValue(i, preferenceValue);
      return;
    }
  }
  PreferenceArray newPrefs = new GenericUserPreferenceArray(length + 1);
  // Existing preferences shift up by one; slot 0 is reserved for the new preference
  for (int i = 0; i < length; i++) {
    newPrefs.set(i + 1, prefs.get(i));
  }
  newPrefs.setUserID(0, userID);
  newPrefs.setItemID(0, itemID);
  newPrefs.setValue(0, preferenceValue);
  data.put(userID, newPrefs);
}

/**
 * Removes the user's first preference for the item from collection-backed data, dropping the user entirely
 * when the collection ends up empty.
 */
private void removeFromPreferenceCollection(FastByIDMap<Collection<Preference>> data,
                                            long userID,
                                            long itemID) {
  Collection<Preference> prefs = data.get(userID);
  if (prefs == null) {
    return;
  }
  Iterator<Preference> prefsIterator = prefs.iterator();
  while (prefsIterator.hasNext()) {
    if (prefsIterator.next().getItemID() == itemID) {
      prefsIterator.remove();
      break;
    }
  }
  // Match the array-backed storage: a user with no remaining preferences is not kept in the map
  if (prefs.isEmpty()) {
    data.remove(userID);
  }
}

/**
 * Sets the user's preference value for the item in collection-backed data, appending a new preference (and
 * creating the user's collection if needed) when the user has none for that item.
 */
private void setInPreferenceCollection(FastByIDMap<Collection<Preference>> data,
                                       long userID,
                                       long itemID,
                                       float preferenceValue) {
  Collection<Preference> prefs = data.get(userID);
  if (prefs == null) {
    prefs = new ArrayList<>(2);
    data.put(userID, prefs);
  } else {
    for (Preference pref : prefs) {
      if (pref.getItemID() == itemID) {
        pref.setValue(preferenceValue);
        return;
      }
    }
  }
  prefs.add(new GenericPreference(userID, itemID, preferenceValue));
}