in edac_mc.c [1014:1161]
void edac_mc_handle_error(const enum hw_event_mc_err_type type,
struct mem_ctl_info *mci,
const u16 error_count,
const unsigned long page_frame_number,
const unsigned long offset_in_page,
const unsigned long syndrome,
const int top_layer,
const int mid_layer,
const int low_layer,
const char *msg,
const char *other_detail)
{
struct dimm_info *dimm;
char *p, *end;
int row = -1, chan = -1;
int pos[EDAC_MAX_LAYERS] = { top_layer, mid_layer, low_layer };
int i, n_labels = 0;
struct edac_raw_error_desc *e = &mci->error_desc;
bool any_memory = true;
const char *prefix;
edac_dbg(3, "MC%d\n", mci->mc_idx);
/* Fills the error report buffer */
memset(e, 0, sizeof (*e));
e->error_count = error_count;
e->type = type;
e->top_layer = top_layer;
e->mid_layer = mid_layer;
e->low_layer = low_layer;
e->page_frame_number = page_frame_number;
e->offset_in_page = offset_in_page;
e->syndrome = syndrome;
/* need valid strings here for both: */
e->msg = msg ?: "";
e->other_detail = other_detail ?: "";
/*
* Check if the event report is consistent and if the memory location is
* known. If it is, the DIMM(s) label info will be filled and the DIMM's
* error counters will be incremented.
*/
for (i = 0; i < mci->n_layers; i++) {
if (pos[i] >= (int)mci->layers[i].size) {
edac_mc_printk(mci, KERN_ERR,
"INTERNAL ERROR: %s value is out of range (%d >= %d)\n",
edac_layer_name[mci->layers[i].type],
pos[i], mci->layers[i].size);
/*
* Instead of just returning it, let's use what's
* known about the error. The increment routines and
* the DIMM filter logic will do the right thing by
* pointing the likely damaged DIMMs.
*/
pos[i] = -1;
}
if (pos[i] >= 0)
any_memory = false;
}
/*
* Get the dimm label/grain that applies to the match criteria.
* As the error algorithm may not be able to point to just one memory
* stick, the logic here will get all possible labels that could
* pottentially be affected by the error.
* On FB-DIMM memory controllers, for uncorrected errors, it is common
* to have only the MC channel and the MC dimm (also called "branch")
* but the channel is not known, as the memory is arranged in pairs,
* where each memory belongs to a separate channel within the same
* branch.
*/
p = e->label;
*p = '\0';
end = p + sizeof(e->label);
prefix = "";
mci_for_each_dimm(mci, dimm) {
if (top_layer >= 0 && top_layer != dimm->location[0])
continue;
if (mid_layer >= 0 && mid_layer != dimm->location[1])
continue;
if (low_layer >= 0 && low_layer != dimm->location[2])
continue;
/* get the max grain, over the error match range */
if (dimm->grain > e->grain)
e->grain = dimm->grain;
/*
* If the error is memory-controller wide, there's no need to
* seek for the affected DIMMs because the whole channel/memory
* controller/... may be affected. Also, don't show errors for
* empty DIMM slots.
*/
if (!dimm->nr_pages)
continue;
n_labels++;
if (n_labels > EDAC_MAX_LABELS) {
p = e->label;
*p = '\0';
} else {
p += scnprintf(p, end - p, "%s%s", prefix, dimm->label);
prefix = OTHER_LABEL;
}
/*
* get csrow/channel of the DIMM, in order to allow
* incrementing the compat API counters
*/
edac_dbg(4, "%s csrows map: (%d,%d)\n",
mci->csbased ? "rank" : "dimm",
dimm->csrow, dimm->cschannel);
if (row == -1)
row = dimm->csrow;
else if (row >= 0 && row != dimm->csrow)
row = -2;
if (chan == -1)
chan = dimm->cschannel;
else if (chan >= 0 && chan != dimm->cschannel)
chan = -2;
}
if (any_memory)
strscpy(e->label, "any memory", sizeof(e->label));
else if (!*e->label)
strscpy(e->label, "unknown memory", sizeof(e->label));
edac_inc_csrow(e, row, chan);
/* Fill the RAM location data */
p = e->location;
end = p + sizeof(e->location);
prefix = "";
for (i = 0; i < mci->n_layers; i++) {
if (pos[i] < 0)
continue;
p += scnprintf(p, end - p, "%s%s:%d", prefix,
edac_layer_name[mci->layers[i].type], pos[i]);
prefix = " ";
}
edac_raw_mc_handle_error(e);
}