static void sbridge_mce_output_error()

in sb_edac.c [2947:3121]


static void sbridge_mce_output_error(struct mem_ctl_info *mci,
				    const struct mce *m)
{
	struct mem_ctl_info *new_mci;
	struct sbridge_pvt *pvt = mci->pvt_info;
	enum hw_event_mc_err_type tp_event;
	char *optype, msg[256];
	bool ripv = GET_BITFIELD(m->mcgstatus, 0, 0);
	bool overflow = GET_BITFIELD(m->status, 62, 62);
	bool uncorrected_error = GET_BITFIELD(m->status, 61, 61);
	bool recoverable;
	u32 core_err_cnt = GET_BITFIELD(m->status, 38, 52);
	u32 mscod = GET_BITFIELD(m->status, 16, 31);
	u32 errcode = GET_BITFIELD(m->status, 0, 15);
	u32 channel = GET_BITFIELD(m->status, 0, 3);
	u32 optypenum = GET_BITFIELD(m->status, 4, 6);
	/*
	 * Bits 5-0 of MCi_MISC give the least significant bit that is valid.
	 * A value 6 is for cache line aligned address, a value 12 is for page
	 * aligned address reported by patrol scrubber.
	 */
	u32 lsb = GET_BITFIELD(m->misc, 0, 5);
	long channel_mask, first_channel;
	u8  rank = 0xff, socket, ha;
	int rc, dimm;
	char *area_type = "DRAM";

	if (pvt->info.type != SANDY_BRIDGE)
		recoverable = true;
	else
		recoverable = GET_BITFIELD(m->status, 56, 56);

	if (uncorrected_error) {
		core_err_cnt = 1;
		if (ripv) {
			tp_event = HW_EVENT_ERR_UNCORRECTED;
		} else {
			tp_event = HW_EVENT_ERR_FATAL;
		}
	} else {
		tp_event = HW_EVENT_ERR_CORRECTED;
	}

	/*
	 * According with Table 15-9 of the Intel Architecture spec vol 3A,
	 * memory errors should fit in this mask:
	 *	000f 0000 1mmm cccc (binary)
	 * where:
	 *	f = Correction Report Filtering Bit. If 1, subsequent errors
	 *	    won't be shown
	 *	mmm = error type
	 *	cccc = channel
	 * If the mask doesn't match, report an error to the parsing logic
	 */
	switch (optypenum) {
	case 0:
		optype = "generic undef request error";
		break;
	case 1:
		optype = "memory read error";
		break;
	case 2:
		optype = "memory write error";
		break;
	case 3:
		optype = "addr/cmd error";
		break;
	case 4:
		optype = "memory scrubbing error";
		break;
	default:
		optype = "reserved";
		break;
	}

	if (pvt->info.type == KNIGHTS_LANDING) {
		if (channel == 14) {
			edac_dbg(0, "%s%s err_code:%04x:%04x EDRAM bank %d\n",
				overflow ? " OVERFLOW" : "",
				(uncorrected_error && recoverable)
				? " recoverable" : "",
				mscod, errcode,
				m->bank);
		} else {
			char A = *("A");

			/*
			 * Reported channel is in range 0-2, so we can't map it
			 * back to mc. To figure out mc we check machine check
			 * bank register that reported this error.
			 * bank15 means mc0 and bank16 means mc1.
			 */
			channel = knl_channel_remap(m->bank == 16, channel);
			channel_mask = 1 << channel;

			snprintf(msg, sizeof(msg),
				"%s%s err_code:%04x:%04x channel:%d (DIMM_%c)",
				overflow ? " OVERFLOW" : "",
				(uncorrected_error && recoverable)
				? " recoverable" : " ",
				mscod, errcode, channel, A + channel);
			edac_mc_handle_error(tp_event, mci, core_err_cnt,
				m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
				channel, 0, -1,
				optype, msg);
		}
		return;
	} else if (lsb < 12) {
		rc = get_memory_error_data(mci, m->addr, &socket, &ha,
					   &channel_mask, &rank,
					   &area_type, msg);
	} else {
		rc = get_memory_error_data_from_mce(mci, m, &socket, &ha,
						    &channel_mask, msg);
	}

	if (rc < 0)
		goto err_parsing;
	new_mci = get_mci_for_node_id(socket, ha);
	if (!new_mci) {
		strcpy(msg, "Error: socket got corrupted!");
		goto err_parsing;
	}
	mci = new_mci;
	pvt = mci->pvt_info;

	first_channel = find_first_bit(&channel_mask, NUM_CHANNELS);

	if (rank == 0xff)
		dimm = -1;
	else if (rank < 4)
		dimm = 0;
	else if (rank < 8)
		dimm = 1;
	else
		dimm = 2;

	/*
	 * FIXME: On some memory configurations (mirror, lockstep), the
	 * Memory Controller can't point the error to a single DIMM. The
	 * EDAC core should be handling the channel mask, in order to point
	 * to the group of dimm's where the error may be happening.
	 */
	if (!pvt->is_lockstep && !pvt->is_cur_addr_mirrored && !pvt->is_close_pg)
		channel = first_channel;

	snprintf(msg, sizeof(msg),
		 "%s%s area:%s err_code:%04x:%04x socket:%d ha:%d channel_mask:%ld rank:%d",
		 overflow ? " OVERFLOW" : "",
		 (uncorrected_error && recoverable) ? " recoverable" : "",
		 area_type,
		 mscod, errcode,
		 socket, ha,
		 channel_mask,
		 rank);

	edac_dbg(0, "%s\n", msg);

	/* FIXME: need support for channel mask */

	if (channel == CHANNEL_UNSPECIFIED)
		channel = -1;

	/* Call the helper to output message */
	edac_mc_handle_error(tp_event, mci, core_err_cnt,
			     m->addr >> PAGE_SHIFT, m->addr & ~PAGE_MASK, 0,
			     channel, dimm, -1,
			     optype, msg);
	return;
err_parsing:
	edac_mc_handle_error(tp_event, mci, core_err_cnt, 0, 0, 0,
			     -1, -1, -1,
			     msg, "");

}