uint32_t cpuinfo_x86_normalize_brand_string()

in src/x86/name.c [528:667]


/*
 * Normalizes a raw 48-byte x86 brand string into a compact, NUL-terminated name.
 *
 * Processing stages:
 *  1. Locate the logical end of the string by skipping trailing zero bytes.
 *  2. Unify whitespace: tabs, embedded zeros, '@', parentheses and everything
 *     inside parentheses become spaces. The position of '@' is remembered in
 *     the parser state as the frequency separator.
 *  3. Pass every space-delimited token to transform_token(); a false return
 *     cuts the string at the end of that token.
 *  4. Reject engineering samples and strings with nothing but spaces before
 *     the frequency separator.
 *  5. Copy the surviving tokens into normalized_name, separated by a single
 *     space, except that no space is inserted around a dash at a token boundary.
 *
 * The input is copied to a local buffer before the output is touched, so
 * raw_name and normalized_name may refer to the same buffer.
 *
 * raw_name        - 48 bytes of brand string; need not be NUL-terminated and
 *                   may contain zero bytes in the middle.
 * normalized_name - 48-byte output buffer; always NUL-terminated on return.
 *
 * Returns the length of the string stored in normalized_name (excluding the
 * terminating NUL), or 0 when the normalized name is empty.
 */
uint32_t cpuinfo_x86_normalize_brand_string(
	const char raw_name[48],
	char normalized_name[48])
{
	/* Copy the input first: the caller may pass the same buffer for input and output */
	char name[48];
	memcpy(name, raw_name, sizeof(name));
	normalized_name[0] = '\0';

	/*
	 * First find the end of the string
	 * Start search from the end because some brand strings contain zeroes in the middle
	 */
	char* name_end = &name[48];
	while (name_end[-1] == '\0') {
		/*
		 * Adjust name_end by 1 position and check that we didn't reach the start of the brand string.
		 * This is possible if all characters are zero.
		 */
		if (--name_end == name) {
			/* All characters are zeros */
			return 0;
		}
	}

	struct parser_state parser_state = { 0 };

	/* Now unify all whitespace characters: replace tabs and '\0' with spaces */
	{
		bool inside_parentheses = false;
		for (char* char_ptr = name; char_ptr != name_end; char_ptr++) {
			switch (*char_ptr) {
				case '(':
					inside_parentheses = true;
					*char_ptr = ' ';
					break;
				case ')':
					inside_parentheses = false;
					*char_ptr = ' ';
					break;
				case '@':
					/* Remember where the frequency part starts, then erase the separator itself */
					parser_state.frequency_separator = char_ptr;
					/* fallthrough */
				case '\0':
				case '\t':
					*char_ptr = ' ';
					break;
				default:
					/* Text inside parentheses is dropped entirely */
					if (inside_parentheses) {
						*char_ptr = ' ';
					}
					break;
			}
		}
	}

	/* Iterate through all tokens and erase redundant parts */
	{
		bool is_token = false;
		/* Only read while is_token is true, i.e. after it has been assigned */
		char* token_start = NULL;
		for (char* char_ptr = name; char_ptr != name_end; char_ptr++) {
			if (*char_ptr == ' ') {
				if (is_token) {
					is_token = false;
					if (!transform_token(token_start, char_ptr, &parser_state)) {
						/* Cut the string here; the remaining characters are ignored */
						name_end = char_ptr;
						break;
					}
				}
			} else {
				if (!is_token) {
					is_token = true;
					token_start = char_ptr;
				}
			}
		}
		/* The last token may run up to name_end without a trailing space */
		if (is_token) {
			transform_token(token_start, name_end, &parser_state);
		}
	}

	/* If this is an engineering sample, return empty string */
	if (parser_state.engineering_sample) {
		return 0;
	}

	/* Check if there is some string before the frequency separator. */
	if (parser_state.frequency_separator != NULL) {
		/* NOTE(review): is_space presumably tests that the whole range is blank - confirm */
		if (is_space(name, parser_state.frequency_separator)) {
			/* If only frequency is available, return empty string */
			return 0;
		}
	}

	/* Compact tokens: collapse multiple spacing into one */
	{
		char* output_ptr = normalized_name;
		/* Only read while is_token is true, i.e. after it has been assigned */
		char* token_start = NULL;
		bool is_token = false;
		/* Starts as true so that no separator is emitted before the first token */
		bool previous_token_ends_with_dash = true;
		bool current_token_starts_with_dash = false;
		uint32_t token_count = 1;
		for (char* char_ptr = name; char_ptr != name_end; char_ptr++) {
			const char character = *char_ptr;
			if (character == ' ') {
				if (is_token) {
					is_token = false;
					if (!current_token_starts_with_dash && !previous_token_ends_with_dash) {
						token_count += 1;
						*output_ptr++ = ' ';
					}
					output_ptr = move_token(token_start, char_ptr, output_ptr);
					/* Note: char_ptr[-1] exists because there is a token before this space */
					previous_token_ends_with_dash = (char_ptr[-1] == '-');
				}
			} else {
				if (!is_token) {
					is_token = true;
					token_start = char_ptr;
					current_token_starts_with_dash = (character == '-');
				}
			}
		}
		if (is_token) {
			if (!current_token_starts_with_dash && !previous_token_ends_with_dash) {
				token_count += 1;
				*output_ptr++ = ' ';
			}
			output_ptr = move_token(token_start, name_end, output_ptr);
		}
		if (parser_state.frequency_token && token_count <= 1) {
			/* The only remaining part is frequency */
			normalized_name[0] = '\0';
			return 0;
		}
		if (output_ptr >= &normalized_name[48]) {
			/*
			 * The output filled the whole buffer: the last character is sacrificed for the
			 * terminator, and the reported length must reflect the string actually stored.
			 */
			output_ptr = &normalized_name[47];
		}
		*output_ptr = '\0';
		return (uint32_t) (output_ptr - normalized_name);
	}
}