int mapred_verify_object()

in gpcontrib/gpmapreduce/src/parse.c [1846:2146]


int mapred_verify_object(mapred_parser_t *parser, mapred_object_t *obj)
{
	char *name;
	int error = NO_ERROR;

	XASSERT(obj);

	/* Verify that all required fields are present and valid */
	name = obj->name ? obj->name : "unnamed";
	switch (obj->kind)
	{
		case MAPRED_DOCUMENT:
			
			/*
			 * If there is a version on the document then it should have
			 * been validated by parser_set_version()
			 */
			if (!obj->u.document.version)
			{
				error = mapred_obj_error(obj, "Missing VERSION",
										 parser->doc_number);
			}

			break;

		case MAPRED_INPUT:

			/* Validate required fields */
			if (!obj->name)
				error = mapred_obj_error(obj, "Missing NAME");
			if (obj->u.input.type == MAPRED_INPUT_NONE)
				error = mapred_obj_error(obj,
						  "Missing FILE, GPFDIST, TABLE, QUERY, or EXEC");

			/* set default values */
			if (error == NO_ERROR)
			{
				if (!obj->u.input.columns)
				{
					obj->u.input.columns = malloc(sizeof(mapred_plist_t));
					obj->u.input.columns->name = copyscalar("value");
					obj->u.input.columns->type = copyscalar("text");
					obj->u.input.columns->next = NULL;
				}
				if (!obj->u.input.columns->next &&
					!obj->u.input.delimiter)
				{
					obj->u.input.delimiter = copyscalar("off");
				}
			}
			break;

		case MAPRED_OUTPUT:

			if (!obj->name)
				error = mapred_obj_error(obj, "Missing NAME");
			if (obj->u.output.type == MAPRED_OUTPUT_NONE)
				error = mapred_obj_error(obj, "Missing FILE or TABLE");
			break;

		case MAPRED_MAPPER:
		case MAPRED_TRANSITION:
		case MAPRED_COMBINER:
		case MAPRED_FINALIZER:

			if (!obj->name)
				error = mapred_obj_error(obj, "Missing NAME");

			/*
			 * We now support "builtin" functions, which are specified by a lack
			 * of an implementation language.  If a language is specified then
			 * a function body is still required.  If a language is not specified
			 * then the function body just defaults to the name of the function.
			 */
			if (obj->name && !obj->u.function.language && !obj->u.function.body)
				obj->u.function.body = copyscalar(obj->name);

			if (obj->u.function.language && !obj->u.function.body)
				error = mapred_obj_error(obj, "Missing FUNCTION");

			/*
			 * LIBRARY is required for "C" language functions.
			 * LIBRARY is invalid for any other language.
			 *
			 * It would be good to verify that LIBRARY is not used in
			 * older YAML formats, but that is difficult given the current
			 * structure of the code.
			 */
			if (obj->u.function.language)
			{
				if (obj->u.function.library)
				{
					if (strcasecmp("C", obj->u.function.language))
					{
						error = mapred_obj_error(obj, "LIBRARY is invalid for "
												 "%s LANGUAGE functions",
												 obj->u.function.language);
					}
				}
				else if (!strcasecmp("C", obj->u.function.language))
				{
					error = mapred_obj_error(obj, "Missing LIBRARY");
				}

				/*
				 * Don't bother filling in default arguments if we already have
				 * an error.
				 */
				if (error)
					break;

				/*
				 * Set default values.
				 *   For builtin functions we delay this so that we can lookup the
				 *   function in the catalog to determine the defaults.
				 */
				if (!obj->u.function.parameters)
				{
					const char *name = default_parameter_names[obj->kind][0];
					name = default_parameter_names[obj->kind][0];
					obj->u.function.parameters = malloc(sizeof(mapred_plist_t));
					obj->u.function.parameters->type = copyscalar("text");
					obj->u.function.parameters->name = copyscalar(name);
					obj->u.function.parameters->next = NULL;

					name = default_parameter_names[obj->kind][1];
					if (name)
					{
						obj->u.function.parameters->next = malloc(sizeof(mapred_plist_t));
						obj->u.function.parameters->next->type = copyscalar("text");
						obj->u.function.parameters->next->name = copyscalar(name);
						obj->u.function.parameters->next->next = NULL;						
					}
				}
				else
				{
					switch (obj->kind)
					{
						case MAPRED_TRANSITION:
							if (!obj->u.function.parameters->next)
							{
								error = mapred_obj_error(
									obj,
									"requires at least 2 input parameters [state, arg1, ...]"
									);
							}
							break;

						case MAPRED_COMBINER:
							if (!obj->u.function.parameters->next ||
								obj->u.function.parameters->next->next)
							{
								error = mapred_obj_error(
									obj,
									"requires exactly 2 input parameters [state1, state2]"
									);
							}
							break;

						case MAPRED_FINALIZER:
							if (obj->u.function.parameters->next)
							{
								error = mapred_obj_error(
									obj,
									"requires exactly 1 input parameter [state]"
									);
							}
							break;

						case MAPRED_MAPPER:
						default:
							break;
					}
				}

				if (!obj->u.function.returns)
				{
					const char *name = default_return_names[obj->kind][0];
					obj->u.function.returns = malloc(sizeof(mapred_plist_t));
					obj->u.function.returns->type = copyscalar("text");
					obj->u.function.returns->name = copyscalar(name);
					obj->u.function.returns->next = NULL;

					name = default_return_names[obj->kind][1];
					if (name)
					{
						obj->u.function.returns->next = malloc(sizeof(mapred_plist_t));
						obj->u.function.returns->next->type = copyscalar("text");
						obj->u.function.returns->next->name = copyscalar(name);
						obj->u.function.returns->next->next = NULL;						
					}
				}
				else if (obj->kind == MAPRED_TRANSITION ||
						 obj->kind == MAPRED_COMBINER)
				{
					if (obj->u.function.returns->next)
					{
						error = mapred_obj_error(
							obj,
							"requires exactly one output parameter [state]"
							);
					}
				}

				/* Set default mode: depends on type of function */
				if (obj->u.function.mode == MAPRED_MODE_NONE)
				{
					if (obj->kind == MAPRED_TRANSITION ||
						obj->kind == MAPRED_COMBINER)
					{
						obj->u.function.mode = MAPRED_MODE_SINGLE;
					}
					else
					{
						obj->u.function.mode = MAPRED_MODE_MULTI;
					}
				}
			}
			break;

		case MAPRED_REDUCER:

			if (!obj->name)
				error = mapred_obj_error(obj, "Missing NAME");
			if (!obj->u.reducer.transition.name)
				error = mapred_obj_error(obj, "Missing TRANSITION");
			/*
			 * Will verify that functions are valid for reducer input after we
			 * have resolved the pointers.
			 */

			/*
			 * It would be good to verify that ORDERING is not used in
			 * older YAML formats, but that is difficult given the current
			 * structure of the code.
			 */

			/*
			 * ORDERING and COMBINER are incompatible
			 */
			if (obj->u.reducer.ordering != NULL &&
				obj->u.reducer.combiner.name)
			{
				error = mapred_obj_error(obj,
										 "REDUCERS cannot specify both a COMBINER "
										 "function and an ORDERING specification");
			}

			/* Setup default "keys" */
			if (!obj->u.reducer.keys)
			{
				obj->u.reducer.keys = malloc(sizeof(mapred_clist_t));
				obj->u.reducer.keys->value = copyscalar("key");
				obj->u.reducer.keys->next = malloc(sizeof(mapred_clist_t));				
				obj->u.reducer.keys->next->next = NULL;
				obj->u.reducer.keys->next->value = copyscalar("*");
			}

			break;

		case MAPRED_TASK:
			if (!obj->name)
				error = mapred_obj_error(obj, "Missing NAME");

			/* Fallthrough */

		case MAPRED_EXECUTION:
			
			if (!obj->u.task.input.name)
				error = mapred_obj_error(obj, "Missing SOURCE");
			
			/* IDENTITY Mappers and Reducers */
			if (obj->u.task.mapper.name &&
				!strcasecmp("IDENTITY", obj->u.task.mapper.name))
			{
				free(obj->u.task.mapper.name);
				obj->u.task.mapper.name = NULL;
			}
			if (obj->u.task.reducer.name &&
				!strcasecmp("IDENTITY", obj->u.task.reducer.name))
			{
				free(obj->u.task.reducer.name);
				obj->u.task.reducer.name = NULL;
			}

			/* STDOUT Output */
			if (obj->u.task.output.name &&
				!strcasecmp("STDOUT", obj->u.task.output.name))
			{
				free(obj->u.task.output.name);
				obj->u.task.output.name = NULL;
			}
			break;

		case MAPRED_NO_KIND:
		default:
			XASSERT(false);
	}

	return error;
}