in src/backend/commands/dbcommands.c [112:828]
static bool get_db_info(const char *name, LOCKMODE lockmode,
Oid *dbIdP, Oid *ownerIdP,
int *encodingP, bool *dbIsTemplateP, bool *dbAllowConnP,
Oid *dbLastSysOidP, TransactionId *dbFrozenXidP,
MultiXactId *dbMinMultiP,
Oid *dbTablespace, char **dbCollate, char **dbCtype);
static bool have_createdb_privilege(void);
static void remove_dbtablespaces(Oid db_id);
static bool check_db_file_conflict(Oid db_id);
static int errdetail_busy_db(int notherbackends, int npreparedxacts);
/*
* CREATE DATABASE
*/
Oid
createdb(ParseState *pstate, const CreatedbStmt *stmt)
{
TableScanDesc scan;
Relation rel;
Oid src_dboid = InvalidOid;
Oid src_owner;
int src_encoding = -1;
char *src_collate = NULL;
char *src_ctype = NULL;
bool src_istemplate;
bool src_allowconn;
Oid src_lastsysoid = InvalidOid;
TransactionId src_frozenxid = InvalidTransactionId;
MultiXactId src_minmxid = InvalidMultiXactId;
Oid src_deftablespace = InvalidOid;
volatile Oid dst_deftablespace;
Relation pg_database_rel;
HeapTuple tuple;
Datum new_record[Natts_pg_database];
bool new_record_nulls[Natts_pg_database];
Oid dboid;
Oid datdba;
ListCell *option;
DefElem *dtablespacename = NULL;
DefElem *downer = NULL;
DefElem *dtemplate = NULL;
DefElem *dencoding = NULL;
DefElem *dlocale = NULL;
DefElem *dcollate = NULL;
DefElem *dctype = NULL;
DefElem *distemplate = NULL;
DefElem *dallowconnections = NULL;
DefElem *dconnlimit = NULL;
char *dbname = stmt->dbname;
char *dbowner = NULL;
const char *dbtemplate = NULL;
char *dbcollate = NULL;
char *dbctype = NULL;
char *canonname;
int encoding = -1;
bool dbistemplate = false;
bool dballowconnections = true;
int dbconnlimit = -1;
int notherbackends;
int npreparedxacts;
createdb_failure_params fparms;
bool shouldDispatch = (Gp_role == GP_ROLE_DISPATCH);
/* Extract options from the statement node tree */
foreach(option, stmt->options)
{
DefElem *defel = (DefElem *) lfirst(option);
if (strcmp(defel->defname, "tablespace") == 0)
{
if (dtablespacename)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options"),
parser_errposition(pstate, defel->location)));
dtablespacename = defel;
}
else if (strcmp(defel->defname, "owner") == 0)
{
if (downer)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options"),
parser_errposition(pstate, defel->location)));
downer = defel;
}
else if (strcmp(defel->defname, "template") == 0)
{
if (dtemplate)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options"),
parser_errposition(pstate, defel->location)));
dtemplate = defel;
}
else if (strcmp(defel->defname, "encoding") == 0)
{
if (dencoding)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options"),
parser_errposition(pstate, defel->location)));
dencoding = defel;
}
else if (strcmp(defel->defname, "locale") == 0)
{
if (dlocale)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options"),
parser_errposition(pstate, defel->location)));
dlocale = defel;
}
else if (strcmp(defel->defname, "lc_collate") == 0)
{
if (dcollate)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options"),
parser_errposition(pstate, defel->location)));
dcollate = defel;
}
else if (strcmp(defel->defname, "lc_ctype") == 0)
{
if (dctype)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options"),
parser_errposition(pstate, defel->location)));
dctype = defel;
}
else if (strcmp(defel->defname, "is_template") == 0)
{
if (distemplate)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options"),
parser_errposition(pstate, defel->location)));
distemplate = defel;
}
else if (strcmp(defel->defname, "allow_connections") == 0)
{
if (dallowconnections)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options"),
parser_errposition(pstate, defel->location)));
dallowconnections = defel;
}
else if (strcmp(defel->defname, "connection_limit") == 0)
{
if (dconnlimit)
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options"),
parser_errposition(pstate, defel->location)));
dconnlimit = defel;
}
else if (strcmp(defel->defname, "location") == 0)
{
ereport(WARNING,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("LOCATION is not supported anymore"),
errhint("Consider using tablespaces instead."),
parser_errposition(pstate, defel->location)));
}
else
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("option \"%s\" not recognized", defel->defname),
parser_errposition(pstate, defel->location)));
}
if (dlocale && (dcollate || dctype))
ereport(ERROR,
(errcode(ERRCODE_SYNTAX_ERROR),
errmsg("conflicting or redundant options"),
errdetail("LOCALE cannot be specified together with LC_COLLATE or LC_CTYPE.")));
if (downer && downer->arg)
dbowner = defGetString(downer);
if (dtemplate && dtemplate->arg)
dbtemplate = defGetString(dtemplate);
if (dencoding && dencoding->arg)
{
const char *encoding_name;
if (IsA(dencoding->arg, Integer))
{
encoding = defGetInt32(dencoding);
encoding_name = pg_encoding_to_char(encoding);
if (strcmp(encoding_name, "") == 0 ||
pg_valid_server_encoding(encoding_name) < 0)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("%d is not a valid encoding code",
encoding),
parser_errposition(pstate, dencoding->location)));
}
else
{
encoding_name = defGetString(dencoding);
encoding = pg_valid_server_encoding(encoding_name);
if (encoding < 0)
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_OBJECT),
errmsg("%s is not a valid encoding name",
encoding_name),
parser_errposition(pstate, dencoding->location)));
}
if (encoding == PG_SQL_ASCII && shouldDispatch)
{
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("server encoding 'SQL_ASCII' is not supported")));
}
}
if (dlocale && dlocale->arg)
{
dbcollate = defGetString(dlocale);
dbctype = defGetString(dlocale);
}
if (dcollate && dcollate->arg)
dbcollate = defGetString(dcollate);
if (dctype && dctype->arg)
dbctype = defGetString(dctype);
if (distemplate && distemplate->arg)
dbistemplate = defGetBoolean(distemplate);
if (dallowconnections && dallowconnections->arg)
dballowconnections = defGetBoolean(dallowconnections);
if (dconnlimit && dconnlimit->arg)
{
dbconnlimit = defGetInt32(dconnlimit);
if (dbconnlimit < -1)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("invalid connection limit: %d", dbconnlimit)));
}
/* obtain OID of proposed owner */
if (dbowner)
datdba = get_role_oid(dbowner, false);
else
datdba = GetUserId();
/*
* To create a database, must have createdb privilege and must be able to
* become the target role (this does not imply that the target role itself
* must have createdb privilege). The latter provision guards against
* "giveaway" attacks. Note that a superuser will always have both of
* these privileges a fortiori.
*/
if (!have_createdb_privilege())
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("permission denied to create database")));
check_is_member_of_role(GetUserId(), datdba);
/*
* Lookup database (template) to be cloned, and obtain share lock on it.
* ShareLock allows two CREATE DATABASEs to work from the same template
* concurrently, while ensuring no one is busy dropping it in parallel
* (which would be Very Bad since we'd likely get an incomplete copy
* without knowing it). This also prevents any new connections from being
* made to the source until we finish copying it, so we can be sure it
* won't change underneath us.
*/
if (!dbtemplate)
dbtemplate = "template1"; /* Default template database name */
if (!get_db_info(dbtemplate, ShareLock,
&src_dboid, &src_owner, &src_encoding,
&src_istemplate, &src_allowconn, &src_lastsysoid,
&src_frozenxid, &src_minmxid, &src_deftablespace,
&src_collate, &src_ctype))
ereport(ERROR,
(errcode(ERRCODE_UNDEFINED_DATABASE),
errmsg("template database \"%s\" does not exist",
dbtemplate)));
/*
* Permission check: to copy a DB that's not marked datistemplate, you
* must be superuser or the owner thereof.
*/
if (!src_istemplate)
{
if (!pg_database_ownercheck(src_dboid, GetUserId()))
ereport(ERROR,
(errcode(ERRCODE_INSUFFICIENT_PRIVILEGE),
errmsg("permission denied to copy database \"%s\"",
dbtemplate)));
}
/* If encoding or locales are defaulted, use source's setting */
if (encoding < 0)
encoding = src_encoding;
if (dbcollate == NULL)
dbcollate = src_collate;
if (dbctype == NULL)
dbctype = src_ctype;
/* Some encodings are client only */
if (!PG_VALID_BE_ENCODING(encoding))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("invalid server encoding %d", encoding)));
/* Check that the chosen locales are valid, and get canonical spellings */
if (!check_locale(LC_COLLATE, dbcollate, &canonname))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("invalid locale name: \"%s\"", dbcollate)));
dbcollate = canonname;
if (!check_locale(LC_CTYPE, dbctype, &canonname))
ereport(ERROR,
(errcode(ERRCODE_WRONG_OBJECT_TYPE),
errmsg("invalid locale name: \"%s\"", dbctype)));
dbctype = canonname;
check_encoding_locale_matches(encoding, dbcollate, dbctype);
/*
* Check that the new encoding and locale settings match the source
* database. We insist on this because we simply copy the source data ---
* any non-ASCII data would be wrongly encoded, and any indexes sorted
* according to the source locale would be wrong.
*
* However, we assume that template0 doesn't contain any non-ASCII data
* nor any indexes that depend on collation or ctype, so template0 can be
* used as template for creating a database with any encoding or locale.
*/
if (strcmp(dbtemplate, "template0") != 0)
{
if (encoding != src_encoding)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("new encoding (%s) is incompatible with the encoding of the template database (%s)",
pg_encoding_to_char(encoding),
pg_encoding_to_char(src_encoding)),
errhint("Use the same encoding as in the template database, or use template0 as template.")));
if (strcmp(dbcollate, src_collate) != 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("new collation (%s) is incompatible with the collation of the template database (%s)",
dbcollate, src_collate),
errhint("Use the same collation as in the template database, or use template0 as template.")));
if (strcmp(dbctype, src_ctype) != 0)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("new LC_CTYPE (%s) is incompatible with the LC_CTYPE of the template database (%s)",
dbctype, src_ctype),
errhint("Use the same LC_CTYPE as in the template database, or use template0 as template.")));
}
/* Resolve default tablespace for new database */
if (dtablespacename && dtablespacename->arg)
{
char *tablespacename;
AclResult aclresult;
tablespacename = defGetString(dtablespacename);
dst_deftablespace = get_tablespace_oid(tablespacename, false);
/* check permissions */
aclresult = pg_tablespace_aclcheck(dst_deftablespace, GetUserId(),
ACL_CREATE);
if (aclresult != ACLCHECK_OK)
aclcheck_error(aclresult, OBJECT_TABLESPACE,
tablespacename);
/* pg_global must never be the default tablespace */
if (dst_deftablespace == GLOBALTABLESPACE_OID)
ereport(ERROR,
(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
errmsg("pg_global cannot be used as default tablespace")));
/*
* If we are trying to change the default tablespace of the template,
* we require that the template not have any files in the new default
* tablespace. This is necessary because otherwise the copied
* database would contain pg_class rows that refer to its default
* tablespace both explicitly (by OID) and implicitly (as zero), which
* would cause problems. For example another CREATE DATABASE using
* the copied database as template, and trying to change its default
* tablespace again, would yield outright incorrect results (it would
* improperly move tables to the new default tablespace that should
* stay in the same tablespace).
*/
if (dst_deftablespace != src_deftablespace)
{
char *srcpath;
struct stat st;
srcpath = GetDatabasePath(src_dboid, dst_deftablespace);
if (stat(srcpath, &st) == 0 &&
S_ISDIR(st.st_mode) &&
!directory_is_empty(srcpath))
ereport(ERROR,
(errcode(ERRCODE_FEATURE_NOT_SUPPORTED),
errmsg("cannot assign new default tablespace \"%s\"",
tablespacename),
errdetail("There is a conflict because database \"%s\" already has some tables in this tablespace.",
dbtemplate)));
pfree(srcpath);
}
}
else
{
/* Use template database's default tablespace */
dst_deftablespace = src_deftablespace;
/* Note there is no additional permission check in this path */
}
/*
* If built with appropriate switch, whine when regression-testing
* conventions for database names are violated. But don't complain during
* initdb.
*/
#ifdef ENFORCE_REGRESSION_TEST_NAME_RESTRICTIONS
if (IsUnderPostmaster && strstr(dbname, "regression") == NULL)
elog(WARNING, "databases created by regression test cases should have names including \"regression\"");
#endif
/*
* Check for db name conflict. This is just to give a more friendly error
* message than "unique index violation". There's a race condition but
* we're willing to accept the less friendly message in that case.
*/
if (OidIsValid(get_database_oid(dbname, true)))
ereport(ERROR,
(errcode(ERRCODE_DUPLICATE_DATABASE),
errmsg("database \"%s\" already exists", dbname)));
/*
* The source DB can't have any active backends, except this one
* (exception is to allow CREATE DB while connected to template1).
* Otherwise we might copy inconsistent data.
*
* This should be last among the basic error checks, because it involves
* potential waiting; we may as well throw an error first if we're gonna
* throw one.
*/
if (CountOtherDBBackends(src_dboid, ¬herbackends, &npreparedxacts))
ereport(ERROR,
(errcode(ERRCODE_OBJECT_IN_USE),
errmsg("source database \"%s\" is being accessed by other users",
dbtemplate),
errdetail_busy_db(notherbackends, npreparedxacts)));
/*
* Select an OID for the new database, checking that it doesn't have a
* filename conflict with anything already existing in the tablespace
* directories.
*/
pg_database_rel = table_open(DatabaseRelationId, RowExclusiveLock);
if (Gp_role == GP_ROLE_EXECUTE)
dboid = GetPreassignedOidForDatabase(dbname);
else
{
do
{
dboid = GetNewOidWithIndex(pg_database_rel, DatabaseOidIndexId,
Anum_pg_database_oid);
} while (check_db_file_conflict(dboid));
if (Gp_role == GP_ROLE_DISPATCH)
RememberAssignedOidForDatabase(dbname, dboid);
}
/*
* Insert a new tuple into pg_database. This establishes our ownership of
* the new database name (anyone else trying to insert the same name will
* block on the unique index, and fail after we commit).
*/
/* Form tuple */
MemSet(new_record, 0, sizeof(new_record));
MemSet(new_record_nulls, false, sizeof(new_record_nulls));
new_record[Anum_pg_database_oid - 1] = ObjectIdGetDatum(dboid);
new_record[Anum_pg_database_datname - 1] =
DirectFunctionCall1(namein, CStringGetDatum(dbname));
new_record[Anum_pg_database_datdba - 1] = ObjectIdGetDatum(datdba);
new_record[Anum_pg_database_encoding - 1] = Int32GetDatum(encoding);
new_record[Anum_pg_database_datcollate - 1] =
DirectFunctionCall1(namein, CStringGetDatum(dbcollate));
new_record[Anum_pg_database_datctype - 1] =
DirectFunctionCall1(namein, CStringGetDatum(dbctype));
new_record[Anum_pg_database_datistemplate - 1] = BoolGetDatum(dbistemplate);
new_record[Anum_pg_database_datallowconn - 1] = BoolGetDatum(dballowconnections);
new_record[Anum_pg_database_datconnlimit - 1] = Int32GetDatum(dbconnlimit);
new_record[Anum_pg_database_datlastsysoid - 1] = ObjectIdGetDatum(src_lastsysoid);
new_record[Anum_pg_database_datfrozenxid - 1] = TransactionIdGetDatum(src_frozenxid);
new_record[Anum_pg_database_datminmxid - 1] = TransactionIdGetDatum(src_minmxid);
new_record[Anum_pg_database_dattablespace - 1] = ObjectIdGetDatum(dst_deftablespace);
/*
* We deliberately set datacl to default (NULL), rather than copying it
* from the template database. Copying it would be a bad idea when the
* owner is not the same as the template's owner.
*/
new_record_nulls[Anum_pg_database_datacl - 1] = true;
tuple = heap_form_tuple(RelationGetDescr(pg_database_rel),
new_record, new_record_nulls);
CatalogTupleInsert(pg_database_rel, tuple);
CommandCounterIncrement();
/*
* Create tag description.
*/
if (stmt->tags)
{
AddTagDescriptions(stmt->tags,
InvalidOid,
DatabaseRelationId,
dboid,
dbname);
}
if (shouldDispatch)
{
elog(DEBUG5, "shouldDispatch = true, dbOid = %d", dboid);
/*
* Dispatch the command to all primary segments.
*
* Doesn't wait for the QEs to finish execution.
*/
CdbDispatchUtilityStatement((Node *) stmt,
DF_CANCEL_ON_ERROR |
DF_NEED_TWO_PHASE |
DF_WITH_SNAPSHOT,
GetAssignedOidsForDispatch(),
NULL);
}
/*
* Now generate additional catalog entries associated with the new DB
*/
/* Register owner dependency */
recordDependencyOnOwner(DatabaseRelationId, dboid, datdba);
/* Create pg_shdepend entries for objects within database */
copyTemplateDependencies(src_dboid, dboid);
/* MPP-6929: metadata tracking */
if (Gp_role == GP_ROLE_DISPATCH)
MetaTrackAddObject(DatabaseRelationId,
dboid,
GetUserId(),
"CREATE", "DATABASE"
);
CHECK_FOR_INTERRUPTS();
/* Post creation hook for new database */
InvokeObjectPostCreateHook(DatabaseRelationId, dboid, 0);
/*
* Force a checkpoint before starting the copy. This will force all dirty
* buffers, including those of unlogged tables, out to disk, to ensure
* source database is up-to-date on disk for the copy.
* FlushDatabaseBuffers() would suffice for that, but we also want to
* process any pending unlink requests. Otherwise, if a checkpoint
* happened while we're copying files, a file might be deleted just when
* we're about to copy it, causing the lstat() call in copydir() to fail
* with ENOENT.
*/
RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT
| CHECKPOINT_FLUSH_ALL);
/*
* Once we start copying subdirectories, we need to be able to clean 'em
* up if we fail. Use an ENSURE block to make sure this happens. (This
* is not a 100% solution, because of the possibility of failure during
* transaction commit after we leave this routine, but it should handle
* most scenarios.)
*/
fparms.src_dboid = src_dboid;
fparms.dest_dboid = dboid;
PG_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
PointerGetDatum(&fparms));
{
/*
* Iterate through all tablespaces of the template database, and copy
* each one to the new database.
*/
rel = table_open(TableSpaceRelationId, AccessShareLock);
scan = table_beginscan_catalog(rel, 0, NULL);
while ((tuple = heap_getnext(scan, ForwardScanDirection)) != NULL)
{
Form_pg_tablespace spaceform = (Form_pg_tablespace) GETSTRUCT(tuple);
Oid srctablespace = spaceform->oid;
Oid dsttablespace;
char *srcpath;
char *dstpath;
struct stat st;
/* No need to copy global tablespace */
if (srctablespace == GLOBALTABLESPACE_OID)
continue;
srcpath = GetDatabasePath(src_dboid, srctablespace);
if (stat(srcpath, &st) < 0 || !S_ISDIR(st.st_mode) ||
directory_is_empty(srcpath))
{
/* Assume we can ignore it */
pfree(srcpath);
continue;
}
if (srctablespace == src_deftablespace)
dsttablespace = dst_deftablespace;
else
dsttablespace = srctablespace;
dstpath = GetDatabasePath(dboid, dsttablespace);
/*
* Register the database directory to PendingDBDelete link list
* for cleanup in txn abort.
*/
ScheduleDbDirDelete(dboid, dsttablespace, false);
/*
* Copy this subdirectory to the new location
*
* We don't need to copy subdirectories
*/
copydir(srcpath, dstpath, false);
SIMPLE_FAULT_INJECTOR("create_db_after_file_copy");
/* Record the filesystem change in XLOG */
{
xl_dbase_create_rec xlrec;
xlrec.db_id = dboid;
xlrec.tablespace_id = dsttablespace;
xlrec.src_db_id = src_dboid;
xlrec.src_tablespace_id = srctablespace;
XLogBeginInsert();
XLogRegisterData((char *) &xlrec, sizeof(xl_dbase_create_rec));
(void) XLogInsert(RM_DBASE_ID,
XLOG_DBASE_CREATE | XLR_SPECIAL_REL_UPDATE);
}
pfree(srcpath);
pfree(dstpath);
}
SIMPLE_FAULT_INJECTOR("after_xlog_create_database");
table_endscan(scan);
table_close(rel, AccessShareLock);
/*
* We force a checkpoint before committing. This effectively means
* that committed XLOG_DBASE_CREATE operations will never need to be
* replayed (at least not in ordinary crash recovery; we still have to
* make the XLOG entry for the benefit of PITR operations). This
* avoids two nasty scenarios:
*
* #1: When PITR is off, we don't XLOG the contents of newly created
* indexes; therefore the drop-and-recreate-whole-directory behavior
* of DBASE_CREATE replay would lose such indexes.
*
* #2: Since we have to recopy the source database during DBASE_CREATE
* replay, we run the risk of copying changes in it that were
* committed after the original CREATE DATABASE command but before the
* system crash that led to the replay. This is at least unexpected
* and at worst could lead to inconsistencies, eg duplicate table
* names.
*
* (Both of these were real bugs in releases 8.0 through 8.0.3.)
*
* In PITR replay, the first of these isn't an issue, and the second
* is only a risk if the CREATE DATABASE and subsequent template
* database change both occur while a base backup is being taken.
* There doesn't seem to be much we can do about that except document
* it as a limitation.
*
* Perhaps if we ever implement CREATE DATABASE in a less cheesy way,
* we can avoid this.
*/
RequestCheckpoint(CHECKPOINT_IMMEDIATE | CHECKPOINT_FORCE | CHECKPOINT_WAIT);
/*
* Close pg_database, but keep lock till commit.
*/
table_close(pg_database_rel, NoLock);
/*
* Force synchronous commit, thus minimizing the window between
* creation of the database files and committal of the transaction. If
* we crash before committing, we'll have a DB that's taking up disk
* space but is not in pg_database, which is not good.
*/
ForceSyncCommit();
}
PG_END_ENSURE_ERROR_CLEANUP(createdb_failure_callback,
PointerGetDatum(&fparms));
return dboid;
}