in datafusion/sql/src/parser.rs [834:1270]
fn create_external_table() -> Result<(), ParserError> {
// positive case
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv'";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
has_header: false,
delimiter: ',',
location: "foo.csv".into(),
table_partition_cols: vec![],
order_exprs: vec![],
if_not_exists: false,
file_compression_type: UNCOMPRESSED,
unbounded: false,
options: HashMap::new(),
});
expect_parse_ok(sql, expected)?;
// positive case: leading space
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![make_column_def("c1", DataType::Int(None))],
file_type: "CSV".to_string(),
has_header: false,
delimiter: ',',
location: "foo.csv".into(),
table_partition_cols: vec![],
order_exprs: vec![],
if_not_exists: false,
file_compression_type: UNCOMPRESSED,
unbounded: false,
options: HashMap::new(),
});
expect_parse_ok(sql, expected)?;
// positive case: leading space + semicolon
let sql =
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV LOCATION 'foo.csv' ;";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![make_column_def("c1", DataType::Int(None))],
file_type: "CSV".to_string(),
has_header: false,
delimiter: ',',
location: "foo.csv".into(),
table_partition_cols: vec![],
order_exprs: vec![],
if_not_exists: false,
file_compression_type: UNCOMPRESSED,
unbounded: false,
options: HashMap::new(),
});
expect_parse_ok(sql, expected)?;
// positive case with delimiter
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV DELIMITER '|' LOCATION 'foo.csv'";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
has_header: false,
delimiter: '|',
location: "foo.csv".into(),
table_partition_cols: vec![],
order_exprs: vec![],
if_not_exists: false,
file_compression_type: UNCOMPRESSED,
unbounded: false,
options: HashMap::new(),
});
expect_parse_ok(sql, expected)?;
// positive case: partitioned by
let sql = "CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1, p2) LOCATION 'foo.csv'";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
has_header: false,
delimiter: ',',
location: "foo.csv".into(),
table_partition_cols: vec!["p1".to_string(), "p2".to_string()],
order_exprs: vec![],
if_not_exists: false,
file_compression_type: UNCOMPRESSED,
unbounded: false,
options: HashMap::new(),
});
expect_parse_ok(sql, expected)?;
// positive case: it is ok for case insensitive sql stmt with `WITH HEADER ROW` tokens
let sqls = vec![
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH HEADER ROW LOCATION 'foo.csv'",
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV with header row LOCATION 'foo.csv'"
];
for sql in sqls {
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
has_header: true,
delimiter: ',',
location: "foo.csv".into(),
table_partition_cols: vec![],
order_exprs: vec![],
if_not_exists: false,
file_compression_type: UNCOMPRESSED,
unbounded: false,
options: HashMap::new(),
});
expect_parse_ok(sql, expected)?;
}
// positive case: it is ok for sql stmt with `COMPRESSION TYPE GZIP` tokens
let sqls = vec![
("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV COMPRESSION TYPE GZIP LOCATION 'foo.csv'", "GZIP"),
("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV COMPRESSION TYPE BZIP2 LOCATION 'foo.csv'", "BZIP2"),
("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV COMPRESSION TYPE XZ LOCATION 'foo.csv'", "XZ"),
("CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV COMPRESSION TYPE ZSTD LOCATION 'foo.csv'", "ZSTD"),
];
for (sql, file_compression_type) in sqls {
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![make_column_def("c1", DataType::Int(display))],
file_type: "CSV".to_string(),
has_header: false,
delimiter: ',',
location: "foo.csv".into(),
table_partition_cols: vec![],
order_exprs: vec![],
if_not_exists: false,
file_compression_type: CompressionTypeVariant::from_str(
file_compression_type,
)?,
unbounded: false,
options: HashMap::new(),
});
expect_parse_ok(sql, expected)?;
}
// positive case: it is ok for parquet files not to have columns specified
let sql = "CREATE EXTERNAL TABLE t STORED AS PARQUET LOCATION 'foo.parquet'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![],
file_type: "PARQUET".to_string(),
has_header: false,
delimiter: ',',
location: "foo.parquet".into(),
table_partition_cols: vec![],
order_exprs: vec![],
if_not_exists: false,
file_compression_type: UNCOMPRESSED,
unbounded: false,
options: HashMap::new(),
});
expect_parse_ok(sql, expected)?;
// positive case: it is ok for parquet files to be other than upper case
let sql = "CREATE EXTERNAL TABLE t STORED AS parqueT LOCATION 'foo.parquet'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![],
file_type: "PARQUET".to_string(),
has_header: false,
delimiter: ',',
location: "foo.parquet".into(),
table_partition_cols: vec![],
order_exprs: vec![],
if_not_exists: false,
file_compression_type: UNCOMPRESSED,
unbounded: false,
options: HashMap::new(),
});
expect_parse_ok(sql, expected)?;
// positive case: it is ok for avro files not to have columns specified
let sql = "CREATE EXTERNAL TABLE t STORED AS AVRO LOCATION 'foo.avro'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![],
file_type: "AVRO".to_string(),
has_header: false,
delimiter: ',',
location: "foo.avro".into(),
table_partition_cols: vec![],
order_exprs: vec![],
if_not_exists: false,
file_compression_type: UNCOMPRESSED,
unbounded: false,
options: HashMap::new(),
});
expect_parse_ok(sql, expected)?;
// positive case: it is ok for avro files not to have columns specified
let sql =
"CREATE EXTERNAL TABLE IF NOT EXISTS t STORED AS PARQUET LOCATION 'foo.parquet'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![],
file_type: "PARQUET".to_string(),
has_header: false,
delimiter: ',',
location: "foo.parquet".into(),
table_partition_cols: vec![],
order_exprs: vec![],
if_not_exists: true,
file_compression_type: UNCOMPRESSED,
unbounded: false,
options: HashMap::new(),
});
expect_parse_ok(sql, expected)?;
// Error cases: partition column does not support type
let sql =
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV PARTITIONED BY (p1 int) LOCATION 'foo.csv'";
expect_parse_error(sql, "sql parser error: Expected ',' or ')' after partition definition, found: int");
// positive case: additional options (one entry) can be specified
let sql =
"CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1') LOCATION 'blahblah'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![],
file_type: "X".to_string(),
has_header: false,
delimiter: ',',
location: "blahblah".into(),
table_partition_cols: vec![],
order_exprs: vec![],
if_not_exists: false,
file_compression_type: UNCOMPRESSED,
unbounded: false,
options: HashMap::from([("k1".into(), "v1".into())]),
});
expect_parse_ok(sql, expected)?;
// positive case: additional options (multiple entries) can be specified
let sql =
"CREATE EXTERNAL TABLE t STORED AS x OPTIONS ('k1' 'v1', k2 v2) LOCATION 'blahblah'";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![],
file_type: "X".to_string(),
has_header: false,
delimiter: ',',
location: "blahblah".into(),
table_partition_cols: vec![],
order_exprs: vec![],
if_not_exists: false,
file_compression_type: UNCOMPRESSED,
unbounded: false,
options: HashMap::from([
("k1".into(), "v1".into()),
("k2".into(), "v2".into()),
]),
});
expect_parse_ok(sql, expected)?;
// Ordered Col
let sqls = vec!["CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1) LOCATION 'foo.csv'",
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS FIRST) LOCATION 'foo.csv'",
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 NULLS LAST) LOCATION 'foo.csv'",
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC) LOCATION 'foo.csv'",
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC) LOCATION 'foo.csv'",
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS FIRST) LOCATION 'foo.csv'",
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 DESC NULLS LAST) LOCATION 'foo.csv'",
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS FIRST) LOCATION 'foo.csv'",
"CREATE EXTERNAL TABLE t(c1 int) STORED AS CSV WITH ORDER (c1 ASC NULLS LAST) LOCATION 'foo.csv'"];
let expected = vec![
(None, None),
(None, Some(true)),
(None, Some(false)),
(Some(true), None),
(Some(false), None),
(Some(false), Some(true)),
(Some(false), Some(false)),
(Some(true), Some(true)),
(Some(true), Some(false)),
];
for (sql, (asc, nulls_first)) in sqls.iter().zip(expected.into_iter()) {
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![make_column_def("c1", DataType::Int(None))],
file_type: "CSV".to_string(),
has_header: false,
delimiter: ',',
location: "foo.csv".into(),
table_partition_cols: vec![],
order_exprs: vec![vec![OrderByExpr {
expr: Identifier(Ident {
value: "c1".to_owned(),
quote_style: None,
}),
asc,
nulls_first,
}]],
if_not_exists: false,
file_compression_type: UNCOMPRESSED,
unbounded: false,
options: HashMap::new(),
});
expect_parse_ok(sql, expected)?;
}
// Ordered Col
let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 ASC, c2 DESC NULLS FIRST) LOCATION 'foo.csv'";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![
make_column_def("c1", DataType::Int(display)),
make_column_def("c2", DataType::Int(display)),
],
file_type: "CSV".to_string(),
has_header: false,
delimiter: ',',
location: "foo.csv".into(),
table_partition_cols: vec![],
order_exprs: vec![vec![
OrderByExpr {
expr: Identifier(Ident {
value: "c1".to_owned(),
quote_style: None,
}),
asc: Some(true),
nulls_first: None,
},
OrderByExpr {
expr: Identifier(Ident {
value: "c2".to_owned(),
quote_style: None,
}),
asc: Some(false),
nulls_first: Some(true),
},
]],
if_not_exists: false,
file_compression_type: UNCOMPRESSED,
unbounded: false,
options: HashMap::new(),
});
expect_parse_ok(sql, expected)?;
// Ordered Binary op
let sql = "CREATE EXTERNAL TABLE t(c1 int, c2 int) STORED AS CSV WITH ORDER (c1 - c2 ASC) LOCATION 'foo.csv'";
let display = None;
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![
make_column_def("c1", DataType::Int(display)),
make_column_def("c2", DataType::Int(display)),
],
file_type: "CSV".to_string(),
has_header: false,
delimiter: ',',
location: "foo.csv".into(),
table_partition_cols: vec![],
order_exprs: vec![vec![OrderByExpr {
expr: Expr::BinaryOp {
left: Box::new(Identifier(Ident {
value: "c1".to_owned(),
quote_style: None,
})),
op: BinaryOperator::Minus,
right: Box::new(Identifier(Ident {
value: "c2".to_owned(),
quote_style: None,
})),
},
asc: Some(true),
nulls_first: None,
}]],
if_not_exists: false,
file_compression_type: UNCOMPRESSED,
unbounded: false,
options: HashMap::new(),
});
expect_parse_ok(sql, expected)?;
// Most complete CREATE EXTERNAL TABLE statement possible
let sql = "
CREATE UNBOUNDED EXTERNAL TABLE IF NOT EXISTS t (c1 int, c2 float)
STORED AS PARQUET
DELIMITER '*'
WITH HEADER ROW
WITH ORDER (c1 - c2 ASC)
COMPRESSION TYPE zstd
PARTITIONED BY (c1)
LOCATION 'foo.parquet'
OPTIONS (ROW_GROUP_SIZE '1024', 'TRUNCATE' 'NO')
";
let expected = Statement::CreateExternalTable(CreateExternalTable {
name: "t".into(),
columns: vec![
make_column_def("c1", DataType::Int(None)),
make_column_def("c2", DataType::Float(None)),
],
file_type: "PARQUET".to_string(),
has_header: true,
delimiter: '*',
location: "foo.parquet".into(),
table_partition_cols: vec!["c1".into()],
order_exprs: vec![vec![OrderByExpr {
expr: Expr::BinaryOp {
left: Box::new(Identifier(Ident {
value: "c1".to_owned(),
quote_style: None,
})),
op: BinaryOperator::Minus,
right: Box::new(Identifier(Ident {
value: "c2".to_owned(),
quote_style: None,
})),
},
asc: Some(true),
nulls_first: None,
}]],
if_not_exists: true,
file_compression_type: CompressionTypeVariant::ZSTD,
unbounded: true,
options: HashMap::from([
("ROW_GROUP_SIZE".into(), "1024".into()),
("TRUNCATE".into(), "NO".into()),
]),
});
expect_parse_ok(sql, expected)?;
// For error cases, see: `create_external_table.slt`
Ok(())
}