in odps-data-carrier/meta-carrier/src/main/java/com/aliyun/odps/datacarrier/metacarrier/HiveMetaCarrier.java [73:180]
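  /**
   * Entry point. Reads metadata from a Hive metastore and writes it to the
   * meta-carrier output directory via {@link MetaManager}.
   *
   * Expected arguments: mode, Hive metastore thrift URI and output directory,
   * where mode is SECURITY_OFF (KERBEROS is not implemented yet). A
   * hypothetical invocation (host and path are placeholders):
   *
   *   HiveMetaCarrier SECURITY_OFF thrift://metastore-host:9083 /tmp/odps-meta
   */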
  public static void main(String[] args) throws Exception {
    if (args.length == 0) {
      printUsage();
      System.exit(128);
    }
    MODE mode = null;
    try {
      mode = MODE.valueOf(args[0]);
    } catch (IllegalArgumentException e) {
      printUsage();
      System.exit(128);
    }
    // Connect to the Hive metastore
    HiveConf hiveConf = new HiveConf();
    String outputPath;
    if (mode.equals(MODE.SECURITY_OFF)) {
      if (args.length != 3) {
        String msg = "Invalid arguments. Expect mode, hive metastore address and "
            + "output directory.";
        throw new IllegalArgumentException(msg);
      }
      hiveConf.setVar(HiveConf.ConfVars.METASTOREURIS, args[1]);
      outputPath = args[2];
    } else if (mode.equals(MODE.KERBEROS)) {
      throw new IllegalArgumentException("Kerberos mode is not supported yet");
    } else {
      throw new IllegalArgumentException("Unsupported mode: " + mode);
    }
    HiveMetaStoreClient hiveClient = new HiveMetaStoreClient(hiveConf);
    MetaManager metaManager = new MetaManager(outputPath);

    GlobalMetaModel globalMetaModel = new GlobalMetaModel();
    globalMetaModel.datasourceType = "HIVE";
    metaManager.setGlobalMeta(globalMetaModel);

    List<String> databaseNames = hiveClient.getAllDatabases();
    // Iterate over databases
    for (String databaseName : databaseNames) {
      List<String> tableNames = hiveClient.getAllTables(databaseName);
      DatabaseMetaModel databaseMeta = new DatabaseMetaModel();
      databaseMeta.databaseName = databaseName;
      databaseMeta.odpsProjectName = databaseName;
      metaManager.setDatabaseMeta(databaseMeta);

      // Iterate over tables
      for (String tableName : tableNames) {
        TableMetaModel tableMetaModel = new TableMetaModel();
        TablePartitionMetaModel tablePartitionMetaModel = new TablePartitionMetaModel();

        // Handle table meta
        tableMetaModel.tableName = tableName;
        tableMetaModel.odpsTableName = tableName;
        List<FieldSchema> columns = hiveClient.getFields(databaseName, tableName);
        for (FieldSchema column : columns) {
          ColumnMetaModel columnMetaModel = new ColumnMetaModel();
          columnMetaModel.columnName = column.getName();
          columnMetaModel.odpsColumnName = column.getName();
          columnMetaModel.type = column.getType();
          columnMetaModel.comment = column.getComment();
          tableMetaModel.columns.add(columnMetaModel);
        }
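        // Note: getFields() returns only the data columns; Hive keeps partition
        // keys separately on the Table object, so they are collected below.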
        List<FieldSchema> partitionColumns =
            hiveClient.getTable(databaseName, tableName).getPartitionKeys();
        for (FieldSchema partitionColumn : partitionColumns) {
          ColumnMetaModel columnMetaModel = new ColumnMetaModel();
          columnMetaModel.columnName = partitionColumn.getName();
          columnMetaModel.odpsColumnName = partitionColumn.getName();
          columnMetaModel.type = partitionColumn.getType();
          columnMetaModel.comment = partitionColumn.getComment();
          tableMetaModel.partitionColumns.add(columnMetaModel);
        }
        metaManager.setTableMeta(databaseName, tableMetaModel);
        // Handle partition meta
        // TODO: what if there are more than 32767 partitions
        // TODO: support parquet
        tablePartitionMetaModel.tableName = tableName;
        List<Partition> partitions =
            hiveClient.listPartitions(databaseName, tableName, Short.MAX_VALUE);
        if (!partitions.isEmpty()) {
          for (Partition partition : partitions) {
            PartitionMetaModel partitionMetaModel = new PartitionMetaModel();
            partitionMetaModel.createTime = Integer.toString(partition.getCreateTime());
            partitionMetaModel.location = partition.getSd().getLocation();
            // Generate the partition spec, e.g. p1='v1',p2='v2'
            List<String> partitionValues = partition.getValues();
            StringBuilder partitionSpecBuilder = new StringBuilder();
            for (int i = 0; i < partitionColumns.size(); i++) {
              partitionSpecBuilder
                  .append(partitionColumns.get(i).getName())
                  .append("='")
                  .append(partitionValues.get(i))
                  .append("'");
              if (i != partitionColumns.size() - 1) {
                partitionSpecBuilder.append(",");
              }
            }
            partitionMetaModel.partitionSpec = partitionSpecBuilder.toString();
            tablePartitionMetaModel.partitions.add(partitionMetaModel);
          }
          metaManager.setTablePartitionMeta(databaseName, tablePartitionMetaModel);
        }
      }
    }
  }