in ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java [3030:3279]
private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticException {
RowResolver rr = new RowResolver();
RelNode tableRel = null;
try {
// 1. Bail from the Calcite path if the table has a TABLESAMPLE specified,
// or if both the CBO return path and Hive test mode are enabled.
if (qb.getParseInfo().getTabSample(tableAlias) != null
|| getNameToSplitSampleMap().containsKey(tableAlias)
|| (conf.getBoolVar(HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP) && conf.getBoolVar(HiveConf.ConfVars.HIVE_TEST_MODE))) {
String msg = String.format("Table Sample specified for %s."
+ " Currently we don't support Table Sample clauses in CBO,"
+ " turn off cbo for queries on tableSamples.", tableAlias);
LOG.debug(msg);
throw new CalciteSemanticException(msg, UnsupportedFeature.Table_sample_clauses);
}
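// The CalciteSemanticException above is presumably caught by the caller so that
// planning can fall back to the non-CBO path for these unsupported cases.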
// 2. Get Table Metadata
Table tabMetaData = qb.getMetaData().getSrcForAlias(tableAlias);
// 3. Get Table Logical Schema (Row Type)
// NOTE: Table logical schema = Non Partition Cols + Partition Cols +
// Virtual Cols
// 3.1 Add column info for non-partition cols (Object Inspector fields)
final Deserializer deserializer = tabMetaData.getDeserializer();
StructObjectInspector rowObjectInspector = (StructObjectInspector) deserializer
.getObjectInspector();
deserializer.handleJobLevelConfiguration(conf);
List<? extends StructField> fields = rowObjectInspector.getAllStructFieldRefs();
ColumnInfo colInfo;
String colName;
ArrayList<ColumnInfo> cInfoLst = new ArrayList<>();
final NotNullConstraint nnc = tabMetaData.getNotNullConstraint();
final PrimaryKeyInfo pkc = tabMetaData.getPrimaryKeyInfo();
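// NOT NULL and PRIMARY KEY constraints feed isNullable() below, which decides
// whether each column is marked nullable in the logical row type.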
for (StructField structField : fields) {
colName = structField.getFieldName();
colInfo = new ColumnInfo(
structField.getFieldName(),
TypeInfoUtils.getTypeInfoFromObjectInspector(structField.getFieldObjectInspector()),
isNullable(colName, nnc, pkc), tableAlias, false);
colInfo.setSkewedCol(isSkewedCol(tableAlias, qb, colName));
rr.put(tableAlias, colName, colInfo);
cInfoLst.add(colInfo);
}
// TODO: Fix this
ArrayList<ColumnInfo> nonPartitionColumns = new ArrayList<ColumnInfo>(cInfoLst);
ArrayList<ColumnInfo> partitionColumns = new ArrayList<ColumnInfo>();
// 3.2 Add column info corresponding to partition columns
for (FieldSchema part_col : tabMetaData.getPartCols()) {
colName = part_col.getName();
colInfo = new ColumnInfo(colName,
TypeInfoFactory.getPrimitiveTypeInfo(part_col.getType()),
isNullable(colName, nnc, pkc), tableAlias, true);
rr.put(tableAlias, colName, colInfo);
cInfoLst.add(colInfo);
partitionColumns.add(colInfo);
}
final TableType tableType = obtainTableType(tabMetaData);
// 3.3 Add column info corresponding to virtual columns
List<VirtualColumn> virtualCols = new ArrayList<>();
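// TableType.NATIVE here appears to mean "neither Druid nor JDBC": storage-handler
// tables still take this branch and may contribute ACID or snapshot virtual columns
// in addition to Hive's built-in registry of virtual columns.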
if (tableType == TableType.NATIVE) {
virtualCols = VirtualColumn.getRegistry(conf);
if (AcidUtils.isNonNativeAcidTable(tabMetaData)) {
virtualCols.addAll(tabMetaData.getStorageHandler().acidVirtualColumns());
}
if (tabMetaData.isNonNative() && tabMetaData.getStorageHandler().areSnapshotsSupported() &&
isBlank(tabMetaData.getMetaTable())) {
virtualCols.add(VirtualColumn.SNAPSHOT_ID);
}
for (VirtualColumn vc : virtualCols) {
colInfo = new ColumnInfo(vc.getName(), vc.getTypeInfo(), tableAlias, true,
vc.getIsHidden());
rr.put(tableAlias, vc.getName().toLowerCase(), colInfo);
cInfoLst.add(colInfo);
}
}
// 4. Build operator
Map<String, String> tabPropsFromQuery = qb.getTabPropsForAlias(tableAlias);
HiveTableScan.HiveTableScanTrait tableScanTrait = HiveTableScan.HiveTableScanTrait.from(tabPropsFromQuery);
RelOptHiveTable optTable;
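// The scan node built below depends on the table type: Druid-backed tables and
// JDBC-backed tables get dedicated rel nodes (DruidQuery / HiveJdbcConverter),
// while everything else becomes a plain HiveTableScan.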
if (tableType == TableType.DRUID ||
(tableType == TableType.JDBC && tabMetaData.getProperty(Constants.JDBC_TABLE) != null)) {
// Create case-sensitive column list
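// (Presumably needed because Druid and external JDBC sources are case sensitive,
// unlike Hive, so the original column names must be preserved in the row type.)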
List<String> originalColumnNames =
((StandardStructObjectInspector)rowObjectInspector).getOriginalColumnNames();
List<ColumnInfo> cIList = new ArrayList<ColumnInfo>(originalColumnNames.size());
for (int i = 0; i < rr.getColumnInfos().size(); i++) {
cIList.add(new ColumnInfo(originalColumnNames.get(i), rr.getColumnInfos().get(i).getType(),
tableAlias, false));
}
// Build row type from field <type, name>
RelDataType rowType = TypeConverter.getType(cluster, cIList);
// Build RelOptAbstractTable
List<String> fullyQualifiedTabName = new ArrayList<>();
if (tabMetaData.getDbName() != null && !tabMetaData.getDbName().isEmpty()) {
fullyQualifiedTabName.add(tabMetaData.getDbName());
}
fullyQualifiedTabName.add(tabMetaData.getTableName());
if (tableType == TableType.DRUID) {
// Build Druid query
String address = HiveConf.getVar(conf,
HiveConf.ConfVars.HIVE_DRUID_BROKER_DEFAULT_ADDRESS);
String dataSource = tabMetaData.getParameters().get(Constants.DRUID_DATA_SOURCE);
Set<String> metrics = new HashSet<>();
RexBuilder rexBuilder = cluster.getRexBuilder();
RelDataTypeFactory dtFactory = rexBuilder.getTypeFactory();
List<RelDataType> druidColTypes = new ArrayList<>();
List<String> druidColNames = new ArrayList<>();
// TODO: Fix this; we do not actually need it anymore. In addition, Druid
// now allows numeric dimensions, so this check is no longer accurate.
for (RelDataTypeField field : rowType.getFieldList()) {
if (DruidTable.DEFAULT_TIMESTAMP_COLUMN.equals(field.getName())) {
// Druid's time column is never null.
druidColTypes.add(dtFactory.createTypeWithNullability(field.getType(), false));
} else {
druidColTypes.add(field.getType());
}
druidColNames.add(field.getName());
if (field.getName().equals(DruidTable.DEFAULT_TIMESTAMP_COLUMN)) {
// timestamp
continue;
}
if (field.getType().getSqlTypeName() == SqlTypeName.VARCHAR) {
// dimension
continue;
}
metrics.add(field.getName());
}
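// At this point every field other than the Druid time column and the VARCHAR
// fields has been classified as a metric; the time column and VARCHAR columns
// are treated as dimensions.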
List<Interval> intervals = Arrays.asList(DruidTable.DEFAULT_INTERVAL);
rowType = dtFactory.createStructType(druidColTypes, druidColNames);
DruidTable druidTable = new DruidTable(new DruidSchema(address, address, false),
dataSource, RelDataTypeImpl.proto(rowType), metrics, DruidTable.DEFAULT_TIMESTAMP_COLUMN,
intervals, null, null);
optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName,
rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf,
tabNameToTabObject, partitionCache, colStatsCache, noColsMissingStats);
final TableScan scan = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION),
optTable, null == tableAlias ? tabMetaData.getTableName() : tableAlias,
getAliasId(tableAlias, qb), HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP), qb.isInsideView()
|| qb.getAliasInsideView().contains(tableAlias.toLowerCase()), tableScanTrait);
tableRel = DruidQuery.create(cluster, cluster.traitSetOf(BindableConvention.INSTANCE),
optTable, druidTable, ImmutableList.of(scan), DruidSqlOperatorConverter.getDefaultMap());
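// The HiveTableScan is wrapped in a DruidQuery over DruidTable.DEFAULT_INTERVAL,
// presumably so that later rewrite rules can push filters, projections and
// aggregations into the Druid query itself.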
} else {
optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName,
rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf,
tabNameToTabObject, partitionCache, colStatsCache, noColsMissingStats);
final HiveTableScan hts = new HiveTableScan(cluster,
cluster.traitSetOf(HiveRelNode.CONVENTION), optTable,
null == tableAlias ? tabMetaData.getTableName() : tableAlias,
getAliasId(tableAlias, qb),
HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP),
qb.isInsideView() || qb.getAliasInsideView().contains(tableAlias.toLowerCase()), tableScanTrait);
final String dataBaseType = tabMetaData.getProperty(Constants.JDBC_DATABASE_TYPE);
final String url = tabMetaData.getProperty(Constants.JDBC_URL);
final String driver = tabMetaData.getProperty(Constants.JDBC_DRIVER);
final String user = tabMetaData.getProperty(Constants.JDBC_USERNAME);
final String pswd;
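// Resolve the JDBC password: an explicit password property takes precedence,
// then a keystore entry, then a password URI; if none is present we connect
// without a password and log a warning.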
if (tabMetaData.getProperty(Constants.JDBC_PASSWORD) != null) {
pswd = tabMetaData.getProperty(Constants.JDBC_PASSWORD);
} else if (tabMetaData.getProperty(Constants.JDBC_KEYSTORE) != null) {
String keystore = tabMetaData.getProperty(Constants.JDBC_KEYSTORE);
String key = tabMetaData.getProperty(Constants.JDBC_KEY);
pswd = Utilities.getPasswdFromKeystore(keystore, key);
} else if (tabMetaData.getProperty(Constants.JDBC_PASSWORD_URI) != null) {
pswd = Utilities.getPasswdFromUri(tabMetaData.getProperty(Constants.JDBC_PASSWORD_URI));
} else {
pswd = null;
LOG.warn("No password found for accessing {} table via JDBC", fullyQualifiedTabName);
}
final String catalogName = tabMetaData.getProperty(Constants.JDBC_CATALOG);
final String schemaName = tabMetaData.getProperty(Constants.JDBC_SCHEMA);
final String tableName = tabMetaData.getProperty(Constants.JDBC_TABLE);
DataSource ds = JdbcSchema.dataSource(url, driver, user, pswd);
SqlDialect jdbcDialect = JdbcSchema.createDialect(SqlDialectFactoryImpl.INSTANCE, ds);
String dialectName = jdbcDialect.getClass().getName();
if (LOG.isDebugEnabled()) {
LOG.debug("Dialect for table {}: {}", tableName, dialectName);
}
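// JdbcConvention and JdbcSchema instances are cached per connection/dialect key
// so that multiple scans against the same external source can reuse one
// convention and one schema object.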
List<String> jdbcConventionKey = ImmutableNullableList.of(url, driver, user, pswd, dialectName, dataBaseType);
jdbcConventionMap.putIfAbsent(jdbcConventionKey, JdbcConvention.of(jdbcDialect, null, dataBaseType));
JdbcConvention jc = jdbcConventionMap.get(jdbcConventionKey);
List<String> schemaKey = ImmutableNullableList.of(url, driver, user, pswd, dialectName, dataBaseType,
catalogName, schemaName);
schemaMap.putIfAbsent(schemaKey, new JdbcSchema(ds, jc.dialect, jc, catalogName, schemaName));
JdbcSchema schema = schemaMap.get(schemaKey);
JdbcTable jt = (JdbcTable) schema.getTable(tableName);
if (jt == null) {
throw new SemanticException("Table " + tableName + " was not found in the database");
}
JdbcHiveTableScan jdbcTableRel = new JdbcHiveTableScan(cluster, optTable, jt, jc, hts);
tableRel = new HiveJdbcConverter(cluster, jdbcTableRel.getTraitSet().replace(HiveRelNode.CONVENTION),
jdbcTableRel, jc, url, user);
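// HiveJdbcConverter sits at the boundary between the Hive convention and the
// JDBC convention; operators that end up below it are presumably executed by
// the external database rather than by Hive.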
}
} else {
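// Native (non-Druid, non-JDBC) tables: build the row type straight from the
// RowResolver and create a plain HiveTableScan.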
// Build row type from field <type, name>
RelDataType rowType = TypeConverter.getType(cluster, rr, null);
// Build RelOptAbstractTable
List<String> fullyQualifiedTabName = new ArrayList<>();
if (tabMetaData.getDbName() != null && !tabMetaData.getDbName().isEmpty()) {
fullyQualifiedTabName.add(tabMetaData.getDbName());
}
fullyQualifiedTabName.add(tabMetaData.getTableName());
optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName,
rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf,
tabNameToTabObject, partitionCache, colStatsCache, noColsMissingStats);
// Build Hive Table Scan Rel
tableRel = new HiveTableScan(cluster, cluster.traitSetOf(HiveRelNode.CONVENTION), optTable,
null == tableAlias ? tabMetaData.getTableName() : tableAlias,
getAliasId(tableAlias, qb), HiveConf.getBoolVar(conf,
HiveConf.ConfVars.HIVE_CBO_RETPATH_HIVEOP), qb.isInsideView()
|| qb.getAliasInsideView().contains(tableAlias.toLowerCase()), tableScanTrait);
}
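// 5. Enable the referential-constraints CBO profile if the table declares such constraints.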
if (optTable.hasReferentialConstraints()) {
profilesCBO.add(ExtendedCBOProfile.REFERENTIAL_CONSTRAINTS);
}
// 6. Add Schema(RR) to RelNode-Schema map
ImmutableMap<String, Integer> hiveToCalciteColMap = buildHiveToCalciteColumnMap(rr);
relToHiveRR.put(tableRel, rr);
relToHiveColNameCalcitePosMap.put(tableRel, hiveToCalciteColMap);
} catch (Exception e) {
if (e instanceof SemanticException) {
throw (SemanticException) e;
} else {
throw new RuntimeException(e);
}
}
return tableRel;
}