phoenix5-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixInputFormat.java [71:253]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@SuppressWarnings({"deprecation", "rawtypes"})
public class PhoenixInputFormat<T extends DBWritable> implements InputFormat<WritableComparable,
        T> {

    private static final Logger LOG = LoggerFactory.getLogger(PhoenixInputFormat.class);

    /**
     * Creates the input format; the only work done here is emitting a debug-level trace.
     */
    public PhoenixInputFormat() {
        // The logger performs its own level check, so a constant message needs no guard.
        LOG.debug("PhoenixInputFormat created");
    }

    /**
     * Computes the input splits for a scan over the Phoenix table named in the job configuration.
     *
     * <p>The method reads the table name and (if present) the serialized filter expression from
     * {@code jobConf}, asks {@link PhoenixQueryBuilder} for the select statement, compiles it into
     * a {@link QueryPlan} and delegates the actual split creation to {@code generateSplits}.
     *
     * @param jobConf   job configuration carrying the table name, column list and filter
     * @param numSplits split-count hint from the framework; not consulted by this implementation
     * @return the generated splits as an array
     * @throws IOException if building the query or generating the splits fails with an I/O error
     */
    @Override
    public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
        final String phoenixTable = jobConf.get(PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME);
        final String engine = jobConf.get(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname,
                HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.getDefaultValue());

        if (LOG.isDebugEnabled()) {
            LOG.debug("Target table name at split phase : " + phoenixTable + "with whereCondition :" +
                    jobConf.get(TableScanDesc.FILTER_TEXT_CONF_STR) +
                    " and " + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " : " +
                    engine);
        }

        // Stays null unless a filter expression is configured AND the decomposer reports
        // that predicate push-down was actually invoked.
        List<PhoenixSearchCondition> pushedConditions = null;
        final String serializedFilter = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
        if (serializedFilter != null) {
            final ExprNodeGenericFuncDesc filterExpr =
                    SerializationUtilities.deserializeExpression(serializedFilter);
            final List<String> columnNames =
                    Arrays.asList(jobConf.get(serdeConstants.LIST_COLUMNS).split(","));
            final PhoenixPredicateDecomposer decomposer =
                    PhoenixPredicateDecomposer.create(columnNames);
            decomposer.decomposePredicate(filterExpr);
            if (decomposer.isCalledPPD()) {
                pushedConditions = decomposer.getSearchConditionList();
            }
        }

        final String selectStatement = PhoenixQueryBuilder.getInstance().buildQuery(jobConf,
                phoenixTable, PhoenixStorageHandlerUtil.getReadColumnNames(jobConf),
                pushedConditions);

        final QueryPlan plan = getQueryPlan(jobConf, selectStatement);
        final List<KeyRange> keyRanges = plan.getSplits();
        final List<InputSplit> generated =
                generateSplits(jobConf, plan, keyRanges, selectStatement);

        return generated.toArray(new InputSplit[0]);
    }

    /**
     * Turns the scan groups of a compiled query plan into {@link PhoenixInputSplit}s.
     *
     * <p>For every scan group returned by {@code qplan.getScans()} the region containing the
     * start row of the group's first scan is looked up; its location string and size are attached
     * to the resulting split(s). When {@link PhoenixStorageHandlerConstants#SPLIT_BY_STATS} is
     * enabled in {@code jobConf}, each scan of a group becomes its own split; otherwise the whole
     * group becomes a single split. Every split carries {@code query} and the first configured
     * input path.
     *
     * @param jobConf job configuration (input paths, split-by-stats flag, scan cache size)
     * @param qplan   compiled query plan whose scans are converted; must not be null
     * @param splits  key ranges of the plan, used only to presize the result list; must not be
     *                null
     * @param query   select statement stored on every generated split
     * @return the generated splits, in the iteration order of {@code qplan.getScans()}
     * @throws IOException          if the HBase connection, region lookup or size calculation
     *                              fails
     * @throws NullPointerException if {@code qplan} or {@code splits} is null
     */
    private List<InputSplit> generateSplits(final JobConf jobConf, final QueryPlan qplan,
                                            final List<KeyRange> splits, String query) throws
            IOException {
        if (qplan == null) {
            throw new NullPointerException("qplan must not be null");
        }
        if (splits == null) {
            throw new NullPointerException("splits must not be null");
        }
        final List<InputSplit> psplits = new ArrayList<>(splits.size());

        Path[] tablePaths = FileInputFormat.getInputPaths(ShimLoader.getHadoopShims()
                .newJobContext(new Job(jobConf)));
        boolean splitByStats = jobConf.getBoolean(PhoenixStorageHandlerConstants.SPLIT_BY_STATS,
                false);

        setScanCacheSize(jobConf);

        // Adding Localization
        // The locator and the admin are Closeable handles obtained from the connection; keeping
        // all three in one try-with-resources releases them (in reverse order) on every exit path.
        try (org.apache.hadoop.hbase.client.Connection connection =
                     ConnectionFactory.createConnection(
                             PhoenixConnectionUtil.getConfiguration(jobConf));
             RegionLocator regionLocator = connection.getRegionLocator(TableName.valueOf(qplan
                     .getTableRef().getTable().getPhysicalName().toString()));
             org.apache.hadoop.hbase.client.Admin admin = connection.getAdmin()) {

            // Loop-invariant: the calculator depends only on the locator and the admin, so it is
            // built once here instead of once per scan group (which also requested a fresh,
            // never-closed Admin on every iteration).
            RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(regionLocator, admin);

            for (List<Scan> scans : qplan.getScans()) {
                PhoenixInputSplit inputSplit;

                // Location and size are taken from the region holding the first scan's start row.
                HRegionLocation location = regionLocator.getRegionLocation(
                        scans.get(0).getStartRow(), false);
                long regionSize = sizeCalculator.getRegionSize(
                        location.getRegionInfo().getRegionName());
                String regionLocation = PhoenixStorageHandlerUtil.getRegionLocation(location, LOG);

                if (splitByStats) {
                    // One split per individual scan of the group.
                    for (Scan aScan : scans) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Split for  scan : " + aScan + "with scanAttribute : " + aScan
                                    .getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : [" +
                                    aScan.getCaching() + ", " + aScan.getCacheBlocks() + ", " + aScan
                                    .getBatch() + "] and  regionLocation : " + regionLocation);
                        }

                        inputSplit = new PhoenixInputSplit(new ArrayList<>(Arrays.asList(aScan)),
                                tablePaths[0], regionLocation, regionSize);
                        inputSplit.setQuery(query);
                        psplits.add(inputSplit);
                    }
                } else {
                    // One split covering the whole scan group.
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Scan count[" + scans.size() + "] : " + Bytes.toStringBinary(scans
                                .get(0).getStartRow()) + " ~ " + Bytes.toStringBinary(scans.get(scans
                                .size() - 1).getStopRow()));
                        LOG.debug("First scan : " + scans.get(0) + "with scanAttribute : " + scans
                                .get(0).getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : " +
                                "[" + scans.get(0).getCaching() + ", " + scans.get(0).getCacheBlocks()
                                + ", " + scans.get(0).getBatch() + "] and  regionLocation : " +
                                regionLocation);

                        for (int i = 0, limit = scans.size(); i < limit; i++) {
                            LOG.debug("EXPECTED_UPPER_REGION_KEY[" + i + "] : " + Bytes
                                    .toStringBinary(scans.get(i).getAttribute
                                            (BaseScannerRegionObserverConstants.EXPECTED_UPPER_REGION_KEY)));
                        }
                    }

                    inputSplit = new PhoenixInputSplit(scans, tablePaths[0], regionLocation,
                            regionSize);
                    inputSplit.setQuery(query);
                    psplits.add(inputSplit);
                }
            }
        }

        return psplits;
    }

    /**
     * Copies the storage handler's scan-cache setting into the HBase client scanner caching
     * property of the same configuration, but only when a positive value is configured.
     *
     * @param jobConf configuration that is read and, for positive values, updated in place
     */
    private void setScanCacheSize(JobConf jobConf) {
        // -1 is the "not configured" sentinel; zero and negative values leave jobConf untouched.
        final int configuredCaching =
                jobConf.getInt(PhoenixStorageHandlerConstants.HBASE_SCAN_CACHE, -1);
        final boolean overrideCaching = configuredCaching > 0;

        if (overrideCaching) {
            jobConf.setInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, configuredCaching);
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("Generating splits with scanCacheSize : " + configuredCaching);
        }
    }

    /**
     * Creates and initializes a {@link PhoenixRecordReader} for the given split.
     *
     * <p>The query stored on the split is recompiled into a {@link QueryPlan}, and the value
     * class is read from {@link PhoenixConfigurationUtil#INPUT_CLASS}, falling back to
     * {@link PhoenixResultWritable}.
     *
     * @param split    split to read; cast to {@link PhoenixInputSplit} to obtain its query
     * @param job      job configuration used for the query plan and the reader
     * @param reporter progress reporter supplied by the framework; not used here
     * @return a record reader already initialized with {@code split}
     * @throws IOException if creating the query plan or initializing the reader fails
     */
    @Override
    public RecordReader<WritableComparable, T> getRecordReader(InputSplit split, JobConf job,
                                                               Reporter reporter) throws
            IOException {
        final PhoenixInputSplit phoenixSplit = (PhoenixInputSplit) split;
        final QueryPlan plan = getQueryPlan(job, phoenixSplit.getQuery());

        @SuppressWarnings("unchecked")
        final Class<T> valueClass = (Class<T>) job.getClass(PhoenixConfigurationUtil.INPUT_CLASS,
                PhoenixResultWritable.class);

        final PhoenixRecordReader<T> reader = new PhoenixRecordReader<>(valueClass, job, plan);
        reader.initialize(split);
        return reader;
    }

    /**
     * Returns the query plan associated with the select query.
     */
    private QueryPlan getQueryPlan(final Configuration configuration, String selectStatement)
            throws IOException {
        try {
            final String currentScnValue = configuration.get(PhoenixConfigurationUtil
                    .CURRENT_SCN_VALUE);
            final Properties overridingProps = new Properties();
            if (currentScnValue != null) {
                overridingProps.put(PhoenixRuntime.CURRENT_SCN_ATTRIB, currentScnValue);
            }
            final Connection connection = PhoenixConnectionUtil.getInputConnection(configuration,
                    overridingProps);
            if (selectStatement == null) {
                throw new NullPointerException();
            }
            final Statement statement = connection.createStatement();
            final PhoenixStatement pstmt = statement.unwrap(PhoenixStatement.class);

            if (LOG.isDebugEnabled()) {
                LOG.debug("Compiled query : " + selectStatement);
            }

            // Optimize the query plan so that we potentially use secondary indexes
            final QueryPlan queryPlan = pstmt.optimizeQuery(selectStatement);
            // Initialize the query plan so it sets up the parallel scans
            queryPlan.iterator(MapReduceParallelScanGrouper.getInstance());
            return queryPlan;
        } catch (Exception exception) {
            LOG.error(String.format("Failed to get the query plan with error [%s]", exception.getMessage()));
            throw new RuntimeException(exception);
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



phoenix5-hive4/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixInputFormat.java [71:253]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
@SuppressWarnings({"deprecation", "rawtypes"})
public class PhoenixInputFormat<T extends DBWritable> implements InputFormat<WritableComparable,
        T> {

    private static final Logger LOG = LoggerFactory.getLogger(PhoenixInputFormat.class);

    /**
     * Creates the input format; the only work done here is emitting a debug-level trace.
     */
    public PhoenixInputFormat() {
        // The logger performs its own level check, so a constant message needs no guard.
        LOG.debug("PhoenixInputFormat created");
    }

    /**
     * Computes the input splits for a scan over the Phoenix table named in the job configuration.
     *
     * <p>The method reads the table name and (if present) the serialized filter expression from
     * {@code jobConf}, asks {@link PhoenixQueryBuilder} for the select statement, compiles it into
     * a {@link QueryPlan} and delegates the actual split creation to {@code generateSplits}.
     *
     * @param jobConf   job configuration carrying the table name, column list and filter
     * @param numSplits split-count hint from the framework; not consulted by this implementation
     * @return the generated splits as an array
     * @throws IOException if building the query or generating the splits fails with an I/O error
     */
    @Override
    public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
        final String phoenixTable = jobConf.get(PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME);
        final String engine = jobConf.get(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname,
                HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.getDefaultValue());

        if (LOG.isDebugEnabled()) {
            LOG.debug("Target table name at split phase : " + phoenixTable + "with whereCondition :" +
                    jobConf.get(TableScanDesc.FILTER_TEXT_CONF_STR) +
                    " and " + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " : " +
                    engine);
        }

        // Stays null unless a filter expression is configured AND the decomposer reports
        // that predicate push-down was actually invoked.
        List<PhoenixSearchCondition> pushedConditions = null;
        final String serializedFilter = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
        if (serializedFilter != null) {
            final ExprNodeGenericFuncDesc filterExpr =
                    SerializationUtilities.deserializeExpression(serializedFilter);
            final List<String> columnNames =
                    Arrays.asList(jobConf.get(serdeConstants.LIST_COLUMNS).split(","));
            final PhoenixPredicateDecomposer decomposer =
                    PhoenixPredicateDecomposer.create(columnNames);
            decomposer.decomposePredicate(filterExpr);
            if (decomposer.isCalledPPD()) {
                pushedConditions = decomposer.getSearchConditionList();
            }
        }

        final String selectStatement = PhoenixQueryBuilder.getInstance().buildQuery(jobConf,
                phoenixTable, PhoenixStorageHandlerUtil.getReadColumnNames(jobConf),
                pushedConditions);

        final QueryPlan plan = getQueryPlan(jobConf, selectStatement);
        final List<KeyRange> keyRanges = plan.getSplits();
        final List<InputSplit> generated =
                generateSplits(jobConf, plan, keyRanges, selectStatement);

        return generated.toArray(new InputSplit[0]);
    }

    /**
     * Turns the scan groups of a compiled query plan into {@link PhoenixInputSplit}s.
     *
     * <p>For every scan group returned by {@code qplan.getScans()} the region containing the
     * start row of the group's first scan is looked up; its location string and size are attached
     * to the resulting split(s). When {@link PhoenixStorageHandlerConstants#SPLIT_BY_STATS} is
     * enabled in {@code jobConf}, each scan of a group becomes its own split; otherwise the whole
     * group becomes a single split. Every split carries {@code query} and the first configured
     * input path.
     *
     * @param jobConf job configuration (input paths, split-by-stats flag, scan cache size)
     * @param qplan   compiled query plan whose scans are converted; must not be null
     * @param splits  key ranges of the plan, used only to presize the result list; must not be
     *                null
     * @param query   select statement stored on every generated split
     * @return the generated splits, in the iteration order of {@code qplan.getScans()}
     * @throws IOException          if the HBase connection, region lookup or size calculation
     *                              fails
     * @throws NullPointerException if {@code qplan} or {@code splits} is null
     */
    private List<InputSplit> generateSplits(final JobConf jobConf, final QueryPlan qplan,
                                            final List<KeyRange> splits, String query) throws
            IOException {
        if (qplan == null) {
            throw new NullPointerException("qplan must not be null");
        }
        if (splits == null) {
            throw new NullPointerException("splits must not be null");
        }
        final List<InputSplit> psplits = new ArrayList<>(splits.size());

        Path[] tablePaths = FileInputFormat.getInputPaths(ShimLoader.getHadoopShims()
                .newJobContext(new Job(jobConf)));
        boolean splitByStats = jobConf.getBoolean(PhoenixStorageHandlerConstants.SPLIT_BY_STATS,
                false);

        setScanCacheSize(jobConf);

        // Adding Localization
        // The locator and the admin are Closeable handles obtained from the connection; keeping
        // all three in one try-with-resources releases them (in reverse order) on every exit path.
        try (org.apache.hadoop.hbase.client.Connection connection =
                     ConnectionFactory.createConnection(
                             PhoenixConnectionUtil.getConfiguration(jobConf));
             RegionLocator regionLocator = connection.getRegionLocator(TableName.valueOf(qplan
                     .getTableRef().getTable().getPhysicalName().toString()));
             org.apache.hadoop.hbase.client.Admin admin = connection.getAdmin()) {

            // Loop-invariant: the calculator depends only on the locator and the admin, so it is
            // built once here instead of once per scan group (which also requested a fresh,
            // never-closed Admin on every iteration).
            RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(regionLocator, admin);

            for (List<Scan> scans : qplan.getScans()) {
                PhoenixInputSplit inputSplit;

                // Location and size are taken from the region holding the first scan's start row.
                HRegionLocation location = regionLocator.getRegionLocation(
                        scans.get(0).getStartRow(), false);
                long regionSize = sizeCalculator.getRegionSize(
                        location.getRegionInfo().getRegionName());
                String regionLocation = PhoenixStorageHandlerUtil.getRegionLocation(location, LOG);

                if (splitByStats) {
                    // One split per individual scan of the group.
                    for (Scan aScan : scans) {
                        if (LOG.isDebugEnabled()) {
                            LOG.debug("Split for  scan : " + aScan + "with scanAttribute : " + aScan
                                    .getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : [" +
                                    aScan.getCaching() + ", " + aScan.getCacheBlocks() + ", " + aScan
                                    .getBatch() + "] and  regionLocation : " + regionLocation);
                        }

                        inputSplit = new PhoenixInputSplit(new ArrayList<>(Arrays.asList(aScan)),
                                tablePaths[0], regionLocation, regionSize);
                        inputSplit.setQuery(query);
                        psplits.add(inputSplit);
                    }
                } else {
                    // One split covering the whole scan group.
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Scan count[" + scans.size() + "] : " + Bytes.toStringBinary(scans
                                .get(0).getStartRow()) + " ~ " + Bytes.toStringBinary(scans.get(scans
                                .size() - 1).getStopRow()));
                        LOG.debug("First scan : " + scans.get(0) + "with scanAttribute : " + scans
                                .get(0).getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : " +
                                "[" + scans.get(0).getCaching() + ", " + scans.get(0).getCacheBlocks()
                                + ", " + scans.get(0).getBatch() + "] and  regionLocation : " +
                                regionLocation);

                        for (int i = 0, limit = scans.size(); i < limit; i++) {
                            LOG.debug("EXPECTED_UPPER_REGION_KEY[" + i + "] : " + Bytes
                                    .toStringBinary(scans.get(i).getAttribute
                                            (BaseScannerRegionObserverConstants.EXPECTED_UPPER_REGION_KEY)));
                        }
                    }

                    inputSplit = new PhoenixInputSplit(scans, tablePaths[0], regionLocation,
                            regionSize);
                    inputSplit.setQuery(query);
                    psplits.add(inputSplit);
                }
            }
        }

        return psplits;
    }

    /**
     * Copies the storage handler's scan-cache setting into the HBase client scanner caching
     * property of the same configuration, but only when a positive value is configured.
     *
     * @param jobConf configuration that is read and, for positive values, updated in place
     */
    private void setScanCacheSize(JobConf jobConf) {
        // -1 is the "not configured" sentinel; zero and negative values leave jobConf untouched.
        final int configuredCaching =
                jobConf.getInt(PhoenixStorageHandlerConstants.HBASE_SCAN_CACHE, -1);
        final boolean overrideCaching = configuredCaching > 0;

        if (overrideCaching) {
            jobConf.setInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, configuredCaching);
        }

        if (LOG.isDebugEnabled()) {
            LOG.debug("Generating splits with scanCacheSize : " + configuredCaching);
        }
    }

    /**
     * Creates and initializes a {@link PhoenixRecordReader} for the given split.
     *
     * <p>The query stored on the split is recompiled into a {@link QueryPlan}, and the value
     * class is read from {@link PhoenixConfigurationUtil#INPUT_CLASS}, falling back to
     * {@link PhoenixResultWritable}.
     *
     * @param split    split to read; cast to {@link PhoenixInputSplit} to obtain its query
     * @param job      job configuration used for the query plan and the reader
     * @param reporter progress reporter supplied by the framework; not used here
     * @return a record reader already initialized with {@code split}
     * @throws IOException if creating the query plan or initializing the reader fails
     */
    @Override
    public RecordReader<WritableComparable, T> getRecordReader(InputSplit split, JobConf job,
                                                               Reporter reporter) throws
            IOException {
        final PhoenixInputSplit phoenixSplit = (PhoenixInputSplit) split;
        final QueryPlan plan = getQueryPlan(job, phoenixSplit.getQuery());

        @SuppressWarnings("unchecked")
        final Class<T> valueClass = (Class<T>) job.getClass(PhoenixConfigurationUtil.INPUT_CLASS,
                PhoenixResultWritable.class);

        final PhoenixRecordReader<T> reader = new PhoenixRecordReader<>(valueClass, job, plan);
        reader.initialize(split);
        return reader;
    }

    /**
     * Returns the query plan associated with the select query.
     */
    private QueryPlan getQueryPlan(final Configuration configuration, String selectStatement)
            throws IOException {
        try {
            final String currentScnValue = configuration.get(PhoenixConfigurationUtil
                    .CURRENT_SCN_VALUE);
            final Properties overridingProps = new Properties();
            if (currentScnValue != null) {
                overridingProps.put(PhoenixRuntime.CURRENT_SCN_ATTRIB, currentScnValue);
            }
            final Connection connection = PhoenixConnectionUtil.getInputConnection(configuration,
                    overridingProps);
            if (selectStatement == null) {
                throw new NullPointerException();
            }
            final Statement statement = connection.createStatement();
            final PhoenixStatement pstmt = statement.unwrap(PhoenixStatement.class);

            if (LOG.isDebugEnabled()) {
                LOG.debug("Compiled query : " + selectStatement);
            }

            // Optimize the query plan so that we potentially use secondary indexes
            final QueryPlan queryPlan = pstmt.optimizeQuery(selectStatement);
            // Initialize the query plan so it sets up the parallel scans
            queryPlan.iterator(MapReduceParallelScanGrouper.getInstance());
            return queryPlan;
        } catch (Exception exception) {
            LOG.error(String.format("Failed to get the query plan with error [%s]", exception.getMessage()));
            throw new RuntimeException(exception);
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -



