phoenix5-hive/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixInputFormat.java [71:253]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
/**
 * Hive input format that derives its splits from a compiled Phoenix query plan.
 *
 * NOTE(review): this listing shows raw collection types; generic type arguments may have been
 * lost when the listing was produced - confirm against the repository file.
 */
@SuppressWarnings({"deprecation", "rawtypes"})
public class PhoenixInputFormat implements InputFormat {

    private static final Logger LOG = LoggerFactory.getLogger(PhoenixInputFormat.class);

    /** Creates the input format and traces the instantiation at debug level. */
    public PhoenixInputFormat() {
        LOG.debug("PhoenixInputFormat created");
    }

    /**
     * Computes the input splits for the Phoenix table named in the job configuration.
     *
     * <p>The serialized Hive filter expression, when present, is turned into a search-condition
     * list; the Phoenix query is built from the table name, the read columns and those
     * conditions; the query plan is compiled and its scans are converted into splits.
     *
     * @param jobConf job configuration carrying the table name, read columns and filter
     * @param numSplits split-count hint; not read by this implementation
     * @return the generated splits as an array
     * @throws IOException if compiling the plan or generating the splits fails with one
     */
    @Override
    public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException {
        final String tableName = jobConf.get(PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME);
        final String engineKey = HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname;
        final String executionEngine =
                jobConf.get(engineKey, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.getDefaultValue());

        if (LOG.isDebugEnabled()) {
            final String whereCondition = jobConf.get(TableScanDesc.FILTER_TEXT_CONF_STR);
            LOG.debug("Target table name at split phase : " + tableName + "with whereCondition :"
                    + whereCondition + " and " + engineKey + " : " + executionEngine);
        }

        final List searchConditions = extractSearchConditions(jobConf);
        final String query = PhoenixQueryBuilder.getInstance().buildQuery(jobConf, tableName,
                PhoenixStorageHandlerUtil.getReadColumnNames(jobConf), searchConditions);

        final QueryPlan plan = getQueryPlan(jobConf, query);
        final List planSplits = plan.getSplits();
        final List inputSplits = generateSplits(jobConf, plan, planSplits, query);
        return inputSplits.toArray(new InputSplit[inputSplits.size()]);
    }

    /**
     * Decomposes the serialized Hive filter expression of the job into search conditions.
     *
     * @param jobConf job configuration holding the serialized filter and the column list
     * @return the search-condition list, or {@code null} when no filter is configured or the
     *         decomposer reports that predicate push-down was not invoked
     */
    private List extractSearchConditions(JobConf jobConf) {
        final String serializedFilter = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR);
        if (serializedFilter == null) {
            return null;
        }

        final ExprNodeGenericFuncDesc filterExpr =
                SerializationUtilities.deserializeExpression(serializedFilter);
        final String[] columnNames = jobConf.get(serdeConstants.LIST_COLUMNS).split(",");
        final PhoenixPredicateDecomposer decomposer =
                PhoenixPredicateDecomposer.create(Arrays.asList(columnNames));
        decomposer.decomposePredicate(filterExpr);

        return decomposer.isCalledPPD() ? decomposer.getSearchConditionList() : null;
    }
private List generateSplits(final JobConf jobConf, final QueryPlan qplan, final List splits, String query) throws IOException { if (qplan == null){ throw new NullPointerException(); }if (splits == null){ throw new NullPointerException(); } final List psplits = new ArrayList<>(splits.size()); Path[] tablePaths = FileInputFormat.getInputPaths(ShimLoader.getHadoopShims() .newJobContext(new Job(jobConf))); boolean splitByStats = jobConf.getBoolean(PhoenixStorageHandlerConstants.SPLIT_BY_STATS, false); setScanCacheSize(jobConf); // Adding Localization try (org.apache.hadoop.hbase.client.Connection connection = ConnectionFactory.createConnection(PhoenixConnectionUtil.getConfiguration(jobConf))) { RegionLocator regionLocator = connection.getRegionLocator(TableName.valueOf(qplan .getTableRef().getTable().getPhysicalName().toString())); for (List scans : qplan.getScans()) { PhoenixInputSplit inputSplit; HRegionLocation location = regionLocator.getRegionLocation(scans.get(0).getStartRow() , false); RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(regionLocator, connection.getAdmin()); long regionSize = sizeCalculator.getRegionSize(location.getRegionInfo().getRegionName()); String regionLocation = PhoenixStorageHandlerUtil.getRegionLocation(location, LOG); if (splitByStats) { for (Scan aScan : scans) { if (LOG.isDebugEnabled()) { LOG.debug("Split for scan : " + aScan + "with scanAttribute : " + aScan .getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : [" + aScan.getCaching() + ", " + aScan.getCacheBlocks() + ", " + aScan .getBatch() + "] and regionLocation : " + regionLocation); } inputSplit = new PhoenixInputSplit(new ArrayList<>(Arrays.asList(aScan)), tablePaths[0], regionLocation, regionSize); inputSplit.setQuery(query); psplits.add(inputSplit); } } else { if (LOG.isDebugEnabled()) { LOG.debug("Scan count[" + scans.size() + "] : " + Bytes.toStringBinary(scans .get(0).getStartRow()) + " ~ " + Bytes.toStringBinary(scans.get(scans .size() - 
1).getStopRow())); LOG.debug("First scan : " + scans.get(0) + "with scanAttribute : " + scans .get(0).getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : " + "[" + scans.get(0).getCaching() + ", " + scans.get(0).getCacheBlocks() + ", " + scans.get(0).getBatch() + "] and regionLocation : " + regionLocation); for (int i = 0, limit = scans.size(); i < limit; i++) { LOG.debug("EXPECTED_UPPER_REGION_KEY[" + i + "] : " + Bytes .toStringBinary(scans.get(i).getAttribute (BaseScannerRegionObserverConstants.EXPECTED_UPPER_REGION_KEY))); } } inputSplit = new PhoenixInputSplit(scans, tablePaths[0], regionLocation, regionSize); inputSplit.setQuery(query); psplits.add(inputSplit); } } } return psplits; } private void setScanCacheSize(JobConf jobConf) { int scanCacheSize = jobConf.getInt(PhoenixStorageHandlerConstants.HBASE_SCAN_CACHE, -1); if (scanCacheSize > 0) { jobConf.setInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, scanCacheSize); } if (LOG.isDebugEnabled()) { LOG.debug("Generating splits with scanCacheSize : " + scanCacheSize); } } @Override public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { final QueryPlan queryPlan = getQueryPlan(job, ((PhoenixInputSplit) split).getQuery()); @SuppressWarnings("unchecked") final Class inputClass = (Class) job.getClass(PhoenixConfigurationUtil.INPUT_CLASS, PhoenixResultWritable.class); PhoenixRecordReader recordReader = new PhoenixRecordReader(inputClass, job, queryPlan); recordReader.initialize(split); return recordReader; } /** * Returns the query plan associated with the select query. 
*/ private QueryPlan getQueryPlan(final Configuration configuration, String selectStatement) throws IOException { try { final String currentScnValue = configuration.get(PhoenixConfigurationUtil .CURRENT_SCN_VALUE); final Properties overridingProps = new Properties(); if (currentScnValue != null) { overridingProps.put(PhoenixRuntime.CURRENT_SCN_ATTRIB, currentScnValue); } final Connection connection = PhoenixConnectionUtil.getInputConnection(configuration, overridingProps); if (selectStatement == null) { throw new NullPointerException(); } final Statement statement = connection.createStatement(); final PhoenixStatement pstmt = statement.unwrap(PhoenixStatement.class); if (LOG.isDebugEnabled()) { LOG.debug("Compiled query : " + selectStatement); } // Optimize the query plan so that we potentially use secondary indexes final QueryPlan queryPlan = pstmt.optimizeQuery(selectStatement); // Initialize the query plan so it sets up the parallel scans queryPlan.iterator(MapReduceParallelScanGrouper.getInstance()); return queryPlan; } catch (Exception exception) { LOG.error(String.format("Failed to get the query plan with error [%s]", exception.getMessage())); throw new RuntimeException(exception); - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - phoenix5-hive4/src/main/java/org/apache/phoenix/hive/mapreduce/PhoenixInputFormat.java [71:253]: - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - @SuppressWarnings({"deprecation", "rawtypes"}) public class PhoenixInputFormat implements InputFormat { private static final Logger LOG = LoggerFactory.getLogger(PhoenixInputFormat.class); public PhoenixInputFormat() { if (LOG.isDebugEnabled()) { LOG.debug("PhoenixInputFormat created"); } } @Override public InputSplit[] getSplits(JobConf jobConf, int numSplits) throws IOException { String tableName = jobConf.get(PhoenixStorageHandlerConstants.PHOENIX_TABLE_NAME); String query; String executionEngine = 
jobConf.get(HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.getDefaultValue()); if (LOG.isDebugEnabled()) { LOG.debug("Target table name at split phase : " + tableName + "with whereCondition :" + jobConf.get(TableScanDesc.FILTER_TEXT_CONF_STR) + " and " + HiveConf.ConfVars.HIVE_EXECUTION_ENGINE.varname + " : " + executionEngine); } List conditionList = null; String filterExprSerialized = jobConf.get(TableScanDesc.FILTER_EXPR_CONF_STR); if (filterExprSerialized != null) { ExprNodeGenericFuncDesc filterExpr = SerializationUtilities.deserializeExpression(filterExprSerialized); PhoenixPredicateDecomposer predicateDecomposer = PhoenixPredicateDecomposer .create(Arrays.asList(jobConf.get(serdeConstants.LIST_COLUMNS).split(","))); predicateDecomposer.decomposePredicate(filterExpr); if (predicateDecomposer.isCalledPPD()) { conditionList = predicateDecomposer.getSearchConditionList(); } } query = PhoenixQueryBuilder.getInstance().buildQuery(jobConf, tableName, PhoenixStorageHandlerUtil.getReadColumnNames(jobConf), conditionList); final QueryPlan queryPlan = getQueryPlan(jobConf, query); final List allSplits = queryPlan.getSplits(); final List splits = generateSplits(jobConf, queryPlan, allSplits, query); return splits.toArray(new InputSplit[splits.size()]); } private List generateSplits(final JobConf jobConf, final QueryPlan qplan, final List splits, String query) throws IOException { if (qplan == null){ throw new NullPointerException(); }if (splits == null){ throw new NullPointerException(); } final List psplits = new ArrayList<>(splits.size()); Path[] tablePaths = FileInputFormat.getInputPaths(ShimLoader.getHadoopShims() .newJobContext(new Job(jobConf))); boolean splitByStats = jobConf.getBoolean(PhoenixStorageHandlerConstants.SPLIT_BY_STATS, false); setScanCacheSize(jobConf); // Adding Localization try (org.apache.hadoop.hbase.client.Connection connection = 
ConnectionFactory.createConnection(PhoenixConnectionUtil.getConfiguration(jobConf))) { RegionLocator regionLocator = connection.getRegionLocator(TableName.valueOf(qplan .getTableRef().getTable().getPhysicalName().toString())); for (List scans : qplan.getScans()) { PhoenixInputSplit inputSplit; HRegionLocation location = regionLocator.getRegionLocation(scans.get(0).getStartRow() , false); RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(regionLocator, connection.getAdmin()); long regionSize = sizeCalculator.getRegionSize(location.getRegionInfo().getRegionName()); String regionLocation = PhoenixStorageHandlerUtil.getRegionLocation(location, LOG); if (splitByStats) { for (Scan aScan : scans) { if (LOG.isDebugEnabled()) { LOG.debug("Split for scan : " + aScan + "with scanAttribute : " + aScan .getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : [" + aScan.getCaching() + ", " + aScan.getCacheBlocks() + ", " + aScan .getBatch() + "] and regionLocation : " + regionLocation); } inputSplit = new PhoenixInputSplit(new ArrayList<>(Arrays.asList(aScan)), tablePaths[0], regionLocation, regionSize); inputSplit.setQuery(query); psplits.add(inputSplit); } } else { if (LOG.isDebugEnabled()) { LOG.debug("Scan count[" + scans.size() + "] : " + Bytes.toStringBinary(scans .get(0).getStartRow()) + " ~ " + Bytes.toStringBinary(scans.get(scans .size() - 1).getStopRow())); LOG.debug("First scan : " + scans.get(0) + "with scanAttribute : " + scans .get(0).getAttributesMap() + " [scanCache, cacheBlock, scanBatch] : " + "[" + scans.get(0).getCaching() + ", " + scans.get(0).getCacheBlocks() + ", " + scans.get(0).getBatch() + "] and regionLocation : " + regionLocation); for (int i = 0, limit = scans.size(); i < limit; i++) { LOG.debug("EXPECTED_UPPER_REGION_KEY[" + i + "] : " + Bytes .toStringBinary(scans.get(i).getAttribute (BaseScannerRegionObserverConstants.EXPECTED_UPPER_REGION_KEY))); } } inputSplit = new PhoenixInputSplit(scans, tablePaths[0], regionLocation, 
regionSize); inputSplit.setQuery(query); psplits.add(inputSplit); } } } return psplits; } private void setScanCacheSize(JobConf jobConf) { int scanCacheSize = jobConf.getInt(PhoenixStorageHandlerConstants.HBASE_SCAN_CACHE, -1); if (scanCacheSize > 0) { jobConf.setInt(HConstants.HBASE_CLIENT_SCANNER_CACHING, scanCacheSize); } if (LOG.isDebugEnabled()) { LOG.debug("Generating splits with scanCacheSize : " + scanCacheSize); } } @Override public RecordReader getRecordReader(InputSplit split, JobConf job, Reporter reporter) throws IOException { final QueryPlan queryPlan = getQueryPlan(job, ((PhoenixInputSplit) split).getQuery()); @SuppressWarnings("unchecked") final Class inputClass = (Class) job.getClass(PhoenixConfigurationUtil.INPUT_CLASS, PhoenixResultWritable.class); PhoenixRecordReader recordReader = new PhoenixRecordReader(inputClass, job, queryPlan); recordReader.initialize(split); return recordReader; } /** * Returns the query plan associated with the select query. */ private QueryPlan getQueryPlan(final Configuration configuration, String selectStatement) throws IOException { try { final String currentScnValue = configuration.get(PhoenixConfigurationUtil .CURRENT_SCN_VALUE); final Properties overridingProps = new Properties(); if (currentScnValue != null) { overridingProps.put(PhoenixRuntime.CURRENT_SCN_ATTRIB, currentScnValue); } final Connection connection = PhoenixConnectionUtil.getInputConnection(configuration, overridingProps); if (selectStatement == null) { throw new NullPointerException(); } final Statement statement = connection.createStatement(); final PhoenixStatement pstmt = statement.unwrap(PhoenixStatement.class); if (LOG.isDebugEnabled()) { LOG.debug("Compiled query : " + selectStatement); } // Optimize the query plan so that we potentially use secondary indexes final QueryPlan queryPlan = pstmt.optimizeQuery(selectStatement); // Initialize the query plan so it sets up the parallel scans 
queryPlan.iterator(MapReduceParallelScanGrouper.getInstance()); return queryPlan; } catch (Exception exception) { LOG.error(String.format("Failed to get the query plan with error [%s]", exception.getMessage())); throw new RuntimeException(exception); - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -