private int initializeHadoopLogsAnalyzer()

in hadoop-tools/hadoop-rumen/src/main/java/org/apache/hadoop/tools/rumen/HadoopLogsAnalyzer.java [311:510]


  private int initializeHadoopLogsAnalyzer(String[] args)
      throws FileNotFoundException, IOException {
    Path jobTraceFilename = null;
    Path topologyFilename = null;
    if (args.length == 0 || args[args.length - 1].charAt(0) == '-') {
      throw new IllegalArgumentException("No input specified.");
    } else {
      inputFilename = args[args.length - 1];
    }

    for (int i = 0; i < args.length - (inputFilename == null ? 0 : 1); ++i) {
      if (StringUtils.equalsIgnoreCase("-h", args[i])
          || StringUtils.equalsIgnoreCase("-help", args[i])) {
        usage();
        return 0;
      }

      if (StringUtils.equalsIgnoreCase("-c", args[i])
          || StringUtils.equalsIgnoreCase("-collect-prefixes", args[i])) {
        collecting = true;
        continue;
      }

      // these control the job digest
      if (StringUtils.equalsIgnoreCase("-write-job-trace", args[i])) {
        ++i;
        jobTraceFilename = new Path(args[i]);
        continue;
      }

      if (StringUtils.equalsIgnoreCase("-single-line-job-traces", args[i])) {
        prettyprintTrace = false;
        continue;
      }

      if (StringUtils.equalsIgnoreCase("-omit-task-details", args[i])) {
        omitTaskDetails = true;
        continue;
      }

      if (StringUtils.equalsIgnoreCase("-write-topology", args[i])) {
        ++i;
        topologyFilename = new Path(args[i]);
        continue;
      }

      if (StringUtils.equalsIgnoreCase("-job-digest-spectra", args[i])) {
        ArrayList<Integer> values = new ArrayList<Integer>();

        ++i;

        while (i < args.length && Character.isDigit(args[i].charAt(0))) {
          values.add(Integer.parseInt(args[i]));
          ++i;
        }

        if (values.size() == 0) {
          throw new IllegalArgumentException("Empty -job-digest-spectra list");
        }

        attemptTimesPercentiles = new int[values.size()];

        int lastValue = 0;

        for (int j = 0; j < attemptTimesPercentiles.length; ++j) {
          if (values.get(j) <= lastValue || values.get(j) >= 100) {
            throw new IllegalArgumentException(
                "Bad -job-digest-spectra percentiles list");
          }
          attemptTimesPercentiles[j] = values.get(j);
        }

        --i;
        continue;
      }

      if (StringUtils.equalsIgnoreCase("-d", args[i])
          || StringUtils.equalsIgnoreCase("-debug", args[i])) {
        debug = true;
        continue;
      }

      if (StringUtils.equalsIgnoreCase("-spreads", args[i])) {
        int min = Integer.parseInt(args[i + 1]);
        int max = Integer.parseInt(args[i + 2]);

        if (min < max && min < 1000 && max < 1000) {
          spreadMin = min;
          spreadMax = max;
          spreading = true;
          i += 2;
        }
        continue;
      }

      // These control log-wide CDF outputs
      if (StringUtils.equalsIgnoreCase("-delays", args[i])) {
        delays = true;
        continue;
      }

      if (StringUtils.equalsIgnoreCase("-runtimes", args[i])) {
        runtimes = true;
        continue;
      }

      if (StringUtils.equalsIgnoreCase("-tasktimes", args[i])) {
        collectTaskTimes = true;
        continue;
      }

      if (StringUtils.equalsIgnoreCase("-v1", args[i])) {
        version = 1;
        continue;
      }

      throw new IllegalArgumentException("Unrecognized argument: " + args[i]);
    }

    runTimeDists = newDistributionBlock();
    delayTimeDists = newDistributionBlock();
    mapTimeSpreadDists = newDistributionBlock("map-time-spreads");
    shuffleTimeSpreadDists = newDistributionBlock();
    sortTimeSpreadDists = newDistributionBlock();
    reduceTimeSpreadDists = newDistributionBlock();

    mapTimeDists = newDistributionBlock();
    shuffleTimeDists = newDistributionBlock();
    sortTimeDists = newDistributionBlock();
    reduceTimeDists = newDistributionBlock();

    taskAttemptStartTimes = new HashMap<String, Long>();
    taskReduceAttemptShuffleEndTimes = new HashMap<String, Long>();
    taskReduceAttemptSortEndTimes = new HashMap<String, Long>();
    taskMapAttemptFinishTimes = new HashMap<String, Long>();
    taskReduceAttemptFinishTimes = new HashMap<String, Long>();

    final Path inputPath = new Path(inputFilename);

    inputIsDirectory = pathIsDirectory(inputPath);

    if (jobTraceFilename != null && attemptTimesPercentiles == null) {
      attemptTimesPercentiles = new int[19];

      for (int i = 0; i < 19; ++i) {
        attemptTimesPercentiles[i] = (i + 1) * 5;
      }
    }

    if (!inputIsDirectory) {
      input = maybeUncompressedPath(inputPath);
    } else {
      inputDirectoryPath = inputPath;
      FileSystem fs = inputPath.getFileSystem(getConf());
      FileStatus[] statuses = fs.listStatus(inputPath);
      inputDirectoryFiles = new String[statuses.length];

      for (int i = 0; i < statuses.length; ++i) {
        inputDirectoryFiles[i] = statuses[i].getPath().getName();
      }

      // filter out the .crc files, if any
      int dropPoint = 0;

      for (int i = 0; i < inputDirectoryFiles.length; ++i) {
        String name = inputDirectoryFiles[i];

        if (!(name.length() >= 4 && ".crc".equals(name
            .substring(name.length() - 4)))) {
          inputDirectoryFiles[dropPoint++] = name;
        }
      }

      LOG.info("We dropped " + (inputDirectoryFiles.length - dropPoint)
          + " crc files.");

      String[] new_inputDirectoryFiles = new String[dropPoint];
      System.arraycopy(inputDirectoryFiles, 0, new_inputDirectoryFiles, 0,
          dropPoint);
      inputDirectoryFiles = new_inputDirectoryFiles;

      Arrays.sort(inputDirectoryFiles);

      if (!setNextDirectoryInputStream()) {
        throw new FileNotFoundException("Empty directory specified.");
      }
    }

    if (jobTraceFilename != null) {
      jobTraceGen = new DefaultOutputter<LoggedJob>();
      jobTraceGen.init(jobTraceFilename, getConf());

      if (topologyFilename != null) {
        topologyGen = new DefaultOutputter<LoggedNetworkTopology>();
        topologyGen.init(topologyFilename, getConf());
      }
    }

    return 0;
  }