public void setConf()

in core/src/main/java/org/apache/stormcrawler/protocol/RobotRulesParser.java [100:166]


    /**
     * Configures the parser from the given configuration: resolves the agent names used when
     * evaluating robots rules and (re)builds the two caches.
     *
     * <p>The advertised agent name is read from {@code http.agent.name}; additional names are read
     * from {@code http.robots.agents}. All names are trimmed, lower-cased with {@link Locale#ROOT}
     * and passed to {@code checkAgentValue} before being registered in {@code agentNames}. The
     * advertised agent name is always registered, and always before the additional names.
     *
     * <p>The caches are built from Caffeine specification strings read from the configuration keys
     * held in {@code cacheConfigParamName} and {@code errorcacheConfigParamName}, with built-in
     * defaults when those keys are absent.
     *
     * @param conf the configuration to read from
     * @throws RuntimeException if {@code http.agent.name} is not configured
     */
    public void setConf(Config conf) {

        // The agent name we advertise to robots files; mandatory.
        String agentName = ConfUtils.getString(conf, "http.agent.name");
        if (null == agentName) {
            throw new RuntimeException("Agent name not configured!");
        }

        agentName = agentName.toLowerCase(Locale.ROOT);
        checkAgentValue(agentName);

        List<String> configuredAgentNames = ConfUtils.loadListFromConf("http.robots.agents", conf);

        // Backward compatibility: a single configured entry may itself be a
        // comma-separated list of agent names, so split it into its tokens.
        List<String> rawAgents = new ArrayList<>();
        if (configuredAgentNames.size() == 1) {
            StringTokenizer tok = new StringTokenizer(configuredAgentNames.get(0), ",");
            while (tok.hasMoreTokens()) {
                rawAgents.add(tok.nextToken());
            }
        } else {
            rawAgents.addAll(configuredAgentNames);
        }

        // Normalise and validate every additional agent name the same way as
        // the advertised one.
        List<String> agents = new ArrayList<>(rawAgents.size());
        for (String raw : rawAgents) {
            String agent = raw.trim().toLowerCase(Locale.ROOT);
            checkAgentValue(agent);
            agents.add(agent);
        }

        /*
         * If there are no agents for robots-parsing, use the default agent-string. If
         * both are present, our agent-string should be the first one we advertise to
         * robots-parsing.
         */
        if (agents.isEmpty()) {
            LOG.info(
                    "No agents listed in 'http.robots.agents' property! Using http.agent.name [{}]",
                    agentName);
            // agentName is already lower-cased above
            this.agentNames.add(agentName);
        } else {
            if (!agents.get(0).equalsIgnoreCase(agentName)) {
                LOG.info(
                        "Agent we advertise ({}) not listed first in 'http.robots.agents' property!",
                        agentName);
            }

            // Always register the advertised agent name first; previously it was
            // skipped when listed first and never added otherwise.
            this.agentNames.add(agentName);

            // Append the remaining agents from the http.robots.agents property,
            // skipping the advertised name so that it is not registered twice.
            for (String agent : agents) {
                if (!agent.equalsIgnoreCase(agentName)) {
                    this.agentNames.add(agent);
                }
            }
        }

        // Cache built from a Caffeine spec string.
        String spec =
                ConfUtils.getString(
                        conf, cacheConfigParamName, "maximumSize=10000,expireAfterWrite=6h");
        CACHE = Caffeine.from(spec).build();

        // Second cache with its own spec; the default expiry is shorter (1h).
        spec =
                ConfUtils.getString(
                        conf, errorcacheConfigParamName, "maximumSize=10000,expireAfterWrite=1h");
        ERRORCACHE = Caffeine.from(spec).build();
    }