static int prefork_run()

in server/mpm/prefork/prefork.c [926:1290]


/* Parent-process main routine of the prefork MPM for one server generation.
 *
 * What the body does, in order:
 *   1. Records the pid file.  On the first call it creates
 *      retained->gen_pool; on later calls it stops the previous generation's
 *      children (AP_SIG_GRACEFUL per child on a graceful restart, SIGHUP to
 *      the process group otherwise), clears gen_pool and increments
 *      my_generation.
 *   2. On a non-graceful start, runs the pre_mpm hook and resets the bucket
 *      count and idle_spawn_rate.
 *   3. Duplicates the listeners into buckets and, per bucket, opens a
 *      pipe-of-death (skipped in one_process mode) and creates the accept
 *      mutex (through SAFE_ACCEPT).
 *   4. Raises the daemon limits so none is below num_buckets, then starts
 *      the initial children (immediately on a non-graceful start, lazily in
 *      the main loop on a graceful restart).
 *   5. Loops, reaping exited children, replacing them, and performing idle
 *      server maintenance, until restart_pending or shutdown_pending is set.
 *   6. Performs an ungraceful shutdown, a graceful shutdown, or returns so
 *      that the caller can restart.
 *
 * Parameters:
 *   _pconf  not referenced in the body; the body uses an identifier `pconf`
 *           that is not declared in this function.
 *   plog    passed to ap_log_command_line().
 *   s       server record; s->process->pool is handed to ap_run_pre_mpm(),
 *           and s itself to ap_proc_mutex_create(), ap_log_command_line()
 *           and ap_log_mpm_common().
 *
 * Returns:
 *   OK    the main loop ended without shutdown_pending set (i.e. a restart
 *         was requested).
 *   DONE  after an ungraceful or graceful shutdown.
 *   !OK   pre_mpm hook failure, listener/POD/mutex setup failure, a fatal
 *         child exit in the current generation (or from an unknown child),
 *         or make_child() returning in one_process mode.
 */
static int prefork_run(apr_pool_t *_pconf, apr_pool_t *plog, server_rec *s)
{
    ap_listen_rec **listen_buckets = NULL;
    /* Starts as the bucket count stored by the previous run; it bounds the
     * POD loop below that stops the previous generation on graceful restart.
     */
    int num_buckets = retained->mpm->num_buckets;
    int remaining_children_to_start;
    apr_status_t rv;
    /* Receives the bucket index as decimal text, passed on to
     * ap_proc_mutex_create().
     */
    char id[16];
    int i;

    ap_log_pid(pconf, ap_pid_fname);

    /* On first startup create gen_pool to satisfy the lifetime of the
     * parent's PODs and listeners; on restart stop the children from the
     * previous generation and clear gen_pool for the next one.
     */
    if (!retained->gen_pool) {
        apr_pool_create(&retained->gen_pool, ap_pglobal);
    }
    else {
        if (retained->mpm->was_graceful) {
            /* kill off the idle ones */
            for (i = 0; i < num_buckets; i++) {
                ap_mpm_pod_killpg(retained->buckets[i].pod,
                                  retained->max_daemons_limit);
            }

            /* This is mostly for debugging... so that we know what is still
             * gracefully dealing with existing request.  This will break
             * in a very nasty way if we ever have the scoreboard totally
             * file-based (no shared memory)
             */
            for (i = 0; i < ap_daemons_limit; ++i) {
                if (ap_scoreboard_image->servers[i][0].status != SERVER_DEAD) {
                    ap_scoreboard_image->servers[i][0].status = SERVER_GRACEFUL;
                    /* Ask each child to close its listeners.
                     *
                     * NOTE: we use the scoreboard, because if we send SIGUSR1
                     * to every process in the group, this may include CGI's,
                     * piped loggers, etc. They almost certainly won't handle
                     * it gracefully.
                     */
                    ap_mpm_safe_kill(ap_scoreboard_image->parent[i].pid,
                                     AP_SIG_GRACEFUL);
                }
            }
        }
        else {
            /* Kill 'em off */
            if (ap_unixd_killpg(getpgrp(), SIGHUP) < 0) {
                ap_log_error(APLOG_MARK, APLOG_WARNING, errno,
                             ap_server_conf, APLOGNO(00172) "killpg SIGHUP");
            }
            ap_reclaim_child_processes(0, /* Not when just starting up */
                                       prefork_note_child_killed);
        }
        /* The buckets array was allocated from gen_pool (see the
         * apr_pcalloc() below), so the pointer is reset together with the
         * pool; it must not be used again until it is re-allocated.
         */
        apr_pool_clear(retained->gen_pool);
        retained->buckets = NULL;

        /* advance to the next generation */
        /* XXX: we really need to make sure this new generation number isn't in
         * use by any of the children.
         */
        ++retained->mpm->my_generation;
    }

    if (!retained->mpm->was_graceful) {
        /* Failure of the pre_mpm hook aborts the run. */
        if (ap_run_pre_mpm(s->process->pool, SB_SHARED) != OK) {
            retained->mpm->mpm_state = AP_MPMQ_STOPPING;
            return !OK;
        }
        /* num_buckets is passed by address to ap_duplicate_listeners()
         * below.  NOTE(review): 0 presumably asks that helper to choose the
         * bucket count itself -- confirm against its documentation.
         */
        num_buckets = (one_process) ? 1 : 0; /* one_process => one bucket */
        retained->idle_spawn_rate = 1; /* reset idle_spawn_rate */
    }

    /* Now on for the new generation. */
    ap_scoreboard_image->global->running_generation = retained->mpm->my_generation;
    ap_unixd_mpm_set_signals(pconf, one_process);

    /* Fills listen_buckets and may update num_buckets; both are consumed by
     * the per-bucket setup loop that follows.
     */
    if ((rv = ap_duplicate_listeners(retained->gen_pool, ap_server_conf,
                                     &listen_buckets, &num_buckets))) {
        ap_log_error(APLOG_MARK, APLOG_CRIT, rv,
                     ap_server_conf, APLOGNO(03280)
                     "could not duplicate listeners");
        return !OK;
    }

    /* Zero-initialised, so pod and mutex stay NULL for any bucket where the
     * corresponding creation step is skipped.
     */
    retained->buckets = apr_pcalloc(retained->gen_pool,
                                    num_buckets * sizeof(*retained->buckets));
    for (i = 0; i < num_buckets; i++) {
        if (!one_process /* no POD in one_process mode */
                && (rv = ap_mpm_pod_open(retained->gen_pool,
                                         &retained->buckets[i].pod))) {
            ap_log_error(APLOG_MARK, APLOG_CRIT, rv,
                         ap_server_conf, APLOGNO(03281)
                         "could not open pipe-of-death");
            return !OK;
        }
        /* Initialize cross-process accept lock (safe accept needed only) */
        /* The comma expression first formats the bucket index into id and
         * then yields the status of ap_proc_mutex_create().  SAFE_ACCEPT is
         * defined outside this function; NOTE(review): it presumably
         * evaluates to success without running its argument when no accept
         * serialization is needed -- confirm at its definition.
         */
        if ((rv = SAFE_ACCEPT((apr_snprintf(id, sizeof id, "%i", i),
                               ap_proc_mutex_create(&retained->buckets[i].mutex,
                                                    NULL, AP_ACCEPT_MUTEX_TYPE,
                                                    id, s, retained->gen_pool,
                                                    0))))) {
            ap_log_error(APLOG_MARK, APLOG_CRIT, rv,
                         ap_server_conf, APLOGNO(03282)
                         "could not create accept mutex");
            return !OK;
        }
        retained->buckets[i].listeners = listen_buckets[i];
    }
    /* Remember the count for the next call (read at the top of this
     * function on restart).
     */
    retained->mpm->num_buckets = num_buckets;

    /* Don't thrash since num_buckets depends on the
     * system and the number of online CPU cores...
     */
    /* Each limit is raised to at least num_buckets, and max_free to at
     * least min_free + num_buckets.
     */
    if (ap_daemons_limit < num_buckets)
        ap_daemons_limit = num_buckets;
    if (ap_daemons_to_start < num_buckets)
        ap_daemons_to_start = num_buckets;
    if (ap_daemons_min_free < num_buckets)
        ap_daemons_min_free = num_buckets;
    if (ap_daemons_max_free < ap_daemons_min_free + num_buckets)
        ap_daemons_max_free = ap_daemons_min_free + num_buckets;

    /* If we're doing a graceful_restart then we're going to see a lot
     * of children exiting immediately when we get into the main loop
     * below (because we just sent them AP_SIG_GRACEFUL).  This happens pretty
     * rapidly... and for each one that exits we'll start a new one until
     * we reach at least daemons_min_free.  But we may be permitted to
     * start more than that, so we'll just keep track of how many we're
     * supposed to start up without the 1 second penalty between each fork.
     */
    remaining_children_to_start = ap_daemons_to_start;
    if (remaining_children_to_start > ap_daemons_limit) {
        remaining_children_to_start = ap_daemons_limit;
    }
    if (!retained->mpm->was_graceful) {
        /* Non-graceful start: spawn the whole initial batch right away. */
        startup_children(remaining_children_to_start);
        remaining_children_to_start = 0;
    }
    else {
        /* give the system some time to recover before kicking into
         * exponential mode
         */
        retained->hold_off_on_exponential_spawning = 10;
    }

    ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00163)
                "%s configured -- resuming normal operations",
                ap_get_server_description());
    ap_log_error(APLOG_MARK, APLOG_INFO, 0, ap_server_conf, APLOGNO(00164)
                "Server built: %s", ap_get_server_built());
    ap_log_command_line(plog, s);
    ap_log_mpm_common(s);
    /* Only the first bucket's mutex is reported; a NULL mutex is logged as
     * "none".
     */
    ap_log_error(APLOG_MARK, APLOG_DEBUG, 0, ap_server_conf, APLOGNO(00165)
                "Accept mutex: %s (default: %s)",
                (retained->buckets[0].mutex)
                    ? apr_proc_mutex_name(retained->buckets[0].mutex)
                    : "none",
                apr_proc_mutex_defname());

    retained->mpm->mpm_state = AP_MPMQ_RUNNING;

    /* one_process mode: no main loop.  make_child() is expected not to
     * return here; the assertion and the return only guard against that
     * expectation being violated.
     */
    if (one_process) {
        AP_MONCONTROL(1);
        make_child(ap_server_conf, 0);
        /* NOTREACHED */
        ap_assert(0);
        return !OK;
    }

    /* Main loop: each iteration either handles one exited process
     * (pid.pid != -1) or, when no exit was reported, starts any still
     * outstanding children or performs idle server maintenance.
     */
    while (!retained->mpm->restart_pending && !retained->mpm->shutdown_pending) {
        int child_slot;
        apr_exit_why_e exitwhy;
        int status, processed_status;
        /* this is a memory leak, but I'll fix it later. */
        apr_proc_t pid;

        ap_wait_or_timeout(&exitwhy, &status, &pid, pconf, ap_server_conf);

        /* XXX: if it takes longer than 1 second for all our children
         * to start up and get into IDLE state then we may spawn an
         * extra child
         */
        if (pid.pid != -1) {
            processed_status = ap_process_child_status(&pid, exitwhy, status);
            /* A negative slot means the pid is not one of our scoreboard
             * children (see the child_slot >= 0 test below).
             */
            child_slot = ap_find_child_by_pid(&pid);
            if (processed_status == APEXIT_CHILDFATAL) {
                /* fix race condition found in PR 39311
                 * A child created at the same time as a graceful happens 
                 * can find the lock missing and create a fatal error.
                 * It is not fatal for the last generation to be in this state.
                 */
                /* Abort the run when the fatal exit comes from an unknown
                 * child or from a child of the current generation; for a
                 * child of an older generation only log a warning.
                 */
                if (child_slot < 0
                    || ap_get_scoreboard_process(child_slot)->generation
                       == retained->mpm->my_generation) {
                    retained->mpm->mpm_state = AP_MPMQ_STOPPING;
                    return !OK;
                }
                else {
                    ap_log_error(APLOG_MARK, APLOG_WARNING, 0, ap_server_conf, APLOGNO(00166)
                                 "Ignoring fatal error in child of previous "
                                 "generation (pid %ld).",
                                 (long)pid.pid);
                }
            }

            /* non-fatal death... note that it's gone in the scoreboard. */
            if (child_slot >= 0) {
                (void) ap_update_child_status_from_indexes(child_slot, 0, SERVER_DEAD,
                                                           (request_rec *) NULL);
                prefork_note_child_killed(child_slot, 0, 0);
                if (processed_status == APEXIT_CHILDSICK) {
                    /* child detected a resource shortage (E[NM]FILE, ENOBUFS, etc)
                     * cut the fork rate to the minimum
                     */
                    retained->idle_spawn_rate = 1;
                }
                else if (remaining_children_to_start
                    && child_slot < ap_daemons_limit) {
                    /* we're still doing a 1-for-1 replacement of dead
                     * children with new children
                     */
                    make_child(ap_server_conf, child_slot);
                    --remaining_children_to_start;
                }
                /* The preprocessor block below deliberately straddles the
                 * brace structure.  With APR_HAS_OTHER_CHILD the first "}"
                 * closes the "if (child_slot >= 0)" body and the "}" after
                 * the #endif closes the extra "else if" branch; without it
                 * the "}" after the #endif closes the "if (child_slot >= 0)"
                 * body.  Do not rebalance the braces.
                 */
#if APR_HAS_OTHER_CHILD
            }
            else if (apr_proc_other_child_alert(&pid, APR_OC_REASON_DEATH, status) == APR_SUCCESS) {
                /* handled */
#endif
            }
            else if (retained->mpm->was_graceful) {
                /* Great, we've probably just lost a slot in the
                 * scoreboard.  Somehow we don't know about this
                 * child.
                 */
                ap_log_error(APLOG_MARK, APLOG_WARNING,
                            0, ap_server_conf, APLOGNO(00167)
                            "long lost child came home! (pid %ld)", (long)pid.pid);
            }
            /* Don't perform idle maintenance when a child dies,
             * only do it when there's a timeout.  Remember only a
             * finite number of children can die, and it's pretty
             * pathological for a lot to die suddenly.
             */
            continue;
        }
        else if (remaining_children_to_start) {
            /* we hit a 1 second timeout in which none of the previous
             * generation of children needed to be reaped... so assume
             * they're all done, and pick up the slack if any is left.
             */
            startup_children(remaining_children_to_start);
            remaining_children_to_start = 0;
            /* In any event we really shouldn't do the code below because
             * few of the servers we just started are in the IDLE state
             * yet, so we'd mistakenly create an extra server.
             */
            continue;
        }

        perform_idle_server_maintenance(pconf);
    }

    /* The loop ended: either a restart or a shutdown is pending. */
    retained->mpm->mpm_state = AP_MPMQ_STOPPING;

    if (retained->mpm->shutdown_pending && retained->mpm->is_ungraceful) {
        /* Time to shut down:
         * Kill child processes, tell them to call child_exit, etc...
         */
        if (ap_unixd_killpg(getpgrp(), SIGTERM) < 0) {
            ap_log_error(APLOG_MARK, APLOG_WARNING, errno, ap_server_conf, APLOGNO(00168) "killpg SIGTERM");
        }
        ap_reclaim_child_processes(1, /* Start with SIGTERM */
                                   prefork_note_child_killed);

        /* cleanup pid file on normal shutdown */
        ap_remove_pid(pconf, ap_pid_fname);
        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00169)
                    "caught SIGTERM, shutting down");

        return DONE;
    }

    if (retained->mpm->shutdown_pending) {
        /* Time to perform a graceful shut down:
         * Reap the inactive children, and ask the active ones
         * to close their listeners, then wait until they are
         * all done to exit.
         */
        int active_children;
        /* Stays 0 when no graceful shutdown timeout is configured; it is
         * only compared against the clock when the timeout is non-zero.
         */
        apr_time_t cutoff = 0;

        /* Stop listening */
        ap_close_listeners();

        /* kill off the idle ones */
        for (i = 0; i < num_buckets; i++) {
            ap_mpm_pod_killpg(retained->buckets[i].pod,
                              retained->max_daemons_limit);
        }

        /* Send SIGUSR1 to the active children */
        /* NOTE(review): the count accumulated in this loop is never read;
         * active_children is reassigned inside the wait loop below before
         * its first use.
         */
        active_children = 0;
        for (i = 0; i < ap_daemons_limit; ++i) {
            if (ap_scoreboard_image->servers[i][0].status != SERVER_DEAD) {
                /* Ask each child to close its listeners. */
                ap_mpm_safe_kill(MPM_CHILD_PID(i), AP_SIG_GRACEFUL);
                active_children++;
            }
        }

        /* Allow each child which actually finished to exit */
        ap_relieve_child_processes(prefork_note_child_killed);

        /* cleanup pid file */
        ap_remove_pid(pconf, ap_pid_fname);
        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00170)
           "caught " AP_SIG_GRACEFUL_STOP_STRING ", shutting down gracefully");

        if (ap_graceful_shutdown_timeout) {
            cutoff = apr_time_now() +
                     apr_time_from_sec(ap_graceful_shutdown_timeout);
        }

        /* Don't really exit until each child has finished */
        /* The flag is cleared so that the loop condition below can notice it
         * being set again while we wait.  NOTE(review): presumably set by a
         * signal handler outside this function -- confirm.
         */
        retained->mpm->shutdown_pending = 0;
        do {
            /* Pause for a second */
            sleep(1);

            /* Relieve any children which have now exited */
            ap_relieve_child_processes(prefork_note_child_killed);

            /* Probe every slot with signal 0; NOTE(review): presumably an
             * existence check as with kill(2) -- confirm in
             * ap_mpm_safe_kill().
             */
            active_children = 0;
            for (i = 0; i < ap_daemons_limit; ++i) {
                if (ap_mpm_safe_kill(MPM_CHILD_PID(i), 0) == APR_SUCCESS) {
                    active_children = 1;
                    /* Having just one child is enough to stay around */
                    break;
                }
            }
            /* Keep waiting while no new shutdown request arrived, at least
             * one child still answers, and the optional timeout has not
             * expired.
             */
        } while (!retained->mpm->shutdown_pending && active_children &&
                 (!ap_graceful_shutdown_timeout || apr_time_now() < cutoff));

        /* We might be here because we received SIGTERM, either
         * way, try and make sure that all of our processes are
         * really dead.
         */
        /* Unlike the earlier killpg calls, the result is ignored here. */
        ap_unixd_killpg(getpgrp(), SIGTERM);

        return DONE;
    }

    /* No shutdown pending, so the loop ended because of a restart request;
     * log which kind and return OK.
     */
    if (!retained->mpm->is_ungraceful) {
        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00171)
                    "Graceful restart requested, doing restart");
    }
    else {
        ap_log_error(APLOG_MARK, APLOG_NOTICE, 0, ap_server_conf, APLOGNO(00173)
                    "SIGHUP received.  Attempting to restart");
    }
    return OK;
}