static/downloads/mirrors.cgi [1:274]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#!/usr/local/bin/python2.7
# -*- python -*-
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# This is a Python CGI script that uses EZT to produce templated
# mirror content and GeoIP to choose the appropriate mirror
#

import sys
import os
import cgi
import stat
import random
import time
import traceback
import cStringIO

# Should be installed in the main system library
import GeoIP

# Insert this directory into PATH so that we can import ezt
this_dir = os.path.dirname(__file__) or '.'
sys.path.insert(0, this_dir)

import ezt


# Configurable stuff
MIRRORS_LIST = "/x1/www/www.apache.org/mirrors/mirrors.list"
DEFAULT_REGION = "us"
DEFAULT_TEMPLATE = "/x1/www/www.apache.org/dyn/closer.html"
DEFAULT_LOCATION = "http://www.apache.org/dyn/closer.cgi"


def get_region(environ):
  """Use GeoIP to find the client's country, falling back to
     DEFAULT_REGION on failure."""
  try:
    remote_ip = environ['REMOTE_ADDR'];
    gi = GeoIP.new(GeoIP.GEOIP_STANDARD)
    region = gi.country_code_by_addr(remote_ip).lower().strip()
    if region == 'gb':
      return 'uk'
    else:
      return region
  except:
    ### should we log an error here? absorbing without reporting is
    ### generally bad form.
    return DEFAULT_REGION


def parse_mirrors(filename, country, preferred, mingood):
  """Parse the mirror database to find the best mirrors for a client.

  The Format of the mirror database is (last two fields are optional):
  ftp au  ftp://ftp.planetmirror.com/pub/apache/dist/ 1117724635 http://example.com/logo.gif http://example.com/ """

  output = { 'http'   : [ ],
             'ftp'    : [ ],
             'backup' : [ ],
             'preferred' : None,
             'logo' : None,
             'link' : None,
             }

  # Read the mirror database and put it in a list of lists
  # skip empty lines and comment
  mirrors = [line.split() for line in open(filename).readlines()
             if line.strip() and not line.startswith('#')]

  mirrors.append(['http', 'us', 'http://archive.apache.org/dist/', '9999999999'])

  # Add trailing slashes where missing.  Otherwise,
  # strcat("http://www.mirror.org", pathinfo=".foo.evil") would link to
  # http://www.mirror.org.foo.evil
  for mir in mirrors:
    if not mir[2].endswith('/'):
      mir[2] += '/'

  # grab the backup mirrors
  backupmirrors = [mir for mir in mirrors if mir[1] == 'Backup']

  # Grab the mirrors for the requested country or, failing that,
  # from the default region (us)
  for region in (country, DEFAULT_REGION):
    countrymirrors = [mir for mir in mirrors if mir[1] == region]
    random.shuffle(countrymirrors)
    goodmirror = None
    for mir in countrymirrors:
      if mir[0] == 'http' and int(mir[3]) > mingood:
        goodmirror = mir
        break
    if goodmirror:
      break

  # Check if the requested Preferred mirror is in the list
  # Note the user-requested mirror doesn't have a trailing-slash
  prefmir = None
  if preferred:
    for mir in mirrors:
      if mir[2] == preferred:
        prefmir = mir
        break
  # Otherwise pick a preferred mirror from our country
  if not prefmir and goodmirror:
    prefmir = goodmirror
  if not prefmir:  # In the worst case, choose a backup
    prefmir = random.choice(backupmirrors)

  # Record the preferred mirror and, if available, its logo and link
  # Keep the trailing-slash on the URL (it is later joined to the path_info)
  output['preferred'] = prefmir[2]
  if len(prefmir) > 5:
    output['logo'] = prefmir[4]
    output['link'] = prefmir[5]

  # Now assemble a list of all the other mirrors.
  # Keep the trailing-slash on the URL (it is later joined to the path_info)
  output['http'] = [mir[2] for mir in countrymirrors if mir[0] == 'http']
  output['ftp'] = [mir[2] for mir in countrymirrors if mir[0] == 'ftp']
  output['backup'] = [mir[2] for mir in backupmirrors]

  return output


def mirrorwrap(environ, start_response):
  try:
    return mirrorsapp(environ, start_response)
  except:
    status = "500 Oops"
    response_headers = [("content-type","text/plain")]
    start_response(status, response_headers, sys.exc_info())
    return ["Problem running mirror.cgi, contact <infrastructure@apache.org> "
            "if it persists.\n\n"
            + traceback.format_exc() ]


def locate_template(environ):
  # Determine the correct template by noting our filesystem location
  if environ.has_key('ASF_MIRROR_FILENAME'):
    template_file = environ['ASF_MIRROR_FILENAME'].replace(".cgi", ".html")
  elif environ.has_key('SCRIPT_FILENAME'):
    template_file = environ['SCRIPT_FILENAME'].replace(".cgi", ".html")
  else:
    template_file = sys.argv[0].replace(".cgi", ".html")

  if not os.path.isfile(template_file):
    # look in docroot instead if this is in a cgi-bin dir
    template_file = template_file.replace("/cgi-bin/", "/content/")
    if not os.path.isfile(template_file):
      template_file = DEFAULT_TEMPLATE

  return template_file


def locate_mirrors(environ):
  # Allow the MIRRORS_LIST environment variable to override the default
  mirrors = environ.get('MIRRORS_LIST')
  if mirrors and os.path.isfile(mirrors):
    return mirrors
  return MIRRORS_LIST  # the default


def mirrorsapp(environ, start_response):
  headers = [ ]
  resp_code = '200 OK'

  # Where is the client coming from
  region = get_region(environ)

  # Was there a preferred mirror or update requirement?
  form = cgi.FieldStorage(fp=environ['wsgi.input'],
                          environ=environ,
                          keep_blank_values=True)
  preferred = form.getfirst("Preferred", "")
  update = form.getfirst("update", "")

  # Get the last update time of the mirror database
  mirrors = locate_mirrors(environ)
  base_time = os.path.getmtime(mirrors)

  # convert from YYYYMMDDhhmm to time-since-unix-epoch
  try:
    mingood = time.mktime(time.strptime(update, "%Y%m%d%H%M"))
    # Never use a mirror more than a week old
    mingood = max(mingood, base_time - 7*24*60*60)
  except:
    # if we didn't get a time, or we can't convert it, then
    # use the time the mirror database was last updated minus 24 hours
    mingood = base_time - 24*60*60

  # Load the mirrors file and parse it out
  data = parse_mirrors(mirrors, region, preferred, mingood)

  # ======== new download tracking code ==========
  action = form.getfirst("action", "")
  filename = form.getfirst("filename", "")
  if action == 'download' and filename != '':
    url = "%s%s" % (data['preferred'][:-1], filename)
    headers.append(('Location', url))
    start_response(resp_code, headers)
    #log_download(url, data['preferred'], environ, region)
    return ''
  # ====== end new download tracking code ========

  # Note location to self
  data['location'] = environ.get('SCRIPT_NAME', DEFAULT_LOCATION)

  path_param = form.getfirst("path", None)
  if path_param:
    path_info = cgi.escape(path_param, 1)
  else:
    # Note any PATH_INFO
    if environ.has_key('PATH_INFO'):
      path_info = cgi.escape(environ['PATH_INFO'], 1)
      if environ.has_key('SCRIPT_NAME'):
        if environ['PATH_INFO'] == environ['SCRIPT_NAME']:
          path_info = ''
    else:
      path_info = ''
  # The mirror URL already has a trailing slash. Avoid doubling it up.
  if path_info.startswith('/'):
    path_info = path_info[1:]
  data['path_info'] = path_info

  template_file = locate_template(environ)

  # Print out the CGI header component
  # using xml if the filename ends with the magic '--xml' string
  if template_file.endswith('--xml.html'):
    headers.append(('Content-type', 'text/xml'))
  else:
    headers.append(('Content-type', 'text/html'))

  start_response(resp_code, headers)

  output = cStringIO.StringIO()
  template = ezt.Template(template_file)
  template.generate(output, data)
  return [ output.getvalue() ]

# ======== new download tracking code ==========
def log_download(url, preferred, environ, region):
  timestamp = int(time.time())
  # format: timestamp,url,mirror,IP,region,"useragent"
  try:
    fh = open('/home/jfthomps/debug', 'a')
    fh.write('%s,%s,%s,%s,%s,"%s"\n' % (timestamp, url, preferred, environ['REMOTE_ADDR'], region, environ['HTTP_USER_AGENT']))
    fh.close
  except:
    pass
# ====== end new download tracking code ========

if __name__ == '__main__':
  #from flup.server.fcgi import WSGIServer
  from flup.server.cgi import WSGIServer
  WSGIServer(mirrorwrap).run()
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -


static/downloads/test2.cgi [1:274]:
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
#!/usr/local/bin/python2.7
# -*- python -*-
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#
# This is a Python CGI script that uses EZT to produce templated
# mirror content and GeoIP to choose the appropriate mirror
#

import sys
import os
import cgi
import stat
import random
import time
import traceback
import cStringIO

# Should be installed in the main system library
import GeoIP

# Insert this directory into PATH so that we can import ezt
this_dir = os.path.dirname(__file__) or '.'
sys.path.insert(0, this_dir)

import ezt


# Configurable stuff
MIRRORS_LIST = "/x1/www/www.apache.org/mirrors/mirrors.list"
DEFAULT_REGION = "us"
DEFAULT_TEMPLATE = "/x1/www/www.apache.org/dyn/closer.html"
DEFAULT_LOCATION = "http://www.apache.org/dyn/closer.cgi"


def get_region(environ):
  """Use GeoIP to find the client's country, falling back to
     DEFAULT_REGION on failure."""
  try:
    remote_ip = environ['REMOTE_ADDR'];
    gi = GeoIP.new(GeoIP.GEOIP_STANDARD)
    region = gi.country_code_by_addr(remote_ip).lower().strip()
    if region == 'gb':
      return 'uk'
    else:
      return region
  except:
    ### should we log an error here? absorbing without reporting is
    ### generally bad form.
    return DEFAULT_REGION


def parse_mirrors(filename, country, preferred, mingood):
  """Parse the mirror database to find the best mirrors for a client.

  The Format of the mirror database is (last two fields are optional):
  ftp au  ftp://ftp.planetmirror.com/pub/apache/dist/ 1117724635 http://example.com/logo.gif http://example.com/ """

  output = { 'http'   : [ ],
             'ftp'    : [ ],
             'backup' : [ ],
             'preferred' : None,
             'logo' : None,
             'link' : None,
             }

  # Read the mirror database and put it in a list of lists
  # skip empty lines and comment
  mirrors = [line.split() for line in open(filename).readlines()
             if line.strip() and not line.startswith('#')]

  mirrors.append(['http', 'us', 'http://archive.apache.org/dist/', '9999999999'])

  # Add trailing slashes where missing.  Otherwise,
  # strcat("http://www.mirror.org", pathinfo=".foo.evil") would link to
  # http://www.mirror.org.foo.evil
  for mir in mirrors:
    if not mir[2].endswith('/'):
      mir[2] += '/'

  # grab the backup mirrors
  backupmirrors = [mir for mir in mirrors if mir[1] == 'Backup']

  # Grab the mirrors for the requested country or, failing that,
  # from the default region (us)
  for region in (country, DEFAULT_REGION):
    countrymirrors = [mir for mir in mirrors if mir[1] == region]
    random.shuffle(countrymirrors)
    goodmirror = None
    for mir in countrymirrors:
      if mir[0] == 'http' and int(mir[3]) > mingood:
        goodmirror = mir
        break
    if goodmirror:
      break

  # Check if the requested Preferred mirror is in the list
  # Note the user-requested mirror doesn't have a trailing-slash
  prefmir = None
  if preferred:
    for mir in mirrors:
      if mir[2] == preferred:
        prefmir = mir
        break
  # Otherwise pick a preferred mirror from our country
  if not prefmir and goodmirror:
    prefmir = goodmirror
  if not prefmir:  # In the worst case, choose a backup
    prefmir = random.choice(backupmirrors)

  # Record the preferred mirror and, if available, its logo and link
  # Keep the trailing-slash on the URL (it is later joined to the path_info)
  output['preferred'] = prefmir[2]
  if len(prefmir) > 5:
    output['logo'] = prefmir[4]
    output['link'] = prefmir[5]

  # Now assemble a list of all the other mirrors.
  # Keep the trailing-slash on the URL (it is later joined to the path_info)
  output['http'] = [mir[2] for mir in countrymirrors if mir[0] == 'http']
  output['ftp'] = [mir[2] for mir in countrymirrors if mir[0] == 'ftp']
  output['backup'] = [mir[2] for mir in backupmirrors]

  return output


def mirrorwrap(environ, start_response):
  try:
    return mirrorsapp(environ, start_response)
  except:
    status = "500 Oops"
    response_headers = [("content-type","text/plain")]
    start_response(status, response_headers, sys.exc_info())
    return ["Problem running mirror.cgi, contact <infrastructure@apache.org> "
            "if it persists.\n\n"
            + traceback.format_exc() ]


def locate_template(environ):
  # Determine the correct template by noting our filesystem location
  if environ.has_key('ASF_MIRROR_FILENAME'):
    template_file = environ['ASF_MIRROR_FILENAME'].replace(".cgi", ".html")
  elif environ.has_key('SCRIPT_FILENAME'):
    template_file = environ['SCRIPT_FILENAME'].replace(".cgi", ".html")
  else:
    template_file = sys.argv[0].replace(".cgi", ".html")

  if not os.path.isfile(template_file):
    # look in docroot instead if this is in a cgi-bin dir
    template_file = template_file.replace("/cgi-bin/", "/content/")
    if not os.path.isfile(template_file):
      template_file = DEFAULT_TEMPLATE

  return template_file


def locate_mirrors(environ):
  # Allow the MIRRORS_LIST environment variable to override the default
  mirrors = environ.get('MIRRORS_LIST')
  if mirrors and os.path.isfile(mirrors):
    return mirrors
  return MIRRORS_LIST  # the default


def mirrorsapp(environ, start_response):
  headers = [ ]
  resp_code = '200 OK'

  # Where is the client coming from
  region = get_region(environ)

  # Was there a preferred mirror or update requirement?
  form = cgi.FieldStorage(fp=environ['wsgi.input'],
                          environ=environ,
                          keep_blank_values=True)
  preferred = form.getfirst("Preferred", "")
  update = form.getfirst("update", "")

  # Get the last update time of the mirror database
  mirrors = locate_mirrors(environ)
  base_time = os.path.getmtime(mirrors)

  # convert from YYYYMMDDhhmm to time-since-unix-epoch
  try:
    mingood = time.mktime(time.strptime(update, "%Y%m%d%H%M"))
    # Never use a mirror more than a week old
    mingood = max(mingood, base_time - 7*24*60*60)
  except:
    # if we didn't get a time, or we can't convert it, then
    # use the time the mirror database was last updated minus 24 hours
    mingood = base_time - 24*60*60

  # Load the mirrors file and parse it out
  data = parse_mirrors(mirrors, region, preferred, mingood)

  # ======== new download tracking code ==========
  action = form.getfirst("action", "")
  filename = form.getfirst("filename", "")
  if action == 'download' and filename != '':
    url = "%s%s" % (data['preferred'][:-1], filename)
    headers.append(('Location', url))
    start_response(resp_code, headers)
    #log_download(url, data['preferred'], environ, region)
    return ''
  # ====== end new download tracking code ========

  # Note location to self
  data['location'] = environ.get('SCRIPT_NAME', DEFAULT_LOCATION)

  path_param = form.getfirst("path", None)
  if path_param:
    path_info = cgi.escape(path_param, 1)
  else:
    # Note any PATH_INFO
    if environ.has_key('PATH_INFO'):
      path_info = cgi.escape(environ['PATH_INFO'], 1)
      if environ.has_key('SCRIPT_NAME'):
        if environ['PATH_INFO'] == environ['SCRIPT_NAME']:
          path_info = ''
    else:
      path_info = ''
  # The mirror URL already has a trailing slash. Avoid doubling it up.
  if path_info.startswith('/'):
    path_info = path_info[1:]
  data['path_info'] = path_info

  template_file = locate_template(environ)

  # Print out the CGI header component
  # using xml if the filename ends with the magic '--xml' string
  if template_file.endswith('--xml.html'):
    headers.append(('Content-type', 'text/xml'))
  else:
    headers.append(('Content-type', 'text/html'))

  start_response(resp_code, headers)

  output = cStringIO.StringIO()
  template = ezt.Template(template_file)
  template.generate(output, data)
  return [ output.getvalue() ]

# ======== new download tracking code ==========
def log_download(url, preferred, environ, region):
  timestamp = int(time.time())
  # format: timestamp,url,mirror,IP,region,"useragent"
  try:
    fh = open('/home/jfthomps/debug', 'a')
    fh.write('%s,%s,%s,%s,%s,"%s"\n' % (timestamp, url, preferred, environ['REMOTE_ADDR'], region, environ['HTTP_USER_AGENT']))
    fh.close
  except:
    pass
# ====== end new download tracking code ========

if __name__ == '__main__':
  #from flup.server.fcgi import WSGIServer
  from flup.server.cgi import WSGIServer
  WSGIServer(mirrorwrap).run()
- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -