server/app/lib/middleware.py (91 lines of code) (raw):
#!/usr/bin/env python3
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
"""Selfserve Portal for the Apache Software Foundation"""
# This module uses `assert` for runtime checks (e.g. validating that an endpoint
# returned a response), so refuse to load when Python strips assert statements
# (i.e. when __debug__ is False, as under the -O flag).
if not __debug__:
    raise RuntimeError("This code requires assert statements to be enabled")
import sys
import traceback
import typing
import uuid
import quart
from . import config
import werkzeug.routing
import asyncio
import functools
async def consume_body():
    """Drain the current request body, discarding every chunk that arrives.

    Reading the body to the end is required for httpd to not throw a 502
    when we answer with an error.
    """
    # See: https://bz.apache.org/bugzilla/show_bug.cgi?id=55433
    incoming = quart.request.body
    async for _chunk in incoming:
        continue
def glued(func: typing.Callable) -> typing.Callable:
    """Middleware that collects all form data (except file uploads!) and joins as one dict.

    The returned coroutine merges, in this order (later sources override
    earlier ones on key collisions):

    1. URL query string arguments,
    2. form fields from the request body,
    3. the JSON request body, when it is a JSON object.

    It then awaits ``func(form_data, **args)`` and returns its response.

    Args:
        func: The endpoint coroutine. It receives the merged dict as its first
            positional argument, followed by the keyword arguments the wrapper
            itself was called with.

    Returns:
        The wrapping coroutine function, renamed to ``<module>.<function>`` of
        the wrapped endpoint.

    The wrapper responds with:
        * 413 if a form-type request declares a content length above
          ``config.server.max_form_size``.
        * 500 with a JSON error document if the endpoint raises or returns a
          falsy response. Depending on ``config.server.error_reporting`` the
          traceback is either sent to the client ("show") or written to stderr
          with a short error ID.
    """

    # Content types whose bodies get parsed as form data and are size-capped.
    form_content_types = (
        "multipart/form-data",
        "application/x-www-form-urlencoded",
        "application/x-url-encoded",
    )

    async def call(**args):
        form_data = dict()
        form_data.update(quart.request.args.to_dict())

        # Pre-parse check for form data size. This MUST happen before the form
        # is awaited below, otherwise the oversized body has already been
        # read and parsed by the time we reject it.
        content_type = quart.request.content_type
        if content_type and any(x in content_type for x in form_content_types):
            content_length = quart.request.content_length
            # content_length is None when the client did not declare one;
            # comparing None to an int would raise, so skip the check then.
            if content_length is not None and content_length > config.server.max_form_size:
                await consume_body()
                return quart.Response(
                    status=413,
                    response=f"Request content length ({content_length} bytes) is larger than what is permitted for form data ({config.server.max_form_size} bytes)!",
                )

        xform = await quart.request.form
        if xform:
            form_data.update(xform.to_dict())
        if quart.request.is_json:
            xjson = await quart.request.json
            # Only a JSON object can be merged into the dict; other valid JSON
            # payloads (null, numbers, lists, ...) would make update() raise.
            if isinstance(xjson, dict):
                form_data.update(xjson)
        try:
            resp = await func(form_data, **args)
            assert resp, "No response was provided by the underlying endpoint!"
        except Exception:  # Catch and spit out errors
            exc_type, exc_value, exc_traceback = sys.exc_info()
            err = "\n".join(traceback.format_exception(exc_type, exc_value, exc_traceback))
            headers = {
                "Server": "ASF Selfserve Platform",
                "Content-Type": "application/json",
            }
            # By default, we print the traceback to the user, for easy debugging.
            if config.server.error_reporting == "show":
                error_text = "API error occurred: \n" + err
                return {"success": False, "message": error_text}, 500, headers
            # If client traceback is disabled, we print it to stderr instead, but leave an
            # error ID for the client to report back to the admin. Every line of the traceback
            # will have this error ID at the beginning of the line, for easy grepping.
            else:
                # We only need a short ID here, let's pick 18 chars.
                eid = str(uuid.uuid4())[:18]
                sys.stderr.write("API Endpoint %s got into trouble (%s): \n" % (quart.request.path, eid))
                for line in err.split("\n"):
                    sys.stderr.write("%s: %s\n" % (eid, line))
                return {"success": False, "message": f"API error occurred. The application journal will have information. Error ID: {eid}"}, 500, headers
        # If the endpoint responded before the request body has been fully consumed,
        # eat the rest quietly.
        # NOTE(review): this relies on the private `_complete` event of the request body.
        if not quart.request.body._complete.is_set():
            await consume_body()
        return resp

    # Quart will, if no rule name is specified, default to calling the rule "call" here,
    # which leads to carps about duplicate rule definitions. So, given the fact that call()
    # is dynamically made from within this function, we simply adjust its internal name to
    # refer to the calling module and function, thus providing Quart with a much better
    # name for the rule, which will also aid in debugging.
    call.__name__ = func.__module__ + "." + func.__name__
    return call
def auth_failed():
    """Build the 401 response for a request that failed authorization.

    Requests carrying the "x-artifacts-webui" header come from the Web UI and
    get a bare 401: no realm header is sent, since we do not want a login
    pop-up in the browser. All other requests get the standard 401 response
    with a Basic auth challenge.
    """
    message = "Please authenticate yourself first!\n"
    if "x-artifacts-webui" in quart.request.headers:
        return quart.Response(status=401, response=message)
    challenge = {"WWW-Authenticate": 'Basic realm="selfserve.apache.org"'}
    return quart.Response(status=401, headers=challenge, response=message)
class FilenameConverter(werkzeug.routing.BaseConverter):
    """URL converter that turns a filename into a (basename, extension) pair."""

    regex = r"^[^/.]*(\.[A-Za-z0-9]+)?$"
    part_isolating = False

    def to_python(self, filename):
        """Split ``filename`` at its first dot and return ``(base, extension)``.

        A dot in the very first position does not count as a separator, so
        names such as ".htaccess" are returned whole with an empty extension,
        as are names without any dot.
        """
        if "." not in filename[1:]:
            return filename, ""
        stem, _dot, suffix = filename.partition(".")
        return stem, suffix
async def reset_rate_limits():
    """Wipe the per-IP lookup counters once every 24 hours, forever."""
    one_day = 86400  # seconds
    while True:
        await asyncio.sleep(one_day)
        config.rate_limits.clear()
def rate_limited(func):
    """Decorator for calls that are rate-limited for anonymous users.

    Once the number of requests per day has been exceeded, this decorator
    will return a 429 HTTP response to the client instead.

    Requests are counted per client IP in ``config.rate_limits``. The IP is
    the last entry of the ``X-Forwarded-For`` header when present, otherwise
    the remote address of the connection. No limit is enforced when
    ``config.server.rate_limit_per_ip`` is falsy, though usage is still counted.

    NOTE(review): the wrapper itself does not check whether the caller is
    anonymous; it counts every request it sees. Confirm against the call sites.

    Args:
        func: The coroutine function to protect.

    Returns:
        A coroutine function that forwards all positional and keyword
        arguments to ``func`` unless the caller is over the limit.
    """

    @functools.wraps(func)
    async def session_wrapper(*args, **kwargs):
        # remote_addr can be None; fall back to an empty string so the
        # split() below cannot fail.
        forwarded = quart.request.headers.get("X-Forwarded-For", quart.request.remote_addr) or ""
        ip = forwarded.split(",")[-1].strip()
        usage = config.rate_limits.get(ip, 0) + 1
        if config.server.rate_limit_per_ip and usage > config.server.rate_limit_per_ip:
            return quart.Response(status=429, response="Your request has been rate-limited. Please check back tomorrow!")
        config.rate_limits[ip] = usage
        # NOTE(review): looks like leftover debug output; consider proper logging.
        print(ip, usage)
        # Forward keyword arguments too, so callers that pass them keep working.
        return await func(*args, **kwargs)

    return session_wrapper