From d4412956ad6ee995b5b559ba661236b0fc1b38c7 Mon Sep 17 00:00:00 2001 From: Michael Lazar Date: Sat, 25 Jul 2020 22:57:51 -0400 Subject: [PATCH 1/3] Rate limiting proof of concept --- .isort.cfg | 2 ++ .pre-commit-config.yaml | 4 ++-- examples/chatroom.py | 3 ++- examples/counter.py | 3 ++- examples/rate_limit.py | 38 ++++++++++++++++++++++++++++++++ jetforce/__init__.py | 9 +++++++- jetforce/__main__.py | 8 ++++++- jetforce/app/base.py | 49 +++++++++++++++++++++++++++++++++++++++++ jetforce/app/static.py | 17 ++++++++++++-- 9 files changed, 125 insertions(+), 8 deletions(-) create mode 100644 .isort.cfg create mode 100644 examples/rate_limit.py diff --git a/.isort.cfg b/.isort.cfg new file mode 100644 index 0000000..9caba16 --- /dev/null +++ b/.isort.cfg @@ -0,0 +1,2 @@ +[isort] +profile=black diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index ec7af1a..be145a7 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -7,6 +7,6 @@ repos: hooks: - id: black - repo: https://github.com/pre-commit/mirrors-isort - rev: v4.3.21 + rev: v5.1.4 hooks: - - id: isort \ No newline at end of file + - id: isort diff --git a/examples/chatroom.py b/examples/chatroom.py index f69f455..bd81e26 100644 --- a/examples/chatroom.py +++ b/examples/chatroom.py @@ -17,9 +17,10 @@ streaming. from collections import deque from datetime import datetime -from jetforce import GeminiServer, JetforceApplication, Response, Status from twisted.internet.defer import AlreadyCalledError, Deferred +from jetforce import GeminiServer, JetforceApplication, Response, Status + class MessageQueue: def __init__(self, filename): diff --git a/examples/counter.py b/examples/counter.py index f60b89d..16ca794 100644 --- a/examples/counter.py +++ b/examples/counter.py @@ -9,11 +9,12 @@ loading the entire response into memory at once. 
""" import time -from jetforce import GeminiServer, JetforceApplication, Response, Status from twisted.internet import reactor from twisted.internet.task import deferLater from twisted.internet.threads import deferToThread +from jetforce import GeminiServer, JetforceApplication, Response, Status + def blocking_counter(): """ diff --git a/examples/rate_limit.py b/examples/rate_limit.py new file mode 100644 index 0000000..ce44ab6 --- /dev/null +++ b/examples/rate_limit.py @@ -0,0 +1,38 @@ +#!/usr/local/env python3 +""" +This example shows how you can implement advanced rate limiting schemes. +""" +from jetforce import GeminiServer, JetforceApplication, RateLimiter, Response, Status + +app = JetforceApplication() + +INDEX_PAGE = """\ +# Rate Limiting Demo + +=>/short short rate limiter (5/30s) +=>/long long rate limiter (60/5m) +""" + + +@app.route("", strict_trailing_slash=False) +def index(request): + return Response(Status.SUCCESS, "text/gemini", INDEX_PAGE) + + +@app.route("/short") +@RateLimiter("5/30s") +def short(request): + # Maximum of 5 requests per 30 seconds + return Response(Status.SUCCESS, "text/gemini", "Request was successful") + + +@app.route("/long") +@RateLimiter("60/5m") +def long(request): + # Maximum of 60 requests per 5 minutes + return Response(Status.SUCCESS, "text/gemini", "Request was successful") + + +if __name__ == "__main__": + server = GeminiServer(app, host="127.0.0.1", hostname="localhost") + server.run() diff --git a/jetforce/__init__.py b/jetforce/__init__.py index 072636d..16d0a3c 100644 --- a/jetforce/__init__.py +++ b/jetforce/__init__.py @@ -2,7 +2,14 @@ isort:skip_file """ from .__version__ import __version__ -from .app.base import JetforceApplication, Request, Response, RoutePattern, Status +from .app.base import ( + JetforceApplication, + Request, + Response, + RoutePattern, + Status, + RateLimiter, +) from .app.static import StaticDirectoryApplication from .app.composite import CompositeApplication from .protocol import 
GeminiProtocol diff --git a/jetforce/__main__.py b/jetforce/__main__.py index 18f74b1..9d1be3e 100644 --- a/jetforce/__main__.py +++ b/jetforce/__main__.py @@ -91,13 +91,18 @@ group.add_argument( metavar="FILE", dest="index_file", ) - group.add_argument( "--default-lang", help="A lang parameter that will be indicated in the response meta", default=None, dest="default_lang", ) +group.add_argument( + "--rate-limit", + help="An IP rate limit string, e.g. '60/5m' (60 requests per 5 minutes)", + default=None, + dest="rate_limit", +) def main(): @@ -107,6 +112,7 @@ def main(): index_file=args.index_file, cgi_directory=args.cgi_directory, default_lang=args.default_lang, + rate_limit=args.rate_limit, ) server = GeminiServer( app=app, diff --git a/jetforce/app/base.py b/jetforce/app/base.py index 7f83686..2913fdd 100644 --- a/jetforce/app/base.py +++ b/jetforce/app/base.py @@ -1,6 +1,8 @@ import dataclasses import re +import time import typing +from collections import defaultdict from urllib.parse import unquote, urlparse from twisted.internet.defer import Deferred @@ -196,3 +198,50 @@ class JetforceApplication: Set the error response based on the URL type. 
""" return Response(Status.PERMANENT_FAILURE, "Not Found") + + +class RateLimiter: + + RE = re.compile("(?P<number>[0-9]+)/(?P<period>[0-9]+)?(?P<unit>[smhd])") + + def __init__(self, rate: str) -> None: + match = self.RE.fullmatch(rate) + if not match: + raise ValueError(f"Invalid rate format: {rate}") + + rate_data = match.groupdict() + + self.number = int(rate_data["number"]) + self.period = int(rate_data["period"] or 1) + if rate_data["unit"] == "m": + self.period *= 60 + elif rate_data["unit"] == "h": + self.period += 60 * 60 + elif rate_data["unit"] == "d": + self.period *= 60 * 60 * 24 + + self.reset() + + def reset(self) -> None: + self.timestamp = time.time() + self.period + self.counter = defaultdict(int) + + def get_key(self, request: Request) -> typing.Optional[str]: + return request.environ["REMOTE_ADDR"] + + def __call__(self, func: typing.Callable) -> typing.Callable: + def handler(request, **kwargs) -> Response: + time_left = self.timestamp - time.time() + if time_left < 0: + self.reset() + + rate_key = self.get_key(request) + if rate_key is not None: + self.counter[rate_key] += 1 + if self.counter[rate_key] > self.number: + msg = f"Rate limit exceeded, wait {time_left:.0f} seconds." 
+ return Response(Status.SLOW_DOWN, msg) + + return func(request, **kwargs) + + return handler diff --git a/jetforce/app/static.py b/jetforce/app/static.py index 5043e5d..104ae9e 100644 --- a/jetforce/app/static.py +++ b/jetforce/app/static.py @@ -6,7 +6,14 @@ import subprocess import typing import urllib.parse -from .base import JetforceApplication, Request, Response, RoutePattern, Status +from .base import ( + JetforceApplication, + RateLimiter, + Request, + Response, + RoutePattern, + Status, +) class StaticDirectoryApplication(JetforceApplication): @@ -32,9 +39,15 @@ class StaticDirectoryApplication(JetforceApplication): index_file: str = "index.gmi", cgi_directory: str = "cgi-bin", default_lang: typing.Optional[str] = None, + rate_limit: typing.Optional[str] = None, ): super().__init__() - self.routes.append((RoutePattern(), self.serve_static_file)) + + request_method = self.serve_static_file + if rate_limit is not None: + request_method = RateLimiter(rate_limit)(request_method) + + self.routes.append((RoutePattern(), request_method)) self.root = pathlib.Path(root_directory).resolve(strict=True) self.cgi_directory = cgi_directory.strip("/") + "/" From 1e5be2b45a8237abf8deb58897d78ac79a4d9f02 Mon Sep 17 00:00:00 2001 From: Michael Lazar Date: Mon, 27 Jul 2020 00:02:51 -0400 Subject: [PATCH 2/3] Restructure rate limiting --- CHANGELOG.md | 23 +++++-- README.md | 43 ++++++------ examples/rate_limit.py | 17 +++-- jetforce/__main__.py | 6 +- jetforce/app/base.py | 147 ++++++++++++++++++++++++++--------------- jetforce/app/static.py | 10 +-- 6 files changed, 154 insertions(+), 92 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index ab598d8..22bc02c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,16 +1,29 @@ # Jetforce Changelog -### Unreleased +### v0.6.0 (Unreleased) + +#### Bugfixes -- File chunking has been optimized for streaming large static files. -- Server access logs are now redirected to ``stdout`` instead of ``stderr``. 
- This is intended to make it easier to use a log manager tool to split them - out from other server messages like startup information and error tracebacks. - The default mimetype for unknown file extensions will now be sent as "application/octet-stream" instead of "text/plain". The expectation is that it would be safer for a client to download an unknown file rather than attempting to display it inline as text. +#### Features + +- The static file server now has a ``--rate-limit`` flag that can be used + to define per-IP address rate limiting for requests. Requests that exceed + the specified rate will receive a 44 SLOW DOWN error response. +- Server access logs are now redirected to ``stdout`` instead of ``stderr``. + This is intended to make it easier to use a log manager tool to split them + out from other server messages like startup information and error tracebacks. +- File chunking has been optimized for streaming large static files. + +#### Examples + +- Added a new example that demonstrates how to use the new ``RateLimiter`` + class (examples/rate_limit.py). 
+ ### v0.5.0 (2020-07-14) #### Spec Changes diff --git a/README.md b/README.md index 8704cbe..51a83ba 100644 --- a/README.md +++ b/README.md @@ -59,36 +59,39 @@ $ /opt/jetforce/venv/bin/jetforce Use the ``--help`` flag to view command-line options: ```bash -$ jetforce --help usage: jetforce [-h] [-V] [--host HOST] [--port PORT] [--hostname HOSTNAME] [--tls-certfile FILE] [--tls-keyfile FILE] [--tls-cafile FILE] - [--tls-capath DIR] [--dir DIR] [--cgi-dir DIR] - [--index-file FILE] + [--tls-capath DIR] [--dir DIR] [--cgi-dir DIR] [--index-file FILE] + [--default-lang DEFAULT_LANG] [--rate-limit RATE_LIMIT] An Experimental Gemini Protocol Server optional arguments: - -h, --help show this help message and exit - -V, --version show program's version number and exit + -h, --help show this help message and exit + -V, --version show program's version number and exit server configuration: - --host HOST Server address to bind to (default: 127.0.0.1) - --port PORT Server port to bind to (default: 1965) - --hostname HOSTNAME Server hostname (default: localhost) - --tls-certfile FILE Server TLS certificate file (default: None) - --tls-keyfile FILE Server TLS private key file (default: None) - --tls-cafile FILE A CA file to use for validating clients (default: None) - --tls-capath DIR A directory containing CA files for validating clients - (default: None) + --host HOST Server address to bind to (default: 127.0.0.1) + --port PORT Server port to bind to (default: 1965) + --hostname HOSTNAME Server hostname (default: localhost) + --tls-certfile FILE Server TLS certificate file (default: None) + --tls-keyfile FILE Server TLS private key file (default: None) + --tls-cafile FILE A CA file to use for validating clients (default: None) + --tls-capath DIR A directory containing CA files for validating clients (default: + None) fileserver configuration: - --dir DIR Root directory on the filesystem to serve (default: - /var/gemini) - --cgi-dir DIR CGI script directory, relative to the 
server's root - directory (default: cgi-bin) - --index-file FILE If a directory contains a file with this name, that - file will be served instead of auto-generating an index - page (default: index.gmi) + --dir DIR Root directory on the filesystem to serve (default: /var/gemini) + --cgi-dir DIR CGI script directory, relative to the server's root directory + (default: cgi-bin) + --index-file FILE If a directory contains a file with this name, that file will be + served instead of auto-generating an index page (default: index.gmi) + --default-lang DEFAULT_LANG + A lang parameter that will be indicated in the response meta + (default: None) + --rate-limit RATE_LIMIT + Enable IP rate limiting, e.g. '60/5m' (60 requests per 5 minutes) + (default: None) ``` ### Setting the ``hostname`` diff --git a/examples/rate_limit.py b/examples/rate_limit.py index ce44ab6..09e1146 100644 --- a/examples/rate_limit.py +++ b/examples/rate_limit.py @@ -1,10 +1,17 @@ #!/usr/local/env python3 """ -This example shows how you can implement advanced rate limiting schemes. +This example shows how you can implement rate limiting on a per-endpoint basis. 
""" from jetforce import GeminiServer, JetforceApplication, RateLimiter, Response, Status -app = JetforceApplication() +# Apply a global rate limiter that will be applied to all requests +global_rate_limiter = RateLimiter("100/m") +app = JetforceApplication(rate_limiter=global_rate_limiter) + +# Setup some custom rate limiting for specific endpoints +short_rate_limiter = RateLimiter("5/30s") +long_rate_limiter = RateLimiter("60/5m") + INDEX_PAGE = """\ # Rate Limiting Demo @@ -20,16 +27,14 @@ def index(request): @app.route("/short") -@RateLimiter("5/30s") +@short_rate_limiter.apply def short(request): - # Maximum of 5 requests per 30 seconds return Response(Status.SUCCESS, "text/gemini", "Request was successful") @app.route("/long") -@RateLimiter("60/5m") +@long_rate_limiter.apply def long(request): - # Maximum of 60 requests per 5 minutes return Response(Status.SUCCESS, "text/gemini", "Request was successful") diff --git a/jetforce/__main__.py b/jetforce/__main__.py index 9d1be3e..7fea58c 100644 --- a/jetforce/__main__.py +++ b/jetforce/__main__.py @@ -9,6 +9,7 @@ import argparse import sys from .__version__ import __version__ +from .app.base import RateLimiter from .app.static import StaticDirectoryApplication from .server import GeminiServer @@ -99,7 +100,7 @@ group.add_argument( ) group.add_argument( "--rate-limit", - help="An IP rate limit string, e.g. '60/5m' (60 requests per 5 minutes)", + help="Enable IP rate limiting, e.g. 
'60/5m' (60 requests per 5 minutes)", default=None, dest="rate_limit", ) @@ -107,12 +108,13 @@ group.add_argument( def main(): args = parser.parse_args() + rate_limiter = RateLimiter(args.rate_limit) if args.rate_limit else None app = StaticDirectoryApplication( root_directory=args.root_directory, index_file=args.index_file, cgi_directory=args.cgi_directory, default_lang=args.default_lang, - rate_limit=args.rate_limit, + rate_limiter=rate_limiter, ) server = GeminiServer( app=app, diff --git a/jetforce/app/base.py b/jetforce/app/base.py index 2913fdd..fbc5571 100644 --- a/jetforce/app/base.py +++ b/jetforce/app/base.py @@ -123,6 +123,91 @@ class RoutePattern: return re.fullmatch(self.path, request_path) +RouteHandler = typing.Callable[..., Response] + + +class RateLimiter: + """ + A class that can be used to apply rate-limiting to endpoints. + + Rates are defined as human-readable strings, e.g. + + "5/s" (5 requests per-second) + "10/5m" (10 requests per-5 minutes) + "100/2h" (100 requests per-2 hours) + "1000/d" (1k requests per-day) + """ + + RE = re.compile("(?P<number>[0-9]+)/(?P<period>[0-9]+)?(?P<unit>[smhd])") + + def __init__(self, rate: str) -> None: + match = self.RE.fullmatch(rate) + if not match: + raise ValueError(f"Invalid rate format: {rate}") + + rate_data = match.groupdict() + + self.number = int(rate_data["number"]) + self.period = int(rate_data["period"] or 1) + if rate_data["unit"] == "m": + self.period *= 60 + elif rate_data["unit"] == "h": + self.period *= 60 * 60 + elif rate_data["unit"] == "d": + self.period *= 60 * 60 * 24 + + self.reset() + + def reset(self) -> None: + self.next_timestamp = time.time() + self.period + self.rate_counter = defaultdict(int) + + def get_key(self, request: Request) -> typing.Optional[str]: + """ + Rate limit based on the client's IP-address. + """ + return request.environ["REMOTE_ADDR"] + + def check(self, request: Request) -> typing.Optional[Response]: + """ + Check if the given request should be rate limited. 
+ + This method will return a failure response if the request should be + rate limited. + """ + time_left = self.next_timestamp - time.time() + if time_left < 0: + self.reset() + + key = self.get_key(request) + if key is not None: + self.rate_counter[key] += 1 + if self.rate_counter[key] > self.number: + msg = f"Rate limit exceeded, wait {time_left:.0f} seconds." + return Response(Status.SLOW_DOWN, msg) + + def apply(self, wrapped_func: RouteHandler) -> RouteHandler: + """ + Decorator to apply rate limiting to an individual application route. + + Usage: + rate_limiter = RateLimiter("10/m") + + @app.route("/endpoint") + @rate_limiter.apply + def my_endpoint(request): + return Response(Status.SUCCESS, "text/gemini", "hello world!") + """ + + def wrapper(request: Request, **kwargs) -> Response: + response = self.check(request) + if response: + return response + return wrapped_func(request, **kwargs) + + return wrapper + + class JetforceApplication: """ Base Jetforce application class with primitive URL routing. @@ -135,10 +220,9 @@ class JetforceApplication: how to accomplish this. 
""" - def __init__(self): - self.routes: typing.List[ - typing.Tuple[RoutePattern, typing.Callable[[Request, ...], Response]] - ] = [] + def __init__(self, rate_limiter: typing.Optional[RateLimiter] = None): + self.rate_limiter = rate_limiter + self.routes: typing.List[typing.Tuple[RoutePattern, RouteHandler]] = [] def __call__( self, environ: dict, send_status: typing.Callable @@ -149,6 +233,12 @@ class JetforceApplication: send_status(Status.BAD_REQUEST, "Invalid URL") return + if self.rate_limiter: + response = self.rate_limiter.check(request) + if response: + send_status(response.status, response.meta) + return + for route_pattern, callback in self.routes[::-1]: match = route_pattern.match(request) if route_pattern.match(request): @@ -187,7 +277,7 @@ class JetforceApplication: path, scheme, hostname, strict_hostname, strict_trailing_slash ) - def wrap(func: typing.Callable) -> typing.Callable: + def wrap(func: RouteHandler) -> RouteHandler: self.routes.append((route_pattern, func)) return func @@ -198,50 +288,3 @@ class JetforceApplication: Set the error response based on the URL type. 
""" return Response(Status.PERMANENT_FAILURE, "Not Found") - - -class RateLimiter: - - RE = re.compile("(?P<number>[0-9]+)/(?P<period>[0-9]+)?(?P<unit>[smhd])") - - def __init__(self, rate: str) -> None: - match = self.RE.fullmatch(rate) - if not match: - raise ValueError(f"Invalid rate format: {rate}") - - rate_data = match.groupdict() - - self.number = int(rate_data["number"]) - self.period = int(rate_data["period"] or 1) - if rate_data["unit"] == "m": - self.period *= 60 - elif rate_data["unit"] == "h": - self.period += 60 * 60 - elif rate_data["unit"] == "d": - self.period *= 60 * 60 * 24 - - self.reset() - - def reset(self) -> None: - self.timestamp = time.time() + self.period - self.counter = defaultdict(int) - - def get_key(self, request: Request) -> typing.Optional[str]: - return request.environ["REMOTE_ADDR"] - - def __call__(self, func: typing.Callable) -> typing.Callable: - def handler(request, **kwargs) -> Response: - time_left = self.timestamp - time.time() - if time_left < 0: - self.reset() - - rate_key = self.get_key(request) - if rate_key is not None: - self.counter[rate_key] += 1 - if self.counter[rate_key] > self.number: - msg = f"Rate limit exceeded, wait {time_left:.0f} seconds." 
- return Response(Status.SLOW_DOWN, msg) - - return func(request, **kwargs) - - return handler diff --git a/jetforce/app/static.py b/jetforce/app/static.py index 104ae9e..a0f8156 100644 --- a/jetforce/app/static.py +++ b/jetforce/app/static.py @@ -39,15 +39,11 @@ class StaticDirectoryApplication(JetforceApplication): index_file: str = "index.gmi", cgi_directory: str = "cgi-bin", default_lang: typing.Optional[str] = None, - rate_limit: typing.Optional[str] = None, + rate_limiter: typing.Optional[RateLimiter] = None, ): - super().__init__() + super().__init__(rate_limiter=rate_limiter) - request_method = self.serve_static_file - if rate_limit is not None: - request_method = RateLimiter(rate_limit)(request_method) - - self.routes.append((RoutePattern(), request_method)) + self.routes.append((RoutePattern(), self.serve_static_file)) self.root = pathlib.Path(root_directory).resolve(strict=True) self.cgi_directory = cgi_directory.strip("/") + "/" From dd6fcb4456691b1352c738b72efd6ced73057aa9 Mon Sep 17 00:00:00 2001 From: Michael Lazar Date: Mon, 27 Jul 2020 00:09:00 -0400 Subject: [PATCH 3/3] Clean up wording --- CHANGELOG.md | 4 ++-- README.md | 2 +- examples/rate_limit.py | 2 +- jetforce/__main__.py | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 22bc02c..24f34a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,8 +21,8 @@ #### Examples -- Added a new example that demonstrates how to use the new ``RateLimiter`` - class (examples/rate_limit.py). +- Added an example that demonstrates how to use the new ``RateLimiter`` class + (examples/rate_limit.py). 
### v0.5.0 (2020-07-14) diff --git a/README.md b/README.md index 51a83ba..76bb222 100644 --- a/README.md +++ b/README.md @@ -87,7 +87,7 @@ fileserver configuration: --index-file FILE If a directory contains a file with this name, that file will be served instead of auto-generating an index page (default: index.gmi) --default-lang DEFAULT_LANG - A lang parameter that will be indicated in the response meta + A lang parameter that will be used for all text/gemini responses (default: None) --rate-limit RATE_LIMIT Enable IP rate limiting, e.g. '60/5m' (60 requests per 5 minutes) diff --git a/examples/rate_limit.py b/examples/rate_limit.py index 09e1146..d2c1c49 100644 --- a/examples/rate_limit.py +++ b/examples/rate_limit.py @@ -4,7 +4,7 @@ This example shows how you can implement rate limiting on a per-endpoint basis. """ from jetforce import GeminiServer, JetforceApplication, RateLimiter, Response, Status -# Apply a global rate limiter that will be applied to all requests +# Setup a global rate limiter that will be applied to all requests global_rate_limiter = RateLimiter("100/m") app = JetforceApplication(rate_limiter=global_rate_limiter) diff --git a/jetforce/__main__.py b/jetforce/__main__.py index 7fea58c..1c1e775 100644 --- a/jetforce/__main__.py +++ b/jetforce/__main__.py @@ -94,7 +94,7 @@ group.add_argument( ) group.add_argument( "--default-lang", - help="A lang parameter that will be indicated in the response meta", + help="A lang parameter that will be used for all text/gemini responses", default=None, dest="default_lang", )