From e68c8a3215eccdce77d0dcf05991b41b8ae6ac67 Mon Sep 17 00:00:00 2001 From: ar Date: Thu, 7 May 2020 22:13:23 +0200 Subject: [PATCH] Support for extra-path component and PATH_INFO according to RFC 3875 --- README.md | 22 +++++++++++++++++++++- jetforce.py | 39 ++++++++++++++++++++++++++++++++------- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index bece300..fc97597 100644 --- a/README.md +++ b/README.md @@ -141,7 +141,7 @@ the request using environment variables: | GATEWAY_INTERFACE | GCI/1.1 | | GEMINI_URL | gemini://mozz.us/cgi-bin/debug.cgi?foobar | HOSTNAME | mozz.us | -| PATH_INFO | /cgi-bin/debug.cgi | +| PATH_INFO | | | QUERY_STRING | foobar | | REMOTE_ADDR | 10.10.0.2 | | REMOTE_HOST | 10.10.0.2 | @@ -157,6 +157,26 @@ optional response body on subsequent lines. The bytes generated by the CGI script will be forwarded *verbatim* to the gemini client, without any additional modification by the server. +It is possible to call CGI scripts with an extra-path component, in which +case the additional path component gets stored in PATH_INFO: + +| Variable Name | Example | +| --- | --- | +| GATEWAY_INTERFACE | GCI/1.1 | +| GEMINI_URL | gemini://mozz.us/cgi-bin/debug.cgi/an/extra/path?foobar | +| HOSTNAME | mozz.us | +| PATH_INFO | /an/extra/path | +| QUERY_STRING | foobar | +| REMOTE_ADDR | 10.10.0.2 | +| REMOTE_HOST | 10.10.0.2 | +| SCRIPT_NAME | /usr/local/www/mozz/gemini/cgi-bin/debug.cgi | +| SERVER_NAME | mozz.us | +| SERVER_PORT | 1965 | +| SERVER_PROTOCOL | GEMINI | +| SERVER_SOFTWARE | jetforce/0.0.7 | + + + ## Deployment Jetforce is intended to be run behind a process manager that handles diff --git a/jetforce.py b/jetforce.py index 79aa538..4d7ac6a 100755 --- a/jetforce.py +++ b/jetforce.py @@ -298,10 +298,40 @@ class StaticDirectoryApplication(JetforceApplication): url_path = pathlib.Path(request.path.strip("/")) filename = pathlib.Path(os.path.normpath(str(url_path))) + if filename.is_absolute() or 
str(filename.name).startswith(".."): # Guard against breaking out of the directory return Response(Status.NOT_FOUND, "Not Found") + + if str(filename).startswith(self.cgi_directory): + # CGI needs special treatment to account for extra-path + # PATH_INFO component (RFC 3875 section 4.1.5) + + # Identify shortest path that is not a directory + url_buildup = '' + for i in range(1, len(url_path.parts)+1): + url_buildup = '/'.join(url_path.parts[:i]) + filename = pathlib.Path(os.path.normpath(str(url_buildup))) + filesystem_path = self.root / filename + + try: + if (filesystem_path.is_file() and + os.access(filesystem_path, os.R_OK) and + os.access(filesystem_path, os.X_OK)): + path_info = '/'.join(url_path.parts[i:]) + + if len(path_info) > 0: + # If PATH_INFO is non-empty, it should include the slash + path_info = '/' + path_info + + return self.run_cgi_script(filesystem_path, request.environ, path_info) + + except OSError: + # Filename too large, etc. + return Response(Status.NOT_FOUND, "Not Found") + + filesystem_path = self.root / filename try: @@ -313,11 +343,6 @@ class StaticDirectoryApplication(JetforceApplication): return Response(Status.NOT_FOUND, "Not Found") if filesystem_path.is_file(): - is_cgi = str(filename).startswith(self.cgi_directory) - is_exe = os.access(filesystem_path, os.X_OK) - if is_cgi and is_exe: - return self.run_cgi_script(filesystem_path, request.environ) - mimetype = self.guess_mimetype(filesystem_path.name) generator = self.load_file(filesystem_path) return Response(Status.SUCCESS, mimetype, generator) @@ -339,7 +364,7 @@ class StaticDirectoryApplication(JetforceApplication): else: return Response(Status.NOT_FOUND, "Not Found") - def run_cgi_script(self, filesystem_path: pathlib.Path, environ: dict) -> Response: + def run_cgi_script(self, filesystem_path: pathlib.Path, environ: dict, path_info: str) -> Response: """ Execute the given file as a CGI script and return the script's stdout stream to the client. 
@@ -348,6 +373,7 @@ class StaticDirectoryApplication(JetforceApplication): cgi_env = environ.copy() cgi_env["GATEWAY_INTERFACE"] = "GCI/1.1" cgi_env["SCRIPT_NAME"] = script_name + cgi_env["PATH_INFO"] = path_info # Decode the stream as unicode so we can parse the status line # Use surrogateescape to preserve any non-UTF8 byte sequences. @@ -514,7 +540,6 @@ class GeminiRequestHandler: environ = { "GEMINI_URL": self.url, "HOSTNAME": self.server.hostname, - "PATH_INFO": url_parts.path, "QUERY_STRING": url_parts.query, "REMOTE_ADDR": self.remote_addr, "REMOTE_HOST": self.remote_addr,