From e68c8a3215eccdce77d0dcf05991b41b8ae6ac67 Mon Sep 17 00:00:00 2001 From: ar Date: Thu, 7 May 2020 22:13:23 +0200 Subject: [PATCH 1/2] Support for extra-path component and PATH_INFO according to RFC 3875 --- README.md | 22 +++++++++++++++++++++- jetforce.py | 39 ++++++++++++++++++++++++++++++++------- 2 files changed, 53 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index bece300..fc97597 100644 --- a/README.md +++ b/README.md @@ -141,7 +141,7 @@ the request using environment variables: | GATEWAY_INTERFACE | GCI/1.1 | | GEMINI_URL | gemini://mozz.us/cgi-bin/debug.cgi?foobar | HOSTNAME | mozz.us | -| PATH_INFO | /cgi-bin/debug.cgi | +| PATH_INFO | | | QUERY_STRING | foobar | | REMOTE_ADDR | 10.10.0.2 | | REMOTE_HOST | 10.10.0.2 | @@ -157,6 +157,26 @@ optional response body on subsequent lines. The bytes generated by the CGI script will be forwarded *verbatim* to the gemini client, without any additional modification by the server. +It is possible to call CGI scripts with an extra-path component, in which +case the additional path component gets stored in PATH_INFO: + +| Variable Name | Example | +| --- | --- | +| GATEWAY_INTERFACE | GCI/1.1 | +| GEMINI_URL | gemini://mozz.us/cgi-bin/debug.cgi/an/extra/path?foobar +| HOSTNAME | mozz.us | +| PATH_INFO | /an/extra/path | +| QUERY_STRING | foobar | +| REMOTE_ADDR | 10.10.0.2 | +| REMOTE_HOST | 10.10.0.2 | +| SCRIPT_NAME | /usr/local/www/mozz/gemini/cgi-bin/debug.cgi | +| SERVER_NAME | mozz.us | +| SERVER_PORT | 1965 | +| SERVER_PROTOCOL | GEMINI | +| SERVER_SOFTWARE | jetforce/0.0.7 | + + + ## Deployment Jetforce is intended to be run behind a process manager that handles diff --git a/jetforce.py b/jetforce.py index 79aa538..4d7ac6a 100755 --- a/jetforce.py +++ b/jetforce.py @@ -298,10 +298,40 @@ class StaticDirectoryApplication(JetforceApplication): url_path = pathlib.Path(request.path.strip("/")) filename = pathlib.Path(os.path.normpath(str(url_path))) + if filename.is_absolute() or 
str(filename.name).startswith(".."): # Guard against breaking out of the directory return Response(Status.NOT_FOUND, "Not Found") + + if str(filename).startswith(self.cgi_directory): + # CGI needs special treatment to account for extra-path + # PATH_INFO component (RFC 3875 section 4.1.5) + + # Identify shortest path that is not a directory + url_buildup = '' + for i in range(1, len(url_path.parts)+1): + url_buildup = '/'.join(url_path.parts[:i]) + filename = pathlib.Path(os.path.normpath(str(url_buildup))) + filesystem_path = self.root / filename + + try: + if (filesystem_path.is_file() and + os.access(filesystem_path, os.R_OK) and + os.access(filesystem_path, os.X_OK)): + path_info = '/'.join(url_path.parts[i:]) + + if len(path_info) > 0: + # If PATH_INFO is non-empty, it should include the slash + path_info = '/' + path_info + + return self.run_cgi_script(filesystem_path, request.environ, path_info) + + except OSError: + # Filename too large, etc. + return Response(Status.NOT_FOUND, "Not Found") + + filesystem_path = self.root / filename try: @@ -313,11 +343,6 @@ class StaticDirectoryApplication(JetforceApplication): return Response(Status.NOT_FOUND, "Not Found") if filesystem_path.is_file(): - is_cgi = str(filename).startswith(self.cgi_directory) - is_exe = os.access(filesystem_path, os.X_OK) - if is_cgi and is_exe: - return self.run_cgi_script(filesystem_path, request.environ) - mimetype = self.guess_mimetype(filesystem_path.name) generator = self.load_file(filesystem_path) return Response(Status.SUCCESS, mimetype, generator) @@ -339,7 +364,7 @@ class StaticDirectoryApplication(JetforceApplication): else: return Response(Status.NOT_FOUND, "Not Found") - def run_cgi_script(self, filesystem_path: pathlib.Path, environ: dict) -> Response: + def run_cgi_script(self, filesystem_path: pathlib.Path, environ: dict, path_info: str) -> Response: """ Execute the given file as a CGI script and return the script's stdout stream to the client. 
@@ -348,6 +373,7 @@ class StaticDirectoryApplication(JetforceApplication): cgi_env = environ.copy() cgi_env["GATEWAY_INTERFACE"] = "GCI/1.1" cgi_env["SCRIPT_NAME"] = script_name + cgi_env["PATH_INFO"] = path_info # Decode the stream as unicode so we can parse the status line # Use surrogateescape to preserve any non-UTF8 byte sequences. @@ -514,7 +540,6 @@ class GeminiRequestHandler: environ = { "GEMINI_URL": self.url, "HOSTNAME": self.server.hostname, - "PATH_INFO": url_parts.path, "QUERY_STRING": url_parts.query, "REMOTE_ADDR": self.remote_addr, "REMOTE_HOST": self.remote_addr, From fe037b812b1a3c1fd58cb4d9b58382cb6e5f4632 Mon Sep 17 00:00:00 2001 From: Michael Lazar Date: Sun, 24 May 2020 23:09:28 -0400 Subject: [PATCH 2/2] Fix SCRIPT_NAME to use the RFC 3875 definition, minor refactoring --- jetforce/app/static.py | 45 +++++++++++++++++++++++++++++++++--------- jetforce/protocol.py | 1 - 2 files changed, 36 insertions(+), 10 deletions(-) diff --git a/jetforce/app/static.py b/jetforce/app/static.py index 59a1f27..b86d162 100644 --- a/jetforce/app/static.py +++ b/jetforce/app/static.py @@ -52,6 +52,41 @@ class StaticDirectoryApplication(JetforceApplication): # Guard against breaking out of the directory return Response(Status.NOT_FOUND, "Not Found") + if str(filename).startswith(self.cgi_directory): + # CGI needs special treatment to account for extra-path + # PATH_INFO component (RFC 3875 section 4.1.5) + + # Identify the shortest path that is not a directory + for i in range(2, len(filename.parts) + 1): + # Split the path into SCRIPT_NAME and PATH_INFO + script_name = pathlib.Path(*filename.parts[:i]) + path_info = pathlib.Path(*filename.parts[i:]) + + filesystem_path = self.root / script_name + try: + if not filesystem_path.is_file(): + continue + elif not os.access(filesystem_path, os.R_OK): + continue + elif not os.access(filesystem_path, os.X_OK): + continue + else: + if str(script_name) == ".": + request.environ["SCRIPT_NAME"] = "" + else: + 
request.environ["SCRIPT_NAME"] = f"/{script_name}" + + if str(path_info) == ".": + request.environ["PATH_INFO"] = "" + else: + request.environ["PATH_INFO"] = f"/{path_info}" + + return self.run_cgi_script(filesystem_path, request.environ) + + except OSError: + # Filename too large, etc. + return Response(Status.NOT_FOUND, "Not Found") + filesystem_path = self.root / filename try: @@ -63,11 +98,6 @@ class StaticDirectoryApplication(JetforceApplication): return Response(Status.NOT_FOUND, "Not Found") if filesystem_path.is_file(): - is_cgi = str(filename).startswith(self.cgi_directory) - is_exe = os.access(filesystem_path, os.X_OK) - if is_cgi and is_exe: - return self.run_cgi_script(filesystem_path, request.environ) - mimetype = self.guess_mimetype(filesystem_path.name) generator = self.load_file(filesystem_path) return Response(Status.SUCCESS, mimetype, generator) @@ -95,16 +125,13 @@ class StaticDirectoryApplication(JetforceApplication): Execute the given file as a CGI script and return the script's stdout stream to the client. """ - script_name = str(filesystem_path) - cgi_env = {k: v for k, v in environ.items() if k.isupper()} cgi_env["GATEWAY_INTERFACE"] = "GCI/1.1" - cgi_env["SCRIPT_NAME"] = script_name # Decode the stream as unicode so we can parse the status line # Use surrogateescape to preserve any non-UTF8 byte sequences. out = subprocess.Popen( - [script_name], + [str(filesystem_path)], stdout=subprocess.PIPE, env=cgi_env, bufsize=1, diff --git a/jetforce/protocol.py b/jetforce/protocol.py index cd435c3..582c679 100644 --- a/jetforce/protocol.py +++ b/jetforce/protocol.py @@ -126,7 +126,6 @@ class GeminiProtocol(LineOnlyReceiver): environ = { "GEMINI_URL": self.url, "HOSTNAME": self.server.hostname, - "PATH_INFO": url_parts.path, "QUERY_STRING": url_parts.query, "REMOTE_ADDR": self.client_addr.host, "REMOTE_HOST": self.client_addr.host,