Support for extra-path component and PATH_INFO according to RFC 3875

This commit is contained in:
ar 2020-05-07 22:13:23 +02:00
parent 2a97095247
commit e68c8a3215
2 changed files with 53 additions and 8 deletions

View File

@ -141,7 +141,7 @@ the request using environment variables:
| GATEWAY_INTERFACE | GCI/1.1 | | GATEWAY_INTERFACE | GCI/1.1 |
| GEMINI_URL | gemini://mozz.us/cgi-bin/debug.cgi?foobar | GEMINI_URL | gemini://mozz.us/cgi-bin/debug.cgi?foobar
| HOSTNAME | mozz.us | | HOSTNAME | mozz.us |
| PATH_INFO | /cgi-bin/debug.cgi | | PATH_INFO | |
| QUERY_STRING | foobar | | QUERY_STRING | foobar |
| REMOTE_ADDR | 10.10.0.2 | | REMOTE_ADDR | 10.10.0.2 |
| REMOTE_HOST | 10.10.0.2 | | REMOTE_HOST | 10.10.0.2 |
@ -157,6 +157,26 @@ optional response body on subsequent lines. The bytes generated by the
CGI script will be forwarded *verbatim* to the gemini client, without any CGI script will be forwarded *verbatim* to the gemini client, without any
additional modification by the server. additional modification by the server.
It is possible to call CGI scripts with an extra-path component, in which
case the additional path component gets stored in PATH_INFO:
| Variable Name | Example |
| --- | --- |
| GATEWAY_INTERFACE | GCI/1.1 |
| GEMINI_URL | gemini://mozz.us/cgi-bin/debug.cgi/an/extra/path?foobar |
| HOSTNAME | mozz.us |
| PATH_INFO | /an/extra/path |
| QUERY_STRING | foobar |
| REMOTE_ADDR | 10.10.0.2 |
| REMOTE_HOST | 10.10.0.2 |
| SCRIPT_NAME | /usr/local/www/mozz/gemini/cgi-bin/debug.cgi |
| SERVER_NAME | mozz.us |
| SERVER_PORT | 1965 |
| SERVER_PROTOCOL | GEMINI |
| SERVER_SOFTWARE | jetforce/0.0.7 |
## Deployment ## Deployment
Jetforce is intended to be run behind a process manager that handles Jetforce is intended to be run behind a process manager that handles

View File

@ -298,10 +298,40 @@ class StaticDirectoryApplication(JetforceApplication):
url_path = pathlib.Path(request.path.strip("/")) url_path = pathlib.Path(request.path.strip("/"))
filename = pathlib.Path(os.path.normpath(str(url_path))) filename = pathlib.Path(os.path.normpath(str(url_path)))
if filename.is_absolute() or str(filename.name).startswith(".."): if filename.is_absolute() or str(filename.name).startswith(".."):
# Guard against breaking out of the directory # Guard against breaking out of the directory
return Response(Status.NOT_FOUND, "Not Found") return Response(Status.NOT_FOUND, "Not Found")
if str(filename).startswith(self.cgi_directory):
# CGI needs special treatment to account for extra-path
# PATH_INFO component (RFC 3875 section 4.1.5)
# Identify the shortest path prefix that is a readable, executable file
url_buildup = ''
for i in range(1, len(url_path.parts)+1):
url_buildup = '/'.join(url_path.parts[:i])
filename = pathlib.Path(os.path.normpath(str(url_buildup)))
filesystem_path = self.root / filename
try:
if (filesystem_path.is_file() and
os.access(filesystem_path, os.R_OK) and
os.access(filesystem_path, os.X_OK)):
path_info = '/'.join(url_path.parts[i:])
if len(path_info) > 0:
# If PATH_INFO is non-empty, it must begin with a leading slash
path_info = '/' + path_info
return self.run_cgi_script(filesystem_path, request.environ, path_info)
except OSError:
# Filename too large, etc.
return Response(Status.NOT_FOUND, "Not Found")
filesystem_path = self.root / filename filesystem_path = self.root / filename
try: try:
@ -313,11 +343,6 @@ class StaticDirectoryApplication(JetforceApplication):
return Response(Status.NOT_FOUND, "Not Found") return Response(Status.NOT_FOUND, "Not Found")
if filesystem_path.is_file(): if filesystem_path.is_file():
is_cgi = str(filename).startswith(self.cgi_directory)
is_exe = os.access(filesystem_path, os.X_OK)
if is_cgi and is_exe:
return self.run_cgi_script(filesystem_path, request.environ)
mimetype = self.guess_mimetype(filesystem_path.name) mimetype = self.guess_mimetype(filesystem_path.name)
generator = self.load_file(filesystem_path) generator = self.load_file(filesystem_path)
return Response(Status.SUCCESS, mimetype, generator) return Response(Status.SUCCESS, mimetype, generator)
@ -339,7 +364,7 @@ class StaticDirectoryApplication(JetforceApplication):
else: else:
return Response(Status.NOT_FOUND, "Not Found") return Response(Status.NOT_FOUND, "Not Found")
def run_cgi_script(self, filesystem_path: pathlib.Path, environ: dict) -> Response: def run_cgi_script(self, filesystem_path: pathlib.Path, environ: dict, path_info: str) -> Response:
""" """
Execute the given file as a CGI script and return the script's stdout Execute the given file as a CGI script and return the script's stdout
stream to the client. stream to the client.
@ -348,6 +373,7 @@ class StaticDirectoryApplication(JetforceApplication):
cgi_env = environ.copy() cgi_env = environ.copy()
cgi_env["GATEWAY_INTERFACE"] = "GCI/1.1" cgi_env["GATEWAY_INTERFACE"] = "GCI/1.1"
cgi_env["SCRIPT_NAME"] = script_name cgi_env["SCRIPT_NAME"] = script_name
cgi_env["PATH_INFO"] = path_info
# Decode the stream as unicode so we can parse the status line # Decode the stream as unicode so we can parse the status line
# Use surrogateescape to preserve any non-UTF8 byte sequences. # Use surrogateescape to preserve any non-UTF8 byte sequences.
@ -514,7 +540,6 @@ class GeminiRequestHandler:
environ = { environ = {
"GEMINI_URL": self.url, "GEMINI_URL": self.url,
"HOSTNAME": self.server.hostname, "HOSTNAME": self.server.hostname,
"PATH_INFO": url_parts.path,
"QUERY_STRING": url_parts.query, "QUERY_STRING": url_parts.query,
"REMOTE_ADDR": self.remote_addr, "REMOTE_ADDR": self.remote_addr,
"REMOTE_HOST": self.remote_addr, "REMOTE_HOST": self.remote_addr,