Networking & CGI optimizations (#50)

More efficient TCP packets, miscellaneous fixes to CGI handling.
This commit is contained in:
Michael Lazar 2021-01-05 23:56:41 -05:00 committed by GitHub
parent 135dbda878
commit bfa68c62de
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 87 additions and 35 deletions

View File

@ -2,19 +2,17 @@
### v0.8.0 (Unreleased) ### v0.8.0 (Unreleased)
#### Spec Changes
- Added support for international domain names using IDN encoding. - Added support for international domain names using IDN encoding.
- Several improvements to internal python type hinting coverage.
#### New Features
- Several fixes & improvements to python type hinting coverage.
- Added a ``py.typed`` file to indicate project support for type hints. - Added a ``py.typed`` file to indicate project support for type hints.
- Optimized TCP packets when streaming directory listings.
#### Bug Fixes - Optimized TCP packets when streaming large CGI responses.
- Improved error handling to catch invalid responses from CGI scripts.
- Fixed a bug where TLS_CLIENT_AUTHORISED would sometimes be set to - Fixed a bug where TLS_CLIENT_AUTHORISED would sometimes be set to
``True``/``False`` instead of ``1``/``0``. ``True``/``False`` instead of ``1``/``0``.
- Fixed an error handling edge case when the client kills the connection
before all data has been sent. A ``CancelledError`` exception will now
be raised internally instead of a ``ConnectionClosed`` exception.
### v0.7.0 (2020-12-06) ### v0.7.0 (2020-12-06)

View File

@ -1,4 +1,3 @@
import codecs
import mimetypes import mimetypes
import os import os
import pathlib import pathlib
@ -6,6 +5,10 @@ import subprocess
import typing import typing
import urllib.parse import urllib.parse
from twisted.internet import reactor
from twisted.internet.task import deferLater
from twisted.internet.defer import Deferred
from .base import ( from .base import (
EnvironDict, EnvironDict,
JetforceApplication, JetforceApplication,
@ -34,6 +37,12 @@ class StaticDirectoryApplication(JetforceApplication):
# Chunk size for streaming files, taken from the twisted FileSender class # Chunk size for streaming files, taken from the twisted FileSender class
CHUNK_SIZE = 2 ** 14 CHUNK_SIZE = 2 ** 14
# Length of time to defer while waiting for more data from a CGI script
CGI_POLLING_PERIOD = 0.05
# Maximum size in bytes of the first line of a server response
CGI_MAX_RESPONSE_HEADER_SIZE = 2048
mimetypes: mimetypes.MimeTypes mimetypes: mimetypes.MimeTypes
def __init__( def __init__(
@ -157,27 +166,54 @@ class StaticDirectoryApplication(JetforceApplication):
cgi_env = {k: str(v) for k, v in environ.items() if k.isupper()} cgi_env = {k: str(v) for k, v in environ.items() if k.isupper()}
cgi_env["GATEWAY_INTERFACE"] = "CGI/1.1" cgi_env["GATEWAY_INTERFACE"] = "CGI/1.1"
# Decode the stream as unicode so we can parse the status line proc = subprocess.Popen(
# Use surrogateescape to preserve any non-UTF8 byte sequences.
out = subprocess.Popen(
[str(filesystem_path)], [str(filesystem_path)],
stdout=subprocess.PIPE, stdout=subprocess.PIPE,
env=cgi_env, env=cgi_env,
bufsize=1, bufsize=0,
universal_newlines=True,
errors="surrogateescape",
) )
status_line = out.stdout.readline().strip() status_line = proc.stdout.readline(self.CGI_MAX_RESPONSE_HEADER_SIZE)
status_parts = status_line.split(maxsplit=1) if len(status_line) == self.CGI_MAX_RESPONSE_HEADER_SIZE:
# Too large response header line received from the CGI script.
return Response(Status.CGI_ERROR, "Unexpected Error")
status_parts = status_line.decode().strip().split(maxsplit=1)
if len(status_parts) != 2 or not status_parts[0].isdecimal(): if len(status_parts) != 2 or not status_parts[0].isdecimal():
# Malformed header line received from the CGI script.
return Response(Status.CGI_ERROR, "Unexpected Error") return Response(Status.CGI_ERROR, "Unexpected Error")
status, meta = status_parts status, meta = status_parts
return Response(int(status), meta, self.cgi_body_generator(proc))
def cgi_body_generator(
    self,
    proc: "subprocess.Popen[bytes]",
) -> "typing.Iterator[typing.Union[bytes, Deferred]]":
    """
    Stream the stdout of a running CGI process as response body chunks.

    Yields ``bytes`` chunks of at most ``CHUNK_SIZE`` bytes, interleaved
    with ``Deferred`` objects that the caller is expected to wait on
    before asking for more data.

    The annotations are written as strings so they are not evaluated when
    the method is defined; subscripting ``subprocess.Popen`` at runtime is
    only supported on Python 3.9+.

    NOTE(review): ``proc.stdout.read()`` is called on the pipe as-is; if
    the pipe is in the default blocking mode, a read can stall until the
    child writes something or closes its stdout. Confirm against the code
    that spawns the process.
    """
    try:
        while True:
            # Refresh proc.returncode *before* reading, so that a finished
            # process is only treated as drained after one more read.
            proc.poll()
            data = proc.stdout.read(self.CHUNK_SIZE)
            if len(data) == self.CHUNK_SIZE:
                # Full chunk: more is probably waiting, so send it and
                # immediately come back for the next one.
                yield data
            elif proc.returncode is None:
                # Short read while the subprocess is still running. Send
                # what we have, then pause to give the script time to
                # buffer more bytes in stdout.
                if data:
                    yield data
                yield deferLater(reactor, self.CGI_POLLING_PERIOD)
            else:
                # Subprocess has finished, send everything that's left.
                if data:
                    yield data
                break
    finally:
        # Runs when the stream is exhausted and also when the generator is
        # closed or garbage collected early (e.g. the client went away),
        # so the pipe's file descriptor is never left dangling.
        proc.stdout.close()
def load_file(self, filesystem_path: pathlib.Path) -> typing.Iterator[bytes]: def load_file(self, filesystem_path: pathlib.Path) -> typing.Iterator[bytes]:
""" """
@ -196,9 +232,9 @@ class StaticDirectoryApplication(JetforceApplication):
""" """
Auto-generate a text/gemini document based on the contents of the file system. Auto-generate a text/gemini document based on the contents of the file system.
""" """
yield f"Directory: /{url_path}\r\n".encode() buffer = f"Directory: /{url_path}]\r\n".encode()
if url_path.parent != url_path: if url_path.parent != url_path:
yield f"=>/{url_path.parent}\t..\r\n".encode() buffer += f"=>/{url_path.parent}\t..\r\n".encode()
for file in sorted(filesystem_path.iterdir()): for file in sorted(filesystem_path.iterdir()):
if file.name.startswith("."): if file.name.startswith("."):
@ -207,9 +243,16 @@ class StaticDirectoryApplication(JetforceApplication):
encoded_path = urllib.parse.quote(str(url_path / file.name)) encoded_path = urllib.parse.quote(str(url_path / file.name))
if file.is_dir(): if file.is_dir():
yield f"=>/{encoded_path}/\t{file.name}/\r\n".encode() buffer += f"=>/{encoded_path}/\t{file.name}/\r\n".encode()
else: else:
yield f"=>/{encoded_path}\t{file.name}\r\n".encode() buffer += f"=>/{encoded_path}\t{file.name}\r\n".encode()
if len(buffer) >= self.CHUNK_SIZE:
data, buffer = buffer[: self.CHUNK_SIZE], buffer[self.CHUNK_SIZE :]
yield data
if buffer:
yield buffer
def guess_mimetype(self, filename: str) -> str: def guess_mimetype(self, filename: str) -> str:
""" """

View File

@ -6,8 +6,7 @@ import typing
import urllib.parse import urllib.parse
from twisted.internet.address import IPv4Address, IPv6Address from twisted.internet.address import IPv4Address, IPv6Address
from twisted.internet.defer import Deferred, ensureDeferred from twisted.internet.defer import Deferred, ensureDeferred, CancelledError
from twisted.internet.error import ConnectionClosed
from twisted.internet.protocol import connectionDone from twisted.internet.protocol import connectionDone
from twisted.internet.task import deferLater from twisted.internet.task import deferLater
from twisted.protocols.basic import LineOnlyReceiver from twisted.protocols.basic import LineOnlyReceiver
@ -40,6 +39,7 @@ class GeminiProtocol(LineOnlyReceiver):
""" """
TIMESTAMP_FORMAT = "%d/%b/%Y:%H:%M:%S %z" TIMESTAMP_FORMAT = "%d/%b/%Y:%H:%M:%S %z"
DEBUG = False
client_addr: typing.Union[IPv4Address, IPv6Address] client_addr: typing.Union[IPv4Address, IPv6Address]
connected_timestamp: time.struct_time connected_timestamp: time.struct_time
@ -69,8 +69,7 @@ class GeminiProtocol(LineOnlyReceiver):
This is invoked by twisted after the connection has been closed. This is invoked by twisted after the connection has been closed.
""" """
if self._currently_deferred: if self._currently_deferred:
self._currently_deferred.errback(reason) self._currently_deferred.cancel()
self._currently_deferred = None
def lineReceived(self, line: bytes) -> Deferred: def lineReceived(self, line: bytes) -> Deferred:
""" """
@ -150,7 +149,8 @@ class GeminiProtocol(LineOnlyReceiver):
response_generator = await self.track_deferred(response_generator) response_generator = await self.track_deferred(response_generator)
else: else:
# Yield control of the event loop # Yield control of the event loop
await deferLater(self.server.reactor, 0) deferred = deferLater(self.server.reactor, 0)
await self.track_deferred(deferred)
for data in response_generator: for data in response_generator:
if isinstance(data, Deferred): if isinstance(data, Deferred):
@ -159,9 +159,9 @@ class GeminiProtocol(LineOnlyReceiver):
else: else:
self.write_body(data) self.write_body(data)
# Yield control of the event loop # Yield control of the event loop
await deferLater(self.server.reactor, 0) deferred = deferLater(self.server.reactor, 0)
await self.track_deferred(deferred)
except ConnectionClosed: except CancelledError:
pass pass
except Exception: except Exception:
self.server.log_message(traceback.format_exc()) self.server.log_message(traceback.format_exc())
@ -172,6 +172,10 @@ class GeminiProtocol(LineOnlyReceiver):
self.finish_connection() self.finish_connection()
async def track_deferred(self, deferred: Deferred) -> typing.Union[str, bytes]: async def track_deferred(self, deferred: Deferred) -> typing.Union[str, bytes]:
"""
Keep track of the deferred that we're waiting on so we can send an
error back to it if the connection is abruptly killed.
"""
self._currently_deferred = deferred self._currently_deferred = deferred
try: try:
return await deferred return await deferred
@ -252,15 +256,20 @@ class GeminiProtocol(LineOnlyReceiver):
self.meta = meta self.meta = meta
self.response_buffer = f"{status} {meta}\r\n" self.response_buffer = f"{status} {meta}\r\n"
def write_body(self, data: typing.Union[str, bytes, None]) -> None:
    """
    Send one chunk of the gemini response body to the client.

    A ``None`` chunk is ignored. Text chunks are encoded to bytes first.
    ``flush_status()`` is called before the chunk is counted and written.
    """
    if data is None:
        return

    payload = data.encode() if isinstance(data, str) else data

    # flush_status() must run before response_size is bumped: it checks
    # that counter to decide whether the status line is still pending.
    self.flush_status()
    self.response_size += len(payload)

    if self.DEBUG:
        print(f"Writing body: {len(payload)} bytes")
    self.transport.write(payload)
def flush_status(self) -> None: def flush_status(self) -> None:
@ -270,6 +279,8 @@ class GeminiProtocol(LineOnlyReceiver):
if self.response_buffer and not self.response_size: if self.response_buffer and not self.response_size:
data = self.response_buffer.encode() data = self.response_buffer.encode()
self.response_size += len(data) self.response_size += len(data)
if self.DEBUG:
print(f"Writing status: {len(data)} bytes")
self.transport.write(data) self.transport.write(data)
self.response_buffer = "" self.response_buffer = ""