231 lines
		
	
	
		
			8.7 KiB
		
	
	
	
		
			Python
		
	
	
	
			
		
		
	
	
			231 lines
		
	
	
		
			8.7 KiB
		
	
	
	
		
			Python
		
	
	
	
| from __future__ import annotations
 | |
| 
 | |
| import time
 | |
| import traceback
 | |
| import typing
 | |
| import urllib.parse
 | |
| 
 | |
| from twisted.internet.address import IPv4Address, IPv6Address
 | |
| from twisted.internet.defer import ensureDeferred
 | |
| from twisted.internet.threads import deferToThread
 | |
| from twisted.protocols.basic import LineOnlyReceiver
 | |
| 
 | |
| from .__version__ import __version__
 | |
| from .app.base import JetforceApplication, Status
 | |
| from .tls import inspect_certificate
 | |
| 
 | |
| 
 | |
| class GeminiProtocol(LineOnlyReceiver):
 | |
|     """
 | |
|     Handle a single Gemini Protocol TCP request.
 | |
| 
 | |
|     The request handler manages the life of a single gemini request. It exposes
 | |
|     a simplified interface to read the request URL and write the gemini response
 | |
|     status line and body to the socket. The request URL and other server
 | |
|     information is stuffed into an ``environ`` dictionary that encapsulates the
 | |
|     request at a low level. This dictionary, along with a callback to write the
 | |
|     response data, and passed to a configurable "application" function or class.
 | |
| 
 | |
|     This design borrows heavily from the standard library's HTTP request
 | |
|     handler (http.server.BaseHTTPRequestHandler). However, I did not make any
 | |
|     attempts to directly emulate the existing conventions, because Gemini is an
 | |
|     inherently simpler protocol than HTTP and much of the boilerplate could be
 | |
|     removed.
 | |
|     """
 | |
| 
 | |
|     TIMESTAMP_FORMAT = "%d/%b/%Y:%H:%M:%S %z"
 | |
| 
 | |
|     client_addr: typing.Union[IPv4Address, IPv6Address]
 | |
|     connected_timestamp: time.struct_time
 | |
|     request: bytes
 | |
|     url: str
 | |
|     status: int
 | |
|     meta: str
 | |
|     response_buffer: str
 | |
|     response_size: int
 | |
| 
 | |
|     def __init__(self, server: "GeminiServer", app: JetforceApplication):
 | |
|         self.server = server
 | |
|         self.app = app
 | |
| 
 | |
|     def connectionMade(self):
 | |
|         """
 | |
|         This is invoked by twisted after the connection is first established.
 | |
|         """
 | |
|         self.connected_timestamp = time.localtime()
 | |
|         self.response_size = 0
 | |
|         self.response_buffer = ""
 | |
|         self.client_addr = self.transport.getPeer()
 | |
| 
 | |
|     def lineReceived(self, line):
 | |
|         """
 | |
|         This method is invoked by LineOnlyReceiver for every incoming line.
 | |
|         """
 | |
|         self.request = line
 | |
|         return ensureDeferred(self._handle_request_noblock())
 | |
| 
 | |
|     async def _handle_request_noblock(self):
 | |
|         """
 | |
|         Handle the gemini request and write the raw response to the socket.
 | |
| 
 | |
|         This method is implemented using an async coroutine, which has been
 | |
|         supported by twisted since python 3.5 by wrapping the method in
 | |
|         ensureDeferred(). Twisted + coroutines is a bitch to figure out, but
 | |
|         once it clicks it really does turn out to be an elegant solution.
 | |
| 
 | |
|         Any time that we call into the application code, we wrap the call with
 | |
|         deferToThread() which will execute the code in a separate thread using
 | |
|         twisted's thread pool. deferToThread() will return a future object
 | |
|         that we can then `await` to get the result when the thread finishes.
 | |
|         This is important because we don't want application code to block the
 | |
|         twisted event loop from serving other requests at the same time.
 | |
| 
 | |
|         In the future, I would like to add the capability for applications to
 | |
|         implement proper coroutines that can call `await` on directly without
 | |
|         needing to wrap them in threads. Conceptually, this shouldn't be too
 | |
|         difficult, but it will require implementing an alternate version of
 | |
|         the JetforceApplication that's async-compatible.
 | |
|         """
 | |
|         try:
 | |
|             self.parse_header()
 | |
|         except Exception:
 | |
|             # Malformed request, throw it away and exit immediately
 | |
|             self.server.log_message(traceback.format_exc())
 | |
|             self.write_status(Status.BAD_REQUEST, "Malformed request")
 | |
|             self.flush_status()
 | |
|             self.transport.loseConnection()
 | |
|             raise
 | |
| 
 | |
|         try:
 | |
|             environ = self.build_environ()
 | |
|             response_generator = await deferToThread(
 | |
|                 self.app, environ, self.write_status
 | |
|             )
 | |
|             while True:
 | |
|                 try:
 | |
|                     data = await deferToThread(response_generator.__next__)
 | |
|                     self.write_body(data)
 | |
|                 except StopIteration:
 | |
|                     break
 | |
|         except Exception:
 | |
|             self.server.log_message(traceback.format_exc())
 | |
|             self.write_status(Status.CGI_ERROR, "An unexpected error occurred")
 | |
|         finally:
 | |
|             self.flush_status()
 | |
|             self.log_request()
 | |
|             self.transport.loseConnection()
 | |
| 
 | |
|     def build_environ(self) -> typing.Dict[str, typing.Any]:
 | |
|         """
 | |
|         Construct a dictionary that will be passed to the application handler.
 | |
| 
 | |
|         Variable names (mostly) conform to the CGI spec defined in RFC 3875.
 | |
|         The TLS variable names borrow from the GLV-1.12556 server.
 | |
|         """
 | |
|         url_parts = urllib.parse.urlparse(self.url)
 | |
|         environ = {
 | |
|             "GEMINI_URL": self.url,
 | |
|             "HOSTNAME": self.server.hostname,
 | |
|             "PATH_INFO": url_parts.path,
 | |
|             "QUERY_STRING": url_parts.query,
 | |
|             "REMOTE_ADDR": self.client_addr.host,
 | |
|             "REMOTE_HOST": self.client_addr.host,
 | |
|             "SERVER_NAME": self.server.hostname,
 | |
|             "SERVER_PORT": str(self.client_addr.port),
 | |
|             "SERVER_PROTOCOL": "GEMINI",
 | |
|             "SERVER_SOFTWARE": f"jetforce/{__version__}",
 | |
|             "client_certificate": None,
 | |
|         }
 | |
| 
 | |
|         cert = self.transport.getPeerCertificate()
 | |
|         if cert:
 | |
|             x509_cert = cert.to_cryptography()
 | |
|             cert_data = inspect_certificate(x509_cert)
 | |
|             conn = self.transport.getHandle()
 | |
|             environ.update(
 | |
|                 {
 | |
|                     "client_certificate": x509_cert,
 | |
|                     "AUTH_TYPE": "CERTIFICATE",
 | |
|                     "REMOTE_USER": cert_data["common_name"],
 | |
|                     "TLS_CLIENT_HASH": cert_data["fingerprint"],
 | |
|                     "TLS_CLIENT_NOT_BEFORE": cert_data["not_before"],
 | |
|                     "TLS_CLIENT_NOT_AFTER": cert_data["not_after"],
 | |
|                     "TLS_CLIENT_SERIAL_NUMBER": cert_data["serial_number"],
 | |
|                     # Grab the value that was stashed during the TLS handshake
 | |
|                     "TLS_CLIENT_VERIFIED": getattr(conn, "verified", False),
 | |
|                     "TLS_CIPHER": conn.get_cipher_name(),
 | |
|                     "TLS_VERSION": conn.get_protocol_version_name(),
 | |
|                 }
 | |
|             )
 | |
|         return environ
 | |
| 
 | |
|     def parse_header(self) -> None:
 | |
|         """
 | |
|         Parse the gemini header line.
 | |
| 
 | |
|         The request is a single UTF-8 line formatted as: <URL>\r\n
 | |
|         """
 | |
|         if len(self.request) > 1024:
 | |
|             raise ValueError("URL exceeds max length of 1024 bytes")
 | |
| 
 | |
|         self.url = self.request.decode()
 | |
| 
 | |
|     def write_status(self, status: int, meta: str) -> None:
 | |
|         """
 | |
|         Write the gemini status line to an internal buffer.
 | |
| 
 | |
|         The status line is a single UTF-8 line formatted as:
 | |
|             <code>\t<meta>\r\n
 | |
| 
 | |
|         If the response status is 2, the meta field will contain the mimetype
 | |
|         of the response data sent. If the status is something else, the meta
 | |
|         will contain a descriptive message.
 | |
| 
 | |
|         The status is not written immediately, it's added to an internal buffer
 | |
|         that must be flushed. This is done so that the status can be updated as
 | |
|         long as no other data has been written to the stream yet.
 | |
|         """
 | |
|         self.status = status
 | |
|         self.meta = meta
 | |
|         self.response_buffer = f"{status}\t{meta}\r\n"
 | |
| 
 | |
|     def write_body(self, data: typing.Union[str, bytes]) -> None:
 | |
|         """
 | |
|         Write bytes to the gemini response body.
 | |
|         """
 | |
|         if isinstance(data, str):
 | |
|             data = data.encode()
 | |
| 
 | |
|         self.flush_status()
 | |
|         self.response_size += len(data)
 | |
|         self.transport.write(data)
 | |
| 
 | |
|     def flush_status(self) -> None:
 | |
|         """
 | |
|         Flush the status line from the internal buffer to the socket stream.
 | |
|         """
 | |
|         if self.response_buffer and not self.response_size:
 | |
|             data = self.response_buffer.encode()
 | |
|             self.response_size += len(data)
 | |
|             self.transport.write(data)
 | |
|         self.response_buffer = ""
 | |
| 
 | |
|     def log_request(self) -> None:
 | |
|         """
 | |
|         Log a gemini request using a format derived from the Common Log Format.
 | |
|         """
 | |
|         try:
 | |
|             message = '{} [{}] "{}" {} {} {}'.format(
 | |
|                 self.client_addr.host,
 | |
|                 time.strftime(self.TIMESTAMP_FORMAT, self.connected_timestamp),
 | |
|                 self.url,
 | |
|                 self.status,
 | |
|                 self.meta,
 | |
|                 self.response_size,
 | |
|             )
 | |
|         except AttributeError:
 | |
|             # The connection ended before we got far enough to log anything
 | |
|             pass
 | |
|         else:
 | |
|             self.server.log_message(message)
 |