diff --git a/jetforce.py b/jetforce.py index 307a72b..6454245 100755 --- a/jetforce.py +++ b/jetforce.py @@ -10,6 +10,7 @@ import subprocess import sys import tempfile import typing +import urllib.parse # Fail early to avoid crashing with an obscure error if sys.version_info < (3, 7): @@ -76,8 +77,8 @@ class EchoApp: def __iter__(self) -> typing.Iterator[bytes]: self.send_status(STATUS_SUCCESS, "text/plain") - path = self.environ["PATH_INFO"] - yield f"Received path: {path}".encode() + url = self.environ["RAW_URL"] + yield f"Received path: {url}".encode() class StaticDirectoryApp: @@ -108,7 +109,7 @@ class StaticDirectoryApp: return build_class def __iter__(self) -> typing.Iterator[bytes]: - url_path = pathlib.Path(self.environ["PATH_INFO"].strip("/")) + url_path = pathlib.Path(self.environ["URL"].path.strip("/")) filename = pathlib.Path(os.path.normpath(str(url_path))) if filename.is_absolute() or str(filename.name).startswith(".."): @@ -184,9 +185,10 @@ class GeminiRequestHandler: self.writer: typing.Optional[asyncio.StreamWriter] = None self.received_timestamp: typing.Optional[datetime.datetime] = None self.remote_addr: typing.Optional[str] = None - self.path: typing.Optional[str] = None + self.raw_url: typing.Optional[str] = None + self.url: typing.Optional[urllib.parse.ParseResult] = None self.status: typing.Optional[int] = None - self.mimetype: typing.Optional[str] = None + self.meta: typing.Optional[str] = None self.response_buffer: typing.Optional[str] = None self.response_size: int = 0 @@ -206,10 +208,24 @@ class GeminiRequestHandler: self.received_timestamp = datetime.datetime.utcnow() try: - await self.parse_request() + await self.parse_header() except Exception: # Malformed request, throw it away and exit immediately - return + self.write_status(STATUS_BAD_REQUEST, "Could not understand request line") + return await self.close_connection() + + # Discard proxy requests, may revisit this in a later version + if self.url.scheme and self.url.scheme != "gemini": + self.write_status( + STATUS_PROXY_REQUEST_REFUSED, 'URL scheme must be "gemini://"' + ) + return await self.close_connection() + elif self.url.hostname and self.url.hostname != self.server.hostname: + self.write_status( + STATUS_PROXY_REQUEST_REFUSED, + f'URL hostname must be "{self.server.hostname}"', + ) + return await self.close_connection() try: environ = self.build_environ() @@ -220,49 +236,58 @@ class GeminiRequestHandler: self.write_status(STATUS_CGI_ERROR, str(e)) raise finally: - await self.flush_status() - self.log_request() - await writer.drain() + await self.close_connection() def build_environ(self) -> typing.Dict[str, typing.Any]: """ Construct a dictionary that will be passed to the application handler. """ return { - "SERVER_NAME": self.server.host, + "SERVER_HOST": self.server.host, "SERVER_PORT": self.server.port, "REMOTE_ADDR": self.remote_addr, - "PATH_INFO": self.path, + "HOSTNAME": self.server.hostname, + "RAW_URL": self.raw_url, + "URL": self.url, } - async def parse_request(self) -> None: + async def parse_header(self) -> None: """ - Parse the gemini request line. + Parse the gemini header line. - The request is a single UTF-8 line formatted as: \r\n + The request is a single UTF-8 line formatted as: \r\n """ data = await self.reader.readuntil(b"\r\n") - request = data.decode() - self.path = request[:-2] # strip the line ending + data = data[:-2] # strip the line ending + if len(data) > 1024: + raise ValueError("URL exceeds max length of 1024 bytes") - def write_status(self, status: int, mimetype: str) -> None: + self.raw_url = data.decode() + self.url = urllib.parse.urlparse(self.raw_url) + if not self.url.netloc: + # URL does not contain a scheme and was not prefixed with // per RFC 1808 + # TODO: Suggest spec should enforce // when scheme is omitted + self.url = urllib.parse.urlparse(f"//{self.raw_url}") + + def write_status(self, status: int, meta: str) -> None: """ Write the gemini status line to an internal buffer. The status line is a single UTF-8 line formatted as: - \t\r\n + \t\r\n - If the response status is 2, the mimetype field will contain the type - of the response data sent. If the status is something else, the mimetype + If the response status is 2, the meta field will contain the mimetype + of the response data sent. If the status is something else, the meta will contain a descriptive message. The status is not written immediately, it's added to an internal buffer that must be flushed. This is done so that the status can be updated as long as no other data has been written to the stream yet. """ + # TODO: enforce restriction on response meta <= 1024 bytes self.status = status - self.mimetype = mimetype - self.response_buffer = f"{status}\t{mimetype}\r\n" + self.meta = meta + self.response_buffer = f"{status}\t{meta}\r\n" async def write_body(self, data: bytes) -> None: """ @@ -284,6 +309,14 @@ class GeminiRequestHandler: await self.writer.drain() self.response_buffer = None + async def close_connection(self) -> None: + """ + Flush any remaining bytes and close the stream. + """ + await self.flush_status() + self.log_request() + await self.writer.drain() + def log_request(self) -> None: """ Log a gemini request using a format derived from the Common Log Format. @@ -291,9 +324,9 @@ class GeminiRequestHandler: self.server.log_message( f"{self.remote_addr} " f"[{self.received_timestamp:%d/%b/%Y:%H:%M:%S +0000}] " - f'"{self.path}" ' + f'"{self.raw_url}" ' f"{self.status} " - f'"{self.mimetype}" ' + f'"{self.meta}" ' f"{self.response_size}" ) diff --git a/jetforce_client.py b/jetforce_client.py new file mode 100755 index 0000000..2f23b5b --- /dev/null +++ b/jetforce_client.py @@ -0,0 +1,49 @@ +#!/usr/bin/env python3.7 +""" +A dead-simple gemini client intended to be used for server development and testing. + +./jetforce-client +""" +import argparse +import socket +import ssl +import urllib.parse + +context = ssl.create_default_context() +context.check_hostname = False +context.verify_mode = ssl.CERT_NONE + + +def fetch(url: str, host: str = None, port: str = None): + parsed_url = urllib.parse.urlparse(url) + if not parsed_url.scheme: + parsed_url = urllib.parse.urlparse(f"gemini://{url}") + + host = host or parsed_url.hostname + port = port or parsed_url.port or 1965 + + with socket.create_connection((host, port)) as sock: + with context.wrap_socket(sock) as ssock: + ssock.sendall((url + "\r\n").encode()) + fp = ssock.makefile("rb") + header = fp.readline().decode() + print(header) + body = fp.read().decode() + print(body) + + +def run_client(): + parser = argparse.ArgumentParser(description="A simple gemini client") + parser.add_argument("url") + parser.add_argument( + "--host", help="Optional server to connect to, will default to the URL" + ) + parser.add_argument( + "--port", help="Optional port to connect to, will default to the URL" + ) + args = parser.parse_args() + fetch(args.url, args.host, args.port) + + +if __name__ == "__main__": + run_client() diff --git a/setup.py b/setup.py index 8e5087d..8e5dd3c 100644 --- a/setup.py +++ b/setup.py @@ -17,8 +17,13 @@ setuptools.setup( author_email="lazar.michael22@gmail.com", description="An Experimental Gemini Server", long_description=long_description(), - py_modules=["jetforce"], - entry_points={"console_scripts": ["jetforce=jetforce:run_server"]}, + py_modules=["jetforce", "jetforce_client"], + entry_points={ + "console_scripts": [ + "jetforce=jetforce:run_server", + "jetforce-client=jetforce_client:run_client", + ] + }, python_requires=">=3.7", keywords="gemini server tcp gopher asyncio", classifiers=[