From 0416a77901628664002f00db2b91b1096690cd72 Mon Sep 17 00:00:00 2001 From: "Dustin C. Hatch" Date: Tue, 13 Jun 2023 16:06:32 -0500 Subject: [PATCH] Initial commit --- .dockerignore | 3 + .gitignore | 3 + Containerfile | 25 ++++ dch_webhooks.py | 332 ++++++++++++++++++++++++++++++++++++++++++++++++ pyproject.toml | 59 +++++++++ 5 files changed, 422 insertions(+) create mode 100644 .dockerignore create mode 100644 .gitignore create mode 100644 Containerfile create mode 100644 dch_webhooks.py create mode 100644 pyproject.toml diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 0000000..a4243eb --- /dev/null +++ b/.dockerignore @@ -0,0 +1,3 @@ +**/* +!dch_webhooks.py +!pyproject.toml diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b84d18a --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +/.venv +__pycache__/ +*.egg-info/ diff --git a/Containerfile b/Containerfile new file mode 100644 index 0000000..a3a08aa --- /dev/null +++ b/Containerfile @@ -0,0 +1,25 @@ +FROM docker.io/library/python:3.11 AS build + +RUN python3 -m pip install -U pip setuptools wheel + +COPY . /tmp/build + +RUN cd /tmp/build && python3 -m pip wheel -w dist . + +FROM docker.io/library/python:3.11-slim + +RUN --mount=type=cache,target=/var/cache/apt \ + apt-get update && \ + apt-get install -y \ + tini \ + && : + +RUN --mount=from=build,source=/tmp/build/dist,target=/tmp/wheels \ + python3 -m pip install -f /tmp/wheels \ + dch_webhooks \ + uvicorn \ + && : + +USER 1000:1000 + +CMD ["tini", "/usr/local/bin/uvicorn", "dch_webhooks:app"] diff --git a/dch_webhooks.py b/dch_webhooks.py new file mode 100644 index 0000000..e24717e --- /dev/null +++ b/dch_webhooks.py @@ -0,0 +1,332 @@ +import datetime +import importlib.metadata +import logging +import os +import re +from types import TracebackType +from typing import Optional, Self, Type + +import fastapi +import httpx +import pydantic +import pyrfc6266 + + +__all__ = [ + 'app', +] + +log = logging.getLogger(__name__) + +DIST = importlib.metadata.metadata(__name__) + +DESCRIPTION_CLEAN_PATTERN = re.compile('[^a-z ]') +EXCLUDE_DESCRIPTION_WORDS = { + 'a', + 'ach', + 'an', + 'card', + 'debit', + 'pay', + 'payment', + 'purchase', + 'the', +} + +FIREFLY_URL = os.environ.get( + 'FIREFLY_URL', + 'http://firefly-iii', +) +MAX_DOCUMENT_SIZE = int( + os.environ.get( + 'MAX_DOCUMENT_SIZE', + 50 * 2**20, + ) +) +PAPERLESS_URL = os.environ.get( + 'PAPERLESS_URL', + 'http://paperless-ngx', +) + + +class FireflyIIITransactionSplit(pydantic.BaseModel): + type: str + date: datetime.datetime + amount: str + transaction_journal_id: int + description: str + + +class FireflyIIITransaction(pydantic.BaseModel): + transactions: list[FireflyIIITransactionSplit] + + +class FireflyIIIWebhook(pydantic.BaseModel): + content: FireflyIIITransaction + + +class PaperlessNgxDocument(pydantic.BaseModel): + id: int + title: str + + +class PaperlessNgxSearchResults(pydantic.BaseModel): + count: int + next: str | None + previous: str | None + results: list[PaperlessNgxDocument] + + +class HttpxClientMixin: + def __init__(self) -> None: + super().__init__() + self._client: Optional[httpx.AsyncClient] = None + + async def __aenter__(self) -> Self: + await self.client.__aenter__() + return self + + async def __aexit__( + self, + exc_type: Optional[Type[Exception]], + exc_value: Optional[Exception], + tb: Optional[TracebackType], + ) -> None: + await self.client.__aexit__(exc_type, exc_value, tb) + + @property + def client(self) -> httpx.AsyncClient: + if self._client is None: + self._client = self._get_client() + return self._client + + def _get_client(self) -> httpx.AsyncClient: + return httpx.AsyncClient( + headers={ + 'User-Agent': f'{DIST["Name"]}/{DIST["Version"]}', + }, + ) + + +class Firefly(HttpxClientMixin): + def _get_client(self) -> httpx.AsyncClient: + client = super()._get_client() + if token_file := os.environ.get('FIREFLY_AUTH_TOKEN'): + try: + f = open(token_file, encoding='utf-8') + except OSError as e: + log.error('Could not load Firefly-III access token: %s', e) + else: + with f: + token = f.read().strip() + client.headers['Authorization'] = f'Bearer {token}' + return client + + async def attach_receipt( + self, + xact_id: int, + doc: bytes, + filename: str, + title: str | None = None, + ) -> None: + log.info('Attaching receipt %r to transaction %d', filename, xact_id) + url = f'{FIREFLY_URL}/api/v1/attachments' + data = { + 'filename': filename, + 'attachable_type': 'TransactionJournal', + 'attachable_id': xact_id, + } + if title: + data['title'] = title + r = await self.client.post(url, data=data) + r.raise_for_status() + rbody = r.json() + attachment = rbody['data'] + url = f'{FIREFLY_URL}/api/v1/attachments/{attachment["id"]}/upload' + r = await self.client.post(url, content=doc, headers={ + 'Content-Type': 'application/octet-stream', + }) + r.raise_for_status() + + +class Paperless(HttpxClientMixin): + def _get_client(self) -> httpx.AsyncClient: + client = super()._get_client() + if token_file := os.environ.get('PAPERLESS_AUTH_TOKEN'): + try: + f = open(token_file, encoding='utf-8') + except OSError as e: + log.error( + 'Could not load Paperless-ngx authentication token: %s', e + ) + else: + with f: + token = f.read().strip() + client.headers['Authorization'] = f'Token {token}' + return client + + async def find_receipts( + self, search: str, amount: float, date: datetime.date + ) -> list[tuple[str, str, bytes]]: + date_begin = date - datetime.timedelta(days=2) + date_end = date + datetime.timedelta(days=2) + query = ' '.join( + ( + search, + str(amount), + 'type:Invoice/Receipt', + f'created:[{date_begin} TO {date_end}]', + ) + ) + log.info('Searching for receipt in Paperless: %s', query) + docs: list[tuple[str, str, bytes]] = [] + url = f'{PAPERLESS_URL}/api/documents/' + r = await self.client.get(url, params={'query': query}) + if r.status_code != 200: + if log.isEnabledFor(logging.ERROR): + try: + data = r.json() + except ValueError as e: + log.debug( + 'Failed to parse HTTP error response as JSON: %s', e + ) + detail = r.text + else: + try: + detail = data['detail'] + except KeyError: + detail = '' + log.error( + 'Error searching Paperless: HTTP %d %s: %s', + r.status_code, + r.reason_phrase, + detail, + ) + return docs + try: + data = r.json() + except ValueError as e: + log.error('Failed to parse HTTP response as JSON: %s', e) + return docs + try: + results = PaperlessNgxSearchResults.parse_obj(data) + except pydantic.ValidationError as e: + log.error('Could not parse search response: %s', e) + return docs + log.info('Search returned %d documents', results.count) + if results.next: + log.warning( + 'Search returned multiple pages of results; ' + 'only the results on the first page are used' + ) + for doc in results.results: + url = f'{PAPERLESS_URL}/api/documents/{doc.id}/download/' + r = await self.client.get(url, params={'original': True}) + if r.status_code != 200: + log.error( + 'Failed to download document: HTTP %d %s', + r.status_code, + r.reason_phrase, + ) + continue + try: + size = int(r.headers['Content-Length']) + except (KeyError, ValueError) as e: + log.error( + 'Skipping document ID %d: Cannot determine file size: %s', + doc.id, + e, + ) + continue + if size > MAX_DOCUMENT_SIZE: + log.warning( + 'Skipping document ID %d: Size (%d bytes) is greater than ' + 'the configured maximum document size (%d bytes)', + size, + MAX_DOCUMENT_SIZE, + ) + continue + docs.append( + (response_filename(r), doc.title, await r.aread()) + ) + return docs + + +async def handle_firefly_transaction(xact: FireflyIIITransaction) -> None: + async with Firefly() as ff, Paperless() as pl: + for split in xact.transactions: + search = clean_description(split.description) + try: + amount = float(split.amount) + except ValueError as e: + log.error('Invalid transaction amount: %s', e) + continue + for filename, title, doc in await pl.find_receipts( + search, + amount, + split.date.date(), + ): + try: + await ff.attach_receipt( + split.transaction_journal_id, doc, filename, title + ) + except Exception as e: + log.error( + 'Failed to attach receipt to transaction ID %d: %s', + split.transaction_journal_id, + e, + ) + + +def clean_description(text: str) -> str: + matches = DESCRIPTION_CLEAN_PATTERN.sub('', text.lower()) + if not matches: + log.warning( + 'Failed to clean transaction description: ' + 'text did not match regular expression pattern' + ) + return text + match_tokens = set(matches.split()) + terms = match_tokens - EXCLUDE_DESCRIPTION_WORDS + return ' '.join(terms) + + +def response_filename(response: httpx.Response) -> str: + if cd := response.headers.get('Content-Disposition'): + __, params = pyrfc6266.parse(cd) + maybename = '' + for p in params: + if p.name == 'filename*': + return p.value + if p.name == 'filename': + maybename = p.value + if maybename: + if maybename.startswith("b'") and maybename.endswith("'"): + maybename = maybename[2:-1] + return maybename + return response.url.path.rstrip('/').rsplit('/', 1)[-1] + + +app = fastapi.FastAPI( + name=DIST['Name'], + version=DIST['Version'], + docs_url='/api-doc/', +) + + +@app.on_event('startup') +def on_start() -> None: + log.setLevel(logging.DEBUG) + h = logging.StreamHandler() + h.setLevel(logging.DEBUG) + log.addHandler(h) + + +@app.get('/') +def status() -> str: + return 'UP' + + +@app.post('/hooks/firefly-iii/create') +async def firefly_iii_create(hook: FireflyIIIWebhook) -> None: + await handle_firefly_transaction(hook.content) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..db1f609 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,59 @@ +[project] +name = 'dch-webhooks' +authors = [ + {name = "Dustin C. Hatch", email = "dustin@hatch.name"}, +] +description = "Webhook receiver for internal automation" +requires-python = ">=3.11" +license = {text = "CC0"} +classifiers = [ + "License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication", + "Programming Language :: Python :: 3", +] +dependencies = [ + "fastapi~=0.97.0", + "httpx~=0.24.1", + "pyrfc6266~=1.0.2", +] +dynamic = ["version"] + +[build-system] +requires = ["setuptools", "setuptools-scm"] +build-backend = "setuptools.build_meta" + +[tool.pyright] +venvPath = '.' +venv = '.venv' + +[tool.black] +line-length = 79 +skip-string-normalization = true + +[tool.isort] +line_length = 79 +ensure_newline_before_comments = true +force_grid_wrap = 0 +include_trailing_comma = true +lines_after_imports = 2 +multi_line_output = 3 +use_parentheses = true + +[tool.mypy] +mypy_path = 'stubs' +namespace_packages = true +ignore_missing_imports = true +warn_redundant_casts = true +warn_unused_configs = true +warn_unused_ignores = true + +allow_untyped_globals = false +allow_redefinition = false +strict_optional = true +disallow_subclassing_any = true +disallow_any_generics = true +disallow_untyped_defs = true +disallow_incomplete_defs = true +check_untyped_defs = true +disallow_untyped_decorators = true +no_implicit_optional = true +warn_return_any = true