1
0
Fork 0

Import CSV files via HTTP importer

Since I ultimately want to run `xactfetch` in Kubernetes, running the
importer in a container as a child process doesn't make much sense.
While running `podman` in a Kubernetes container is possible, getting it
to work is nontrivial.  Rather than go through all that effort, I think
it makes more sense to just use HTTP to communicate with the importer I
already have running.

I had originally chosen not to use the web importer because of how I
have it configured to use Authelia for authentication.  The importer
itself does not have any authentication beyond the "secret" parameter
(which is not secret at all, given that it is passed in the query string
and thus visible to anyone and stored in access logs), so I was hesitant
to add an access control rule to bypass authentication for the
`/autoupload` path.  Fortunately, I discovered that Authelia will use
the value of the `Proxy-Authorization` header to authenticate the
request without redirecting to the login screen.  With just a couple of
lines in the Ingress configuration, I got it to work using the regular
`Authorization` header as well:

```yaml
kind: Ingress
metadata:
  annotations:
    nginx.ingress.kubernetes.io/auth-snippet: |
      proxy_set_header Proxy-Authorization $http_authorization;
      proxy_set_header X-Forwarded-Method $request_method;
    nginx.ingress.kubernetes.io/configuration-snippet: |
      proxy_set_header Authorization "";
```
master
Dustin 2023-12-02 09:32:56 -06:00
parent ca8bff8fc5
commit ddee93c8e4
1 changed files with 75 additions and 56 deletions

View File

@ -1,14 +1,12 @@
import base64 import base64
import copy import copy
import datetime import datetime
import getpass
import json import json
import logging import logging
import os import os
import random import random
import shlex
import shutil
import subprocess import subprocess
import tempfile
import urllib.parse import urllib.parse
from pathlib import Path from pathlib import Path
from types import TracebackType from types import TracebackType
@ -25,6 +23,7 @@ log = logging.getLogger('xactfetch')
NTFY_URL = 'https://ntfy.pyrocufflink.net' NTFY_URL = 'https://ntfy.pyrocufflink.net'
NTFY_TOPIC = 'dustin' NTFY_TOPIC = 'dustin'
FIREFLY_III_URL = 'https://firefly.pyrocufflink.blue' FIREFLY_III_URL = 'https://firefly.pyrocufflink.blue'
FIREFLY_III_IMPORTER_URL = 'https://dustin.import.firefly.pyrocufflink.blue'
ACCOUNTS = { ACCOUNTS = {
'commerce': { 'commerce': {
'8357': 1, '8357': 1,
@ -34,6 +33,42 @@ ACCOUNTS = {
} }
class FireflyImporter:
    """Upload CSV files to a Firefly III data importer over HTTP.

    Each import is a single multipart ``POST`` to the importer's
    ``/autoupload`` path, carrying the CSV file and its JSON import
    configuration.

    Attributes are plain values and may be changed after construction:

    * ``url`` -- base URL of the importer (a trailing slash is tolerated)
    * ``secret`` -- value sent as the ``secret`` query-string parameter
    * ``auth`` -- ``(username, password)`` pair handed to ``requests`` as
      ``auth``, or ``None`` to send no credentials
    * ``timeout`` -- ``(connect, read)`` timeouts in seconds handed to
      ``requests``, or ``None`` to wait forever in both phases
    """

    def __init__(
        self,
        url: str,
        secret: str,
        auth: Optional[tuple[str, str]],
        timeout: Optional[
            tuple[Optional[float], Optional[float]]
        ] = (10.0, None),
    ) -> None:
        """Store the connection settings; no network traffic happens here.

        The default ``timeout`` bounds only the connect phase, so an
        unreachable importer fails quickly.  The read phase is left
        unbounded on the assumption that the importer answers only once
        the import has finished, which may take a long time.
        """
        self.url = url
        self.secret = secret
        self.auth = auth
        self.timeout = timeout

    def import_csv(
        self,
        csv: Path,
        config: dict[str, Any],
    ) -> None:
        """Send *csv* and its import *config* to the importer.

        :param csv: path of the CSV file to upload
        :param config: import configuration, serialized to JSON and
            uploaded next to the CSV file
        :raises requests.HTTPError: if the importer answers with an HTTP
            error status
        """
        log.debug('Importing transactions from %s to Firefly III', csv)
        url = f'{self.url.rstrip("/")}/autoupload'
        with csv.open('rb') as f:
            r = requests.post(
                url,
                auth=self.auth,
                headers={
                    'Accept': 'application/json',
                },
                params={
                    'secret': self.secret,
                },
                files={
                    'importable': ('import.csv', f),
                    'json': ('import.json', json.dumps(config)),
                },
                # Without a timeout, requests would wait indefinitely even
                # for the TCP connection to be established.
                timeout=self.timeout,
            )
        r.raise_for_status()
        log.info(
            'Successfully imported transactions from %s to Firefly III',
            csv,
        )
def ntfy( def ntfy(
message: Optional[str] = None, message: Optional[str] = None,
topic: str = NTFY_TOPIC, topic: str = NTFY_TOPIC,
@ -132,48 +167,11 @@ def rfc2047_base64encode(
return f"=?UTF-8?B?{encoded}?=" return f"=?UTF-8?B?{encoded}?="
def firefly_import(csv: Path, config: dict[str, Any], token: str) -> None: def secret_from_file(env: str, default: str) -> str:
log.debug('Importing transactions from %s to Firefly III', csv) filename = os.environ.get(env, default)
env = { log.debug('Loading secret value from %s', filename)
'PATH': os.environ['PATH'], with open(filename, 'r', encoding='utf-8') as f:
'FIREFLY_III_ACCESS_TOKEN': token, return f.read().rstrip()
'IMPORT_DIR_ALLOWLIST': '/import',
'FIREFLY_III_URL': FIREFLY_III_URL,
'WEB_SERVER': 'false',
}
with tempfile.TemporaryDirectory() as tmpdir:
dest = Path(tmpdir) / 'import.csv'
log.debug('Copying %s to %s', csv, dest)
shutil.copyfile(csv, dest)
configfile = dest.with_suffix('.json')
log.debug('Saving config as %s', configfile)
with configfile.open('w', encoding='utf-8') as f:
json.dump(config, f)
cmd = [
'podman',
'run',
'--rm',
'-it',
'-v',
f'{tmpdir}:/import:ro,z',
'--env-host',
'docker.io/fireflyiii/data-importer',
]
if log.isEnabledFor(logging.DEBUG):
log.debug(
'Running command: %s',
' '.join(shlex.quote(str(a)) for a in cmd),
)
if os.environ.get('DEBUG_SKIP_IMPORT'):
cmd = ['true']
p = subprocess.run(cmd, env=env, check=False)
if p.returncode == 0:
log.info(
'Successfully imported transactions from %s to Firefly III',
csv,
)
else:
log.error('Failed to import transactions from %s')
def get_last_transaction_date(key: int, token: str) -> datetime.date: def get_last_transaction_date(key: int, token: str) -> datetime.date:
@ -204,7 +202,9 @@ def get_last_transaction_date(key: int, token: str) -> datetime.date:
return last_date.date() + datetime.timedelta(days=1) return last_date.date() + datetime.timedelta(days=1)
def download_chase(page: Page, end_date: datetime.date, token: str) -> bool: def download_chase(
page: Page, end_date: datetime.date, token: str, importer: FireflyImporter
) -> bool:
with Chase(page) as c, ntfyerror('Chase', page) as r: with Chase(page) as c, ntfyerror('Chase', page) as r:
c.login() c.login()
key = ACCOUNTS['chase'] key = ACCOUNTS['chase']
@ -224,11 +224,16 @@ def download_chase(page: Page, end_date: datetime.date, token: str) -> bool:
return True return True
csv = c.download_transactions(start_date, end_date) csv = c.download_transactions(start_date, end_date)
log.info('Importing transactions from Chase into Firefly III') log.info('Importing transactions from Chase into Firefly III')
c.firefly_import(csv, key, token) c.firefly_import(csv, key, importer)
return r.success return r.success
def download_commerce(page: Page, end_date: datetime.date, token: str) -> bool: def download_commerce(
page: Page,
end_date: datetime.date,
token: str,
importer: FireflyImporter,
) -> bool:
log.info('Downloading transaction lists from Commerce Bank') log.info('Downloading transaction lists from Commerce Bank')
csvs = [] csvs = []
with CommerceBank(page) as c, ntfyerror('Commerce Bank', page) as r: with CommerceBank(page) as c, ntfyerror('Commerce Bank', page) as r:
@ -259,7 +264,7 @@ def download_commerce(page: Page, end_date: datetime.date, token: str) -> bool:
csvs.append((key, c.download_transactions(start_date, end_date))) csvs.append((key, c.download_transactions(start_date, end_date)))
log.info('Importing transactions from Commerce Bank into Firefly III') log.info('Importing transactions from Commerce Bank into Firefly III')
for key, csv in csvs: for key, csv in csvs:
c.firefly_import(csv, key, token) c.firefly_import(csv, key, importer)
return r.success return r.success
@ -451,10 +456,12 @@ class CommerceBank:
modal.get_by_label('Close').click() modal.get_by_label('Close').click()
return path return path
def firefly_import(self, csv: Path, account: int, token: str) -> None: def firefly_import(
self, csv: Path, account: int, importer: FireflyImporter
) -> None:
config = copy.deepcopy(self.IMPORT_CONFIG) config = copy.deepcopy(self.IMPORT_CONFIG)
config['default_account'] = account config['default_account'] = account
firefly_import(csv, config, token) importer.import_csv(csv, config)
class Chase: class Chase:
@ -635,7 +642,9 @@ class Chase:
self.page.get_by_role('button', name='Sign out').click() self.page.get_by_role('button', name='Sign out').click()
log.info('Logged out of Chase') log.info('Logged out of Chase')
def firefly_import(self, csv: Path, account: int, token: str) -> None: def firefly_import(
self, csv: Path, account: int, importer: FireflyImporter
) -> None:
config = copy.deepcopy(self.IMPORT_CONFIG) config = copy.deepcopy(self.IMPORT_CONFIG)
config['default_account'] = account config['default_account'] = account
with csv.open('r', encoding='utf-8') as f: with csv.open('r', encoding='utf-8') as f:
@ -648,7 +657,7 @@ class Chase:
config['do_mapping'].pop(0) config['do_mapping'].pop(0)
else: else:
raise ValueError(f'Unexpected CSV schema: {headers}') raise ValueError(f'Unexpected CSV schema: {headers}')
firefly_import(csv, config, token) importer.import_csv(csv, config)
def main() -> None: def main() -> None:
@ -660,15 +669,25 @@ def main() -> None:
) )
log.debug('Getting Firefly III access token from rbw vault') log.debug('Getting Firefly III access token from rbw vault')
token = rbw_get('xactfetch') token = rbw_get('xactfetch')
import_secret = secret_from_file(
'FIREFLY_IMPORT_SECRET_FILE', 'import.secret'
)
import_auth = (
os.environ.get('FIREFLY_IMPORT_USER', getpass.getuser()),
secret_from_file('FIREFLY_IMPORT_PASSWORD_FILE', 'import.password'),
)
importer = FireflyImporter(
FIREFLY_III_IMPORTER_URL, import_secret, import_auth
)
end_date = datetime.date.today() - datetime.timedelta(days=1) end_date = datetime.date.today() - datetime.timedelta(days=1)
with sync_playwright() as pw: with sync_playwright() as pw:
headless = os.environ.get('DEBUG_HEADLESS_BROWSER', '1') == '1' headless = os.environ.get('DEBUG_HEADLESS_BROWSER', '1') == '1'
browser = pw.firefox.launch(headless=headless) browser = pw.firefox.launch(headless=headless)
page = browser.new_page() page = browser.new_page()
failed = False failed = False
if not download_commerce(page, end_date, token): if not download_commerce(page, end_date, token, importer):
failed = True failed = True
if not download_chase(page, end_date, token): if not download_chase(page, end_date, token, importer):
failed = True failed = True
raise SystemExit(1 if failed else 0) raise SystemExit(1 if failed else 0)