Compare commits

...

3 Commits

Author SHA1 Message Date
0b1b0a72c3 wip: view recorded pages 2025-04-18 10:17:03 -05:00
666ae19efd page: Fix tracing span in save_page
Span guards (i.e. `Span::enter`) cannot be used in asynchronous
methods:

> The drop guard returned by Span::enter exits the span when it is
> dropped. When an async function or async block yields at an .await
> point, the current scope is exited, but values in that scope are not
> dropped (because the async block will eventually resume execution
> from that await point). This means that another task will begin
> executing while remaining in the entered span. This results in an
> incorrect trace.

https://docs.rs/tracing/0.1.41/tracing/span/struct.Span.html#in-asynchronous-code

Instead, we can use the `instrument` attribute macro, which correctly
instruments asynchronous functions.  I originally did not use this
macro because it logs all function parameters' values by default, which
means every log line emitted while in the span contained the entire
contents of the page being saved.  Fortunately, I discovered you can
omit certain parameters using `skip`.
2025-04-18 10:15:10 -05:00
9375c5d991 wip: index page 2025-04-08 21:50:14 -05:00
9 changed files with 723 additions and 33 deletions

5
.editorconfig Normal file
View File

@@ -0,0 +1,5 @@
root = true
[*.html.tera]
indent_style = space
indent_size = 2

452
Cargo.lock generated
View File

@@ -1,6 +1,6 @@
# This file is automatically @generated by Cargo. # This file is automatically @generated by Cargo.
# It is not intended for manual editing. # It is not intended for manual editing.
version = 3 version = 4
[[package]] [[package]]
name = "addr2line" name = "addr2line"
@@ -181,6 +181,12 @@ version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "383d29d513d8764dcdc42ea295d979eb99c3c9f00607b3692cf68a431f7dca72" checksum = "383d29d513d8764dcdc42ea295d979eb99c3c9f00607b3692cf68a431f7dca72"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]] [[package]]
name = "bitflags" name = "bitflags"
version = "2.9.0" version = "2.9.0"
@@ -196,6 +202,16 @@ dependencies = [
"generic-array", "generic-array",
] ]
[[package]]
name = "bstr"
version = "1.12.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "234113d19d0d7d613b40e86fb654acf958910802bcceab913a4f9e7cda03b1a4"
dependencies = [
"memchr",
"serde",
]
[[package]] [[package]]
name = "bumpalo" name = "bumpalo"
version = "3.17.0" version = "3.17.0"
@@ -256,6 +272,28 @@ dependencies = [
"windows-link", "windows-link",
] ]
[[package]]
name = "chrono-tz"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93698b29de5e97ad0ae26447b344c482a7284c737d9ddc5f9e52b74a336671bb"
dependencies = [
"chrono",
"chrono-tz-build",
"phf",
]
[[package]]
name = "chrono-tz-build"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0c088aee841df9c3041febbb73934cfc39708749bf96dc827e3359cd39ef11b1"
dependencies = [
"parse-zoneinfo",
"phf",
"phf_codegen",
]
[[package]] [[package]]
name = "cipher" name = "cipher"
version = "0.4.4" version = "0.4.4"
@@ -341,6 +379,40 @@ dependencies = [
"libc", "libc",
] ]
[[package]]
name = "crossbeam-channel"
version = "0.5.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "82b8f8f868b36967f9606790d1903570de9ceaf870a7bf9fbbd3016d636a2cb2"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dd111b7b7f7d55b72c0a6ae361660ee5853c9af73f70c3c2ef6858b950e2e51"
dependencies = [
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b82ac4a3c2ca9c3460964f020e1402edd5753411d7737aa39c3714ad1b5420e"
dependencies = [
"crossbeam-utils",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d0a5c400df2834b80a4c3327b3aad3a4c4cd4de0629063962b03235697506a28"
[[package]] [[package]]
name = "crypto-bigint" name = "crypto-bigint"
version = "0.5.5" version = "0.5.5"
@@ -490,6 +562,12 @@ dependencies = [
"syn", "syn",
] ]
[[package]]
name = "deunicode"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dc55fe0d1f6c107595572ec8b107c0999bb1a2e0b75e37429a4fb0d6474a0e7d"
[[package]] [[package]]
name = "devise" name = "devise"
version = "0.4.2" version = "0.4.2"
@@ -516,7 +594,7 @@ version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b035a542cf7abf01f2e3c4d5a7acbaebfefe120ae4efc7bde3df98186e4b8af7" checksum = "b035a542cf7abf01f2e3c4d5a7acbaebfefe120ae4efc7bde3df98186e4b8af7"
dependencies = [ dependencies = [
"bitflags", "bitflags 2.9.0",
"proc-macro2", "proc-macro2",
"proc-macro2-diagnostics", "proc-macro2-diagnostics",
"quote", "quote",
@@ -711,6 +789,18 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "filetime"
version = "0.2.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "35c0522e981e68cbfa8c3f978441a5f34b30b96e146b33cd3359176b50fe8586"
dependencies = [
"cfg-if",
"libc",
"libredox",
"windows-sys 0.59.0",
]
[[package]] [[package]]
name = "fnv" name = "fnv"
version = "1.0.7" version = "1.0.7"
@@ -741,6 +831,15 @@ dependencies = [
"percent-encoding", "percent-encoding",
] ]
[[package]]
name = "fsevent-sys"
version = "4.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76ee7a02da4d231650c7cea31349b889be2f45ddb3ef3032d2ec8185f6313fd2"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "futf" name = "futf"
version = "0.1.5" version = "0.1.5"
@@ -922,6 +1021,30 @@ version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2"
[[package]]
name = "globset"
version = "0.4.16"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "54a1028dfc5f5df5da8a56a73e6c153c9a9708ec57232470703592a3f18e49f5"
dependencies = [
"aho-corasick",
"bstr",
"log",
"regex-automata 0.4.9",
"regex-syntax 0.8.5",
]
[[package]]
name = "globwalk"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0bf760ebf69878d9fd8f110c89703d90ce35095324d1f1edcb595c63945ee757"
dependencies = [
"bitflags 2.9.0",
"ignore",
"walkdir",
]
[[package]] [[package]]
name = "group" name = "group"
version = "0.13.0" version = "0.13.0"
@@ -1113,6 +1236,15 @@ version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
[[package]]
name = "humansize"
version = "2.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6cb51c9a029ddc91b07a787f1d86b53ccfa49b0e86688c946ebe8d3555685dd7"
dependencies = [
"libm",
]
[[package]] [[package]]
name = "hyper" name = "hyper"
version = "0.14.32" version = "0.14.32"
@@ -1380,6 +1512,22 @@ dependencies = [
"icu_properties", "icu_properties",
] ]
[[package]]
name = "ignore"
version = "0.4.23"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d89fd380afde86567dfba715db065673989d6253f42b88179abd3eae47bda4b"
dependencies = [
"crossbeam-deque",
"globset",
"log",
"memchr",
"regex-automata 0.4.9",
"same-file",
"walkdir",
"winapi-util",
]
[[package]] [[package]]
name = "indexmap" name = "indexmap"
version = "1.9.3" version = "1.9.3"
@@ -1408,6 +1556,26 @@ version = "0.1.15"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb" checksum = "c8fae54786f62fb2918dcfae3d568594e50eb9b5c25bf04371af6fe7516452fb"
[[package]]
name = "inotify"
version = "0.9.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8069d3ec154eb856955c1c0fbffefbf5f3c40a104ec912d4797314c1801abff"
dependencies = [
"bitflags 1.3.2",
"inotify-sys",
"libc",
]
[[package]]
name = "inotify-sys"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e05c02b5e89bff3b946cedeca278abc628fe811e604f027c45a8aa3cf793d0eb"
dependencies = [
"libc",
]
[[package]] [[package]]
name = "inout" name = "inout"
version = "0.1.4" version = "0.1.4"
@@ -1481,6 +1649,26 @@ dependencies = [
"serde_json", "serde_json",
] ]
[[package]]
name = "kqueue"
version = "1.0.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7447f1ca1b7b563588a205fe93dea8df60fd981423a768bc1c0ded35ed147d0c"
dependencies = [
"kqueue-sys",
"libc",
]
[[package]]
name = "kqueue-sys"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed9625ffda8729b85e45cf04090035ac368927b8cebc34898e7c120f52e4838b"
dependencies = [
"bitflags 1.3.2",
"libc",
]
[[package]] [[package]]
name = "lazy_static" name = "lazy_static"
version = "1.5.0" version = "1.5.0"
@@ -1502,6 +1690,17 @@ version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa" checksum = "8355be11b20d696c8f18f6cc018c4e372165b1fa8126cef092399c9951984ffa"
[[package]]
name = "libredox"
version = "0.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d"
dependencies = [
"bitflags 2.9.0",
"libc",
"redox_syscall",
]
[[package]] [[package]]
name = "linux-raw-sys" name = "linux-raw-sys"
version = "0.9.3" version = "0.9.3"
@@ -1678,6 +1877,18 @@ dependencies = [
"adler2", "adler2",
] ]
[[package]]
name = "mio"
version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a4a650543ca06a924e8b371db273b2756685faae30f8487da1b56505a8f78b0c"
dependencies = [
"libc",
"log",
"wasi 0.11.0+wasi-snapshot-preview1",
"windows-sys 0.48.0",
]
[[package]] [[package]]
name = "mio" name = "mio"
version = "1.0.3" version = "1.0.3"
@@ -1740,6 +1951,34 @@ dependencies = [
"memchr", "memchr",
] ]
[[package]]
name = "normpath"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8911957c4b1549ac0dc74e30db9c8b0e66ddcd6d7acc33098f4c63a64a6d7ed"
dependencies = [
"windows-sys 0.59.0",
]
[[package]]
name = "notify"
version = "6.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6205bd8bb1e454ad2e27422015fb5e4f2bcc7e08fa8f27058670d208324a4d2d"
dependencies = [
"bitflags 2.9.0",
"crossbeam-channel",
"filetime",
"fsevent-sys",
"inotify",
"kqueue",
"libc",
"log",
"mio 0.8.11",
"walkdir",
"windows-sys 0.48.0",
]
[[package]] [[package]]
name = "nu-ansi-term" name = "nu-ansi-term"
version = "0.46.0" version = "0.46.0"
@@ -1891,7 +2130,7 @@ version = "0.10.72"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da"
dependencies = [ dependencies = [
"bitflags", "bitflags 2.9.0",
"cfg-if", "cfg-if",
"foreign-types", "foreign-types",
"libc", "libc",
@@ -1991,6 +2230,15 @@ dependencies = [
"windows-targets 0.52.6", "windows-targets 0.52.6",
] ]
[[package]]
name = "parse-zoneinfo"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f2a05b18d44e2957b88f96ba460715e295bc1d7510468a2f3d3b44535d26c24"
dependencies = [
"regex",
]
[[package]] [[package]]
name = "pear" name = "pear"
version = "0.2.9" version = "0.2.9"
@@ -2029,6 +2277,51 @@ version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "pest"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "198db74531d58c70a361c42201efde7e2591e976d518caf7662a47dc5720e7b6"
dependencies = [
"memchr",
"thiserror 2.0.12",
"ucd-trie",
]
[[package]]
name = "pest_derive"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d725d9cfd79e87dccc9341a2ef39d1b6f6353d68c4b33c177febbe1a402c97c5"
dependencies = [
"pest",
"pest_generator",
]
[[package]]
name = "pest_generator"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "db7d01726be8ab66ab32f9df467ae8b1148906685bbe75c82d1e65d7f5b3f841"
dependencies = [
"pest",
"pest_meta",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "pest_meta"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f9f832470494906d1fca5329f8ab5791cc60beb230c74815dff541cbd2b5ca0"
dependencies = [
"once_cell",
"pest",
"sha2",
]
[[package]] [[package]]
name = "phf" name = "phf"
version = "0.11.3" version = "0.11.3"
@@ -2335,7 +2628,7 @@ version = "0.5.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1" checksum = "0b8c0c260b63a8219631167be35e6a988e9554dbd323f8bd08439c8ed1302bd1"
dependencies = [ dependencies = [
"bitflags", "bitflags 2.9.0",
] ]
[[package]] [[package]]
@@ -2534,6 +2827,19 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "rocket_dyn_templates"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bbab919c9e67df3f7ac6624a32ef897df4cd61c0969f4d66f3ced0534660d7a"
dependencies = [
"normpath",
"notify",
"rocket",
"tera",
"walkdir",
]
[[package]] [[package]]
name = "rocket_http" name = "rocket_http"
version = "0.5.1" version = "0.5.1"
@@ -2608,7 +2914,7 @@ version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d97817398dd4bb2e6da002002db259209759911da105da92bec29ccb12cf58bf" checksum = "d97817398dd4bb2e6da002002db259209759911da105da92bec29ccb12cf58bf"
dependencies = [ dependencies = [
"bitflags", "bitflags 2.9.0",
"errno", "errno",
"libc", "libc",
"linux-raw-sys", "linux-raw-sys",
@@ -2670,6 +2976,15 @@ version = "1.0.20"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]] [[package]]
name = "schannel" name = "schannel"
version = "0.1.27" version = "0.1.27"
@@ -2725,7 +3040,7 @@ version = "2.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02"
dependencies = [ dependencies = [
"bitflags", "bitflags 2.9.0",
"core-foundation", "core-foundation",
"core-foundation-sys", "core-foundation-sys",
"libc", "libc",
@@ -2756,6 +3071,7 @@ dependencies = [
"rand 0.9.0", "rand 0.9.0",
"reqwest", "reqwest",
"rocket", "rocket",
"rocket_dyn_templates",
"scraper", "scraper",
"serde", "serde",
"thiserror 2.0.12", "thiserror 2.0.12",
@@ -2769,7 +3085,7 @@ version = "0.26.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8" checksum = "fd568a4c9bb598e291a08244a5c1f5a8a6650bee243b5b0f8dbb3d9cc1d87fe8"
dependencies = [ dependencies = [
"bitflags", "bitflags 2.9.0",
"cssparser", "cssparser",
"derive_more", "derive_more",
"fxhash", "fxhash",
@@ -2969,6 +3285,16 @@ dependencies = [
"autocfg", "autocfg",
] ]
[[package]]
name = "slug"
version = "0.1.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "882a80f72ee45de3cc9a5afeb2da0331d58df69e4e7d8eeb5d3c7784ae67e724"
dependencies = [
"deunicode",
"wasm-bindgen",
]
[[package]] [[package]]
name = "smallvec" name = "smallvec"
version = "1.14.0" version = "1.14.0"
@@ -3122,7 +3448,7 @@ version = "0.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b"
dependencies = [ dependencies = [
"bitflags", "bitflags 2.9.0",
"core-foundation", "core-foundation",
"system-configuration-sys", "system-configuration-sys",
] ]
@@ -3161,6 +3487,28 @@ dependencies = [
"utf-8", "utf-8",
] ]
[[package]]
name = "tera"
version = "1.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab9d851b45e865f178319da0abdbfe6acbc4328759ff18dafc3a41c16b4cd2ee"
dependencies = [
"chrono",
"chrono-tz",
"globwalk",
"humansize",
"lazy_static",
"percent-encoding",
"pest",
"pest_derive",
"rand 0.8.5",
"regex",
"serde",
"serde_json",
"slug",
"unic-segment",
]
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.69" version = "1.0.69"
@@ -3276,7 +3624,7 @@ dependencies = [
"backtrace", "backtrace",
"bytes", "bytes",
"libc", "libc",
"mio", "mio 1.0.3",
"pin-project-lite", "pin-project-lite",
"signal-hook-registry", "signal-hook-registry",
"socket2", "socket2",
@@ -3482,6 +3830,12 @@ dependencies = [
"serde", "serde",
] ]
[[package]]
name = "ucd-trie"
version = "0.1.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2896d95c02a80c6d6a5d6e953d479f5ddf2dfdb6a244441010e373ac0fb88971"
[[package]] [[package]]
name = "uncased" name = "uncased"
version = "0.9.10" version = "0.9.10"
@@ -3492,6 +3846,56 @@ dependencies = [
"version_check", "version_check",
] ]
[[package]]
name = "unic-char-property"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a8c57a407d9b6fa02b4795eb81c5b6652060a15a7903ea981f3d723e6c0be221"
dependencies = [
"unic-char-range",
]
[[package]]
name = "unic-char-range"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0398022d5f700414f6b899e10b8348231abf9173fa93144cbc1a43b9793c1fbc"
[[package]]
name = "unic-common"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "80d7ff825a6a654ee85a63e80f92f054f904f21e7d12da4e22f9834a4aaa35bc"
[[package]]
name = "unic-segment"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4ed5d26be57f84f176157270c112ef57b86debac9cd21daaabbe56db0f88f23"
dependencies = [
"unic-ucd-segment",
]
[[package]]
name = "unic-ucd-segment"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2079c122a62205b421f499da10f3ee0f7697f012f55b675e002483c73ea34700"
dependencies = [
"unic-char-property",
"unic-char-range",
"unic-ucd-version",
]
[[package]]
name = "unic-ucd-version"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96bd2f2237fe450fcd0a1d2f5f4e91711124f7857ba2e964247776ebeeb7b0c4"
dependencies = [
"unic-common",
]
[[package]] [[package]]
name = "unicode-ident" name = "unicode-ident"
version = "1.0.18" version = "1.0.18"
@@ -3585,6 +3989,16 @@ version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a"
[[package]]
name = "walkdir"
version = "2.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "29790946404f91d9c5d06f9874efddea1dc06c5efe94541a7d6863108e3a5e4b"
dependencies = [
"same-file",
"winapi-util",
]
[[package]] [[package]]
name = "want" name = "want"
version = "0.3.1" version = "0.3.1"
@@ -3738,6 +4152,15 @@ version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6" checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb"
dependencies = [
"windows-sys 0.59.0",
]
[[package]] [[package]]
name = "winapi-x86_64-pc-windows-gnu" name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0" version = "0.4.0"
@@ -3832,6 +4255,15 @@ dependencies = [
"windows-link", "windows-link",
] ]
[[package]]
name = "windows-sys"
version = "0.48.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "677d2418bec65e3338edb076e806bc1ec15693c5d0104683f2efe857f61056a9"
dependencies = [
"windows-targets 0.48.5",
]
[[package]] [[package]]
name = "windows-sys" name = "windows-sys"
version = "0.52.0" version = "0.52.0"
@@ -4050,7 +4482,7 @@ version = "0.39.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1"
dependencies = [ dependencies = [
"bitflags", "bitflags 2.9.0",
] ]
[[package]] [[package]]

View File

@@ -13,6 +13,7 @@ openidconnect = { version = "4.0.0", default-features = false, features = ["reqw
rand = "0.9.0" rand = "0.9.0"
reqwest = { version = "0.12.15", features = ["json", "native-tls"] } reqwest = { version = "0.12.15", features = ["json", "native-tls"] }
rocket = { version = "0.5.1", features = ["json", "secrets"] } rocket = { version = "0.5.1", features = ["json", "secrets"] }
rocket_dyn_templates = { version = "0.2.0", features = ["tera"] }
serde = { version = "1.0.219", features = ["derive"] } serde = { version = "1.0.219", features = ["derive"] }
thiserror = "2.0.12" thiserror = "2.0.12"
tracing = "0.1.41" tracing = "0.1.41"

View File

@@ -1,6 +1,6 @@
use std::path::Path;
use std::collections::HashMap; use std::collections::HashMap;
use std::io::Read; use std::io::Read;
use std::path::Path;
use std::time::SystemTime; use std::time::SystemTime;
use jsonwebtoken::{ use jsonwebtoken::{
@@ -47,9 +47,7 @@ pub struct User {
impl User { impl User {
pub fn new(id: &'static str) -> Self { pub fn new(id: &'static str) -> Self {
Self { Self { id: id.into() }
id: id.into(),
}
} }
/// Return the user ID /// Return the user ID
@@ -198,7 +196,9 @@ pub async fn get_oidc_client(
} }
/// Load a secret from the specified path /// Load a secret from the specified path
pub fn load_secret<P: AsRef<Path>>(path: P) -> Result<Vec<u8>, std::io::Error> { pub fn load_secret<P: AsRef<Path>>(
path: P,
) -> Result<Vec<u8>, std::io::Error> {
let mut secret = vec![]; let mut secret = vec![];
let mut f = std::fs::File::open(path)?; let mut f = std::fs::File::open(path)?;
f.read_to_end(&mut secret)?; f.read_to_end(&mut secret)?;
@@ -360,6 +360,7 @@ pub async fn oidc_callback(
cookies: &CookieJar<'_>, cookies: &CookieJar<'_>,
ctx: &State<Context>, ctx: &State<Context>,
config: &State<Config>, config: &State<Config>,
rconfig: &RocketConfig,
) -> Result<Redirect, LoginFailed> { ) -> Result<Redirect, LoginFailed> {
trace!("{:?}", params); trace!("{:?}", params);
let state = cookies let state = cookies
@@ -390,7 +391,12 @@ pub async fn oidc_callback(
ctx.make_jwt(&user, config.auth.login_ttl) ctx.make_jwt(&user, config.auth.login_ttl)
.map_err(LoginError::from)?, .map_err(LoginError::from)?,
)) ))
.secure(true) .secure(rconfig.profile != RocketConfig::DEBUG_PROFILE)
.same_site(if rconfig.profile == RocketConfig::DEBUG_PROFILE {
SameSite::Lax
} else {
SameSite::Strict
})
.http_only(true) .http_only(true)
.expires(expires) .expires(expires)
.build(), .build(),

View File

@@ -7,6 +7,7 @@ pub mod page;
use meilisearch_sdk::client::Client as MeilisearchClient; use meilisearch_sdk::client::Client as MeilisearchClient;
use rocket::fairing::{self, AdHoc}; use rocket::fairing::{self, AdHoc};
use rocket::Rocket; use rocket::Rocket;
use rocket_dyn_templates::Template;
use tracing::error; use tracing::error;
use config::Config; use config::Config;
@@ -98,6 +99,8 @@ pub fn rocket() -> Rocket<rocket::Build> {
.mount( .mount(
"/", "/",
rocket::routes![ rocket::routes![
page::index,
page::get_page,
auth::oidc_callback, auth::oidc_callback,
auth::oidc_login, auth::oidc_login,
page::post_page page::post_page
@@ -106,4 +109,5 @@ pub fn rocket() -> Rocket<rocket::Build> {
.attach(AdHoc::config::<Config>()) .attach(AdHoc::config::<Config>())
.attach(AdHoc::try_on_ignite("Initialize context", init_context)) .attach(AdHoc::try_on_ignite("Initialize context", init_context))
.attach(AdHoc::try_on_ignite("Meilisearch Setup", meilisearch_setup)) .attach(AdHoc::try_on_ignite("Meilisearch Setup", meilisearch_setup))
.attach(Template::fairing())
} }

View File

@@ -20,7 +20,10 @@ impl TryFrom<&Config> for Client {
Some(t) => Some(std::fs::read_to_string(t).map_err(Error::Token)?), Some(t) => Some(std::fs::read_to_string(t).map_err(Error::Token)?),
None => None, None => None,
}; };
Ok(Client::new(&config.meilisearch.url, token.as_deref().map(str::trim))?) Ok(Client::new(
&config.meilisearch.url,
token.as_deref().map(str::trim),
)?)
} }
} }
@@ -31,24 +34,35 @@ pub async fn ensure_index(
match client.get_index(name).await { match client.get_index(name).await {
Ok(_) => { Ok(_) => {
debug!("Meilisearch index '{}' already exists", name); debug!("Meilisearch index '{}' already exists", name);
Ok(())
}, },
Err(MeilisearchError::Meilisearch(e)) Err(MeilisearchError::Meilisearch(e))
if e.error_code == ErrorCode::IndexNotFound => if e.error_code == ErrorCode::IndexNotFound =>
{ {
info!("Creating Meilisearch index: {}", name); info!("Creating Meilisearch index: {}", name);
if let Err(e) = create_index(client, name).await { create_index(client, name).await.inspect_err(|e| {
error!("Failed to create index: {}", e); error!("Failed to create index: {}", e);
Err(e) })?;
} else {
Ok(())
}
}, },
Err(e) => { Err(e) => {
error!("Failed to check index: {}", e); error!("Failed to check index: {}", e);
Err(e) return Err(e);
}, },
} };
client
.index(name)
.set_sortable_attributes(&["timestamp", "title"])
.await
.inspect_err(|e| {
error!("Failed to set index sortable attributes: {}", e);
})?;
client
.index(name)
.set_filterable_attributes(&["id", "user_id", "timestamp"])
.await
.inspect_err(|e| {
error!("Failed to set index filterable attributes: {}", e);
})?;
Ok(())
} }
async fn create_index( async fn create_index(

View File

@@ -2,13 +2,17 @@ use chrono::{DateTime, Utc};
use html5ever::parse_document; use html5ever::parse_document;
use html5ever::tendril::TendrilSink; use html5ever::tendril::TendrilSink;
use markup5ever_rcdom::{Handle, NodeData, RcDom}; use markup5ever_rcdom::{Handle, NodeData, RcDom};
use meilisearch_sdk::documents::DocumentsQuery;
use meilisearch_sdk::errors::Error; use meilisearch_sdk::errors::Error;
use meilisearch_sdk::search::{SearchResults, Selectors};
use rand::Rng; use rand::Rng;
use rocket::form::Form; use rocket::form::Form;
use rocket::response::Redirect;
use rocket::serde::json::Json; use rocket::serde::json::Json;
use rocket::State; use rocket::State;
use serde::Serialize; use rocket_dyn_templates::{context, Template};
use tracing::{debug, error, event, span, Level}; use serde::{Deserialize, Serialize};
use tracing::{debug, error, event, instrument, Level};
use crate::auth::User; use crate::auth::User;
use crate::config::Config; use crate::config::Config;
@@ -17,14 +21,16 @@ use crate::Context;
static ID_CHARSET: &[u8] = b"0123456789abcdefghijklmnopqrstuvwxyz"; static ID_CHARSET: &[u8] = b"0123456789abcdefghijklmnopqrstuvwxyz";
/// A saved page /// A saved page
#[derive(Clone, Serialize)] #[derive(Clone, Deserialize, Serialize)]
pub struct Page { pub struct Page {
/// Unique saved page ID /// Unique saved page ID
id: String, id: String,
/// User ID of page owner /// User ID of page owner
user_id: String, user_id: String,
/// Visit timestamp /// Visit date/time
timestamp: DateTime<Utc>, datetime: DateTime<Utc>,
/// Visit date/time (as Unix timestamp, for sorting)
timestamp: i64,
/// Page URL /// Page URL
url: String, url: String,
/// Page title (extracted from HTML document) /// Page title (extracted from HTML document)
@@ -33,6 +39,21 @@ pub struct Page {
data: String, data: String,
} }
/// Saved page info (no contents)
///
/// Carries the same identifying fields as [`Page`] but omits the page
/// contents.  The fields here match the attributes `get_page_list` asks
/// Meilisearch to retrieve, and that function returns its search results
/// as `SearchResults<PageInfo>`.
#[derive(Clone, Deserialize, Serialize)]
pub struct PageInfo {
/// Unique saved page ID
id: String,
/// User ID of page owner
user_id: String,
/// Visit date/time
datetime: DateTime<Utc>,
/// Page URL
url: String,
/// Page title (extracted from HTML document)
title: Option<String>,
}
/// Save page form /// Save page form
#[derive(rocket::FromForm)] #[derive(rocket::FromForm)]
pub struct SavePageForm { pub struct SavePageForm {
@@ -42,6 +63,75 @@ pub struct SavePageForm {
data: String, data: String,
} }
/// Response body produced by the [`index`] route
#[derive(rocket::Responder)]
pub enum IndexResponse {
/// Rendered template, sent with no status override
Success(Template),
/// Rendered template, sent with HTTP status 500
#[response(status = 500)]
Error(Template),
}
#[rocket::get("/")]
pub async fn index(
user: Option<User>,
ctx: &State<Context>,
config: &State<Config>,
) -> Result<IndexResponse, Redirect> {
if let Some(user) = user {
match get_page_list(&user, ctx, config).await {
Ok(r) => Ok(IndexResponse::Success(Template::render(
"index",
context! {
user: user,
pages: r.hits.into_iter().map(|r| r.result).collect::<Vec<_>>(),
total: r.estimated_total_hits,
},
))),
Err(e) => {
error!("Failed to retrieve page list: {}", e);
Ok(IndexResponse::Error(Template::render(
"error",
context! {
error: e.to_string(),
},
)))
},
}
} else {
Err(Redirect::to(rocket::uri![crate::auth::oidc_login]))
}
}
/// Response body produced by the [`get_page`] route
#[derive(rocket::Responder)]
pub enum PageResponse {
/// Saved page contents, sent with the `html` content type
#[response(content_type = "html")]
Success(String),
/// Rendered template, sent with HTTP status 500
#[response(status = 500)]
Error(Template),
}
#[rocket::get("/page/<id>")]
pub async fn get_page(
id: &str,
user: Option<User>,
ctx: &State<Context>,
config: &State<Config>,
) -> Result<Option<PageResponse>, Redirect> {
if let Some(user) = user {
match _get_page(id, &user, ctx, config).await {
Ok(Some(p)) => Ok(Some(PageResponse::Success(p.data))),
Ok(None) => Ok(None),
Err(e) => Ok(Some(PageResponse::Error(Template::render(
"error",
context! {
error: e.to_string(),
},
)))),
}
} else {
Err(Redirect::to(rocket::uri![crate::auth::oidc_login]))
}
}
/// Save a visited page in SingleFile format /// Save a visited page in SingleFile format
#[rocket::post("/save", data = "<form>")] #[rocket::post("/save", data = "<form>")]
pub async fn post_page( pub async fn post_page(
@@ -59,7 +149,51 @@ pub async fn post_page(
} }
} }
/// Search the configured Meilisearch index for pages owned by `user`.
///
/// Only the `id`, `user_id`, `datetime`, `title` and `url` attributes are
/// requested, and results are sorted by `timestamp` in descending order.
///
/// # Errors
///
/// Returns an [`Error`] when the search request fails.
async fn get_page_list(
    user: &User,
    ctx: &Context,
    config: &Config,
) -> Result<SearchResults<PageInfo>, Error> {
    let index_name = &config.meilisearch.index;
    debug!(
        "Searching for pages in Meilisearch index {} owned by user {}",
        index_name,
        user.id()
    );

    // Embed the user ID as a quoted string literal.  An unquoted value that
    // contains whitespace or filter syntax would break the expression or
    // change its meaning.  Backslashes are doubled before quotes are escaped
    // so the value cannot terminate the literal early.
    // NOTE(review): assumes Meilisearch accepts backslash-escaped double
    // quotes inside quoted filter values -- confirm against the server
    // version in use.
    let user_id = user
        .id()
        .to_string()
        .replace('\\', "\\\\")
        .replace('"', "\\\"");
    let filter = format!("user_id = \"{}\"", user_id);

    let index = ctx.client.index(index_name);
    let documents = index
        .search()
        .with_attributes_to_retrieve(Selectors::Some(&[
            "id", "user_id", "datetime", "title", "url",
        ]))
        .with_filter(&filter)
        .with_sort(&["timestamp:desc"])
        .execute()
        .await?;
    Ok(documents)
}
/// Fetch the page with the given `id`, provided it belongs to `user`.
///
/// Returns `Ok(None)` when no document matches both the page ID and the
/// user ID.
///
/// # Errors
///
/// Returns an [`Error`] when the documents query fails.
async fn _get_page(
    id: &str,
    user: &User,
    ctx: &Context,
    config: &Config,
) -> Result<Option<Page>, Error> {
    let index_name = &config.meilisearch.index;
    debug!(
        "Retreiving page {} from Meilisearch index {}",
        id, index_name,
    );

    // `id` comes from the caller and must not be spliced into the filter
    // verbatim: a value such as `x OR id = y` would escape the `user_id`
    // constraint and expose another user's page.  Both values are embedded
    // as quoted string literals; backslashes are doubled before quotes are
    // escaped so a value cannot terminate its literal early.
    // NOTE(review): assumes Meilisearch accepts backslash-escaped double
    // quotes inside quoted filter values -- confirm against the server
    // version in use.
    let quote = |value: &str| {
        format!(
            "\"{}\"",
            value.replace('\\', "\\\\").replace('"', "\\\"")
        )
    };
    let filter = format!(
        "user_id = {} AND id = {}",
        quote(&user.id().to_string()),
        quote(id),
    );

    let index = ctx.client.index(index_name);
    let mut results = DocumentsQuery::new(&index)
        .with_filter(&filter)
        .execute()
        .await?;
    Ok(results.results.pop())
}
/// Save the page /// Save the page
#[instrument(level = "info", skip(data, ctx, config))]
pub async fn save_page( pub async fn save_page(
url: &str, url: &str,
data: &str, data: &str,
@@ -67,15 +201,15 @@ pub async fn save_page(
config: &Config, config: &Config,
user: &str, user: &str,
) -> Result<Page, Error> { ) -> Result<Page, Error> {
let span = span!(Level::INFO, "save_page", url = url, user = user);
let _guard = span.enter();
let index_name = &config.meilisearch.index; let index_name = &config.meilisearch.index;
debug!("Saving page in Meilisearch index {}", index_name); debug!("Saving page in Meilisearch index {}", index_name);
let index = ctx.client.get_index(index_name).await?; let index = ctx.client.get_index(index_name).await?;
let now = Utc::now();
let doc = Page { let doc = Page {
id: gen_id(), id: gen_id(),
user_id: user.into(), user_id: user.into(),
timestamp: Utc::now(), datetime: now,
timestamp: now.timestamp(),
url: url.into(), url: url.into(),
title: extract_title(data), title: extract_title(data),
data: data.into(), data: data.into(),

69
templates/index.html.tera Normal file
View File

@@ -0,0 +1,69 @@
<!DOCTYPE html>
<html lang="en">
  <head>
    {# Index page: lists the recorded pages supplied in the `pages` context value. #}
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    <meta charset="utf-8" />
    <title>seensite</title>
    <style>
      table {
        width: 100%;
        max-width: 100%;
      }
      col.timestamp {
        width: 1px;
      }
      td {
        white-space: nowrap;
        padding: 0.5em;
      }
      td.title {
        padding-right: 2em;
      }
      td.title,
      td.url {
        white-space: unset;
      }
      td.title div,
      td.url div {
        display: -webkit-box;
        -webkit-line-clamp: 1;
        -webkit-box-orient: vertical;
        line-clamp: 1;
        overflow: hidden;
      }
    </style>
  </head>
  <body>
    <h1>Welcome</h1>
    <p>Displaying {{ pages | length }} of {{ total }} pages.</p>
    <table>
      <colgroup>
        <col class="timestamp"/>
        <col class="title"/>
        <col class="url" />
      </colgroup>
      <thead>
        <tr>
          <th scope="col">Date</th>
          <th scope="col">Title</th>
          <th scope="col">URL</th>
        </tr>
      </thead>
      <tbody>
        {% for page in pages -%}
        <tr>
          <td class="timestamp"><a
            href="/page/{{ page.id }}"
            >{{ page.datetime | date(format="%x %X") }}</a
          ></td>
          <td class="title"><div>{{ page.title }}</div></td>
          <td class="url"><div><a
            href="{{ page.url }}">{{ page.url }}</a
          ></div></td>
        </tr>
        {%- endfor %}
      </tbody>
    </table>
  </body>
</html>

View File

@@ -63,3 +63,28 @@ fn test_post_page_unauth() {
let res = req.dispatch(); let res = req.dispatch();
assert_eq!(res.status(), Status::Unauthorized); assert_eq!(res.status(), Status::Unauthorized);
} }
/// An unauthenticated request for the index page is answered with a
/// `303 See Other` redirect to `/login`.
#[test]
fn test_index_unauth() {
    super::setup();
    let client = Client::tracked(seensite::rocket()).unwrap();
    let res = client.get(uri![index]).dispatch();
    assert_eq!(res.status(), Status::SeeOther);
    assert_eq!(res.headers().get_one("Location"), Some("/login"));
}
/// A request for the index page that carries a bearer token issued for a
/// test user is answered with `200 OK`.
#[test]
fn test_index_auth() {
    super::setup();
    let client = Client::tracked(seensite::rocket()).unwrap();

    // Issue a token using the application's own managed `Context`.
    let token = {
        let ctx = client.rocket().state::<Context>().unwrap();
        ctx.make_jwt(&User::new("test1"), 60).unwrap()
    };
    let authorization =
        Header::new("Authorization", format!("Bearer {}", token));

    let res = client.get(uri![index]).header(authorization).dispatch();
    assert_eq!(res.status(), Status::Ok);
}