Initial commit

This commit is contained in:
2025-04-03 19:24:45 -05:00
commit 03fe19aa9b
8 changed files with 3326 additions and 0 deletions

3
.gitignore vendored Normal file
View File

@@ -0,0 +1,3 @@
/target
/Rocket.toml
/meilisearch.token

3039
Cargo.lock generated Normal file

File diff suppressed because it is too large Load Diff

16
Cargo.toml Normal file
View File

@@ -0,0 +1,16 @@
[package]
name = "seensite"
version = "0.1.0"
edition = "2021"
[dependencies]
chrono = { version = "0.4.40", default-features = false, features = ["std", "clock", "serde"] }
html5ever = "0.27.0"
markup5ever_rcdom = "0.3.0"
meilisearch-sdk = "0.28.0"
rand = "0.9.0"
rocket = { version = "0.5.1", features = ["json"] }
serde = { version = "1.0.219", features = ["derive"] }
thiserror = "2.0.12"
tracing = "0.1.41"
tracing-subscriber = { version = "0.3.19", features = ["env-filter"] }

2
rustfmt.toml Normal file
View File

@@ -0,0 +1,2 @@
match_block_trailing_comma = true
max_width = 79

19
src/config.rs Normal file
View File

@@ -0,0 +1,19 @@
use serde::Deserialize;
/// Connection settings for the Meilisearch server.
#[derive(Debug, Deserialize)]
pub struct MeilisearchConfig {
/// Address of the Meilisearch server; handed to the client constructor.
pub url: String,
/// Optional path of a file holding the API key. The client setup code in
/// `meilisearch.rs` reads this file with `std::fs::read_to_string`.
/// Defaults to `None` when the key is absent from the configuration.
#[serde(default)]
pub token: Option<String>,
/// Name of the index to use; falls back to `default_index()` when absent.
#[serde(default = "default_index")]
pub index: String,
}
/// Top-level application configuration as deserialized by serde.
#[derive(Debug, Deserialize)]
pub struct Config {
/// Settings for the Meilisearch connection.
pub meilisearch: MeilisearchConfig,
}
/// Default index name: the crate's package name, fixed at compile time.
fn default_index() -> String {
    String::from(env!("CARGO_PKG_NAME"))
}

88
src/main.rs Normal file
View File

@@ -0,0 +1,88 @@
mod config;
mod meilisearch;
mod page;
use meilisearch_sdk::client::Client as MeilisearchClient;
use rocket::fairing::{self, AdHoc};
use rocket::form::Form;
use rocket::serde::json::Json;
use rocket::{Rocket, State};
use tracing::error;
use config::Config;
/// Shared application state managed by Rocket.
struct Context {
    /// Parsed application configuration (read by the Meilisearch setup
    /// fairing and by the page-saving code, so no lint suppression is
    /// needed here).
    config: Config,
    /// Client used for all Meilisearch requests.
    client: MeilisearchClient,
}
/// Errors that can occur while building the application `Context`.
#[derive(Debug, thiserror::Error)]
enum InitError {
/// The Meilisearch client could not be constructed from the configuration.
#[error("Meilisearch error: {0}")]
Meilisearch(#[from] meilisearch::Error),
}
impl Context {
pub fn init(config: Config) -> Result<Self, InitError> {
let client = MeilisearchClient::try_from(&config)?;
Ok(Self { config, client })
}
}
/// Form payload accepted by the `POST /save` route.
// NOTE(review): Rocket enforces size limits on form and string fields; full
// SingleFile documents are presumably larger than the defaults, so the limits
// are likely raised in the (git-ignored) Rocket.toml — confirm.
#[derive(rocket::FromForm)]
pub struct SavePageForm {
/// URL of the page that was visited
url: String,
/// Page content (SingleFile HTML)
data: String,
}
/// Save a visited page in SingleFile format
#[rocket::post("/save", data = "<form>")]
async fn save_page(
form: Form<SavePageForm>,
ctx: &State<Context>,
) -> Result<Json<page::Page>, String> {
match page::save_page(&form.url, &form.data, ctx).await {
Ok(p) => Ok(Json(p)),
Err(e) => {
error!("Failed to save page: {}", e);
Err(e.to_string())
},
}
}
/// Ignite-time fairing callback that makes sure the configured Meilisearch
/// index exists.
///
/// Returns `Ok(rocket)` when the index is available and `Err(rocket)`
/// (after logging the error) when it could not be checked or created.
///
/// # Panics
///
/// Panics if no [`Context`] has been registered as managed state.
async fn meilisearch_setup(rocket: Rocket<rocket::Build>) -> fairing::Result {
    let outcome = {
        let ctx: &Context = rocket.state().unwrap();
        let index_name = &ctx.config.meilisearch.index;
        meilisearch::ensure_index(&ctx.client, index_name).await
    };
    match outcome {
        Ok(()) => Ok(rocket),
        Err(e) => {
            error!("Failed to create Meilisearch index: {}", e);
            Err(rocket)
        },
    }
}
/// Application entry point: sets up logging, loads the configuration,
/// creates the shared [`Context`] and assembles the Rocket instance.
///
/// # Panics
///
/// Panics when the configuration cannot be extracted or the context cannot
/// be initialized.
#[rocket::launch]
async fn rocket() -> _ {
    // Log to stderr, filtered by the default environment variable.
    let env_filter = tracing_subscriber::EnvFilter::from_default_env();
    tracing_subscriber::fmt()
        .with_env_filter(env_filter)
        .with_writer(std::io::stderr)
        .init();

    let builder = rocket::build();
    let ctx = {
        let config: Config = builder.figment().extract().unwrap();
        Context::init(config).unwrap()
    };
    let index_setup =
        AdHoc::try_on_ignite("Meilisearch Setup", meilisearch_setup);

    // The context must be managed before the fairing runs, because the
    // fairing looks it up from the managed state.
    builder
        .manage(ctx)
        .mount("/", rocket::routes![save_page])
        .attach(index_setup)
}

61
src/meilisearch.rs Normal file
View File

@@ -0,0 +1,61 @@
use meilisearch_sdk::client::Client;
use meilisearch_sdk::errors::{Error as MeilisearchError, ErrorCode};
use tracing::{debug, error, info};
use crate::config::Config;
/// Errors raised while constructing the Meilisearch client.
#[derive(Debug, thiserror::Error)]
pub enum Error {
/// The file that should contain the API token could not be read.
#[error("Could not read token: {0}")]
Token(std::io::Error),
/// An error reported by the Meilisearch SDK.
#[error("{0}")]
Meilisearch(#[from] MeilisearchError),
}
impl TryFrom<&Config> for Client {
type Error = Error;
fn try_from(config: &Config) -> Result<Self, Self::Error> {
let token = match &config.meilisearch.token {
Some(t) => Some(std::fs::read_to_string(t).map_err(Error::Token)?),
None => None,
};
Ok(Client::new(&config.meilisearch.url, token)?)
}
}
/// Makes sure the Meilisearch index called `name` exists.
///
/// The index is looked up first; it is created only when the server
/// answers with the "index not found" error code.
///
/// # Errors
///
/// Returns the SDK error (after logging it) when the lookup fails for any
/// other reason or when creating the index fails.
pub async fn ensure_index(
    client: &Client,
    name: &str,
) -> Result<(), MeilisearchError> {
    match client.get_index(name).await {
        Ok(_) => {
            debug!("Meilisearch index '{}' already exists", name);
            return Ok(());
        },
        // Missing index: fall through to the creation step below.
        Err(MeilisearchError::Meilisearch(e))
            if e.error_code == ErrorCode::IndexNotFound => {},
        Err(e) => {
            error!("Failed to check index: {}", e);
            return Err(e);
        },
    }

    info!("Creating Meilisearch index: {}", name);
    create_index(client, name).await.map_err(|e| {
        error!("Failed to create index: {}", e);
        e
    })
}
async fn create_index(
client: &Client,
index: &str,
) -> Result<(), meilisearch_sdk::errors::Error> {
let task = client.create_index(index, Some("id")).await?;
task.wait_for_completion(client, None, None).await?;
Ok(())
}

98
src/page.rs Normal file
View File

@@ -0,0 +1,98 @@
use chrono::{DateTime, Utc};
use html5ever::parse_document;
use html5ever::tendril::TendrilSink;
use markup5ever_rcdom::{Handle, NodeData, RcDom};
use meilisearch_sdk::errors::Error;
use rand::Rng;
use serde::Serialize;
use crate::Context;
/// Alphabet that generated page IDs are drawn from: the ASCII digits
/// followed by the lowercase ASCII letters (36 symbols).
static ID_CHARSET: &[u8] = b"0123456789abcdefghijklmnopqrstuvwxyz";
/// A saved page, serialized both as the Meilisearch document and as the
/// JSON body returned to the client.
#[derive(Clone, Serialize)]
pub struct Page {
/// Unique saved page ID (also passed as the document primary key `id`)
id: String,
/// Visit timestamp, taken when the page is saved
timestamp: DateTime<Utc>,
/// Page URL
url: String,
/// Page title (extracted from HTML document), `None` if none was found
title: Option<String>,
/// Page content (SingleFile HTML)
data: String,
}
/// Save the page
///
/// Builds a [`Page`] from `url` and `data` (with a freshly generated ID,
/// the current UTC time and the title extracted from the HTML) and submits
/// it to the configured Meilisearch index.
///
/// # Errors
///
/// Returns the SDK error when the index cannot be fetched or the document
/// cannot be submitted.
pub async fn save_page(
    url: &str,
    data: &str,
    ctx: &Context,
) -> Result<Page, Error> {
    let client = &ctx.client;
    let index = client.get_index(&ctx.config.meilisearch.index).await?;
    let doc = Page {
        id: gen_id(),
        timestamp: Utc::now(),
        url: url.into(),
        title: extract_title(data),
        data: data.into(),
    };
    // Borrow the document as a one-element slice instead of cloning it:
    // the page carries the complete HTML payload, so a clone would copy
    // the whole document a second time for no benefit.
    index
        .add_or_replace(std::slice::from_ref(&doc), Some("id"))
        .await?;
    Ok(doc)
}
/// Generate a document ID: 12 characters, each picked at random from
/// `ID_CHARSET`.
fn gen_id() -> String {
    let mut rng = rand::rng();
    let charset_len = ID_CHARSET.len();
    std::iter::repeat_with(|| {
        let pick = rng.random_range(..charset_len);
        char::from(ID_CHARSET[pick])
    })
    .take(12)
    .collect()
}
/// Parses `html` as a document and returns the text of its title element,
/// or `None` when no title with non-blank text is found.
fn extract_title(html: &str) -> Option<String> {
    let mut input = html.as_bytes();
    let parser = parse_document(RcDom::default(), Default::default());
    // The parser reads from an in-memory byte slice here.
    let dom = parser.from_utf8().read_from(&mut input).unwrap();
    find_title(&dom.document)
}
/// Recursively searches for the <title> tag and returns its text content.
fn find_title(handle: &Handle) -> Option<String> {
match &handle.data {
NodeData::Element { name, .. } if name.local == *"title" => {
extract_text(handle)
},
NodeData::Document | NodeData::Element { .. } => {
for child in handle.children.borrow().iter() {
if let Some(title) = find_title(child) {
return Some(title);
}
}
None
},
_ => None,
}
}
/// Joins the text of all direct text-node children of `handle`.
///
/// The joined text is trimmed; `None` is returned when nothing but
/// whitespace (or nothing at all) remains.
fn extract_text(handle: &Handle) -> Option<String> {
    let mut buf = String::new();
    handle.children.borrow().iter().for_each(|node| {
        if let NodeData::Text { contents } = &node.data {
            buf.push_str(&contents.borrow());
        }
    });
    match buf.trim() {
        "" => None,
        trimmed => Some(trimmed.to_owned()),
    }
}