feat: ship blog platform admin and deploy stack
This commit is contained in:
@@ -1,12 +1,14 @@
|
||||
use axum::http::HeaderMap;
|
||||
use loco_rs::prelude::*;
|
||||
use sea_orm::{ConnectionTrait, DatabaseBackend, DbBackend, FromQueryResult, Statement};
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use serde_json::Value;
|
||||
use std::time::Instant;
|
||||
use std::{collections::HashSet, time::Instant};
|
||||
|
||||
use crate::models::_entities::posts;
|
||||
use crate::services::{analytics, content};
|
||||
use crate::{
|
||||
controllers::site_settings,
|
||||
models::_entities::posts,
|
||||
services::{abuse_guard, analytics, content},
|
||||
};
|
||||
|
||||
fn deserialize_boolish_option<'de, D>(
|
||||
deserializer: D,
|
||||
@@ -26,6 +28,243 @@ where
|
||||
.transpose()
|
||||
}
|
||||
|
||||
/// Collapse every run of whitespace to a single space and ASCII-lowercase
/// the result, producing the canonical form used for all text comparisons.
///
/// `split_whitespace` already discards leading and trailing whitespace, so
/// the joined string never has edge spaces — the original's trailing
/// `.trim()` was a no-op and has been removed.
fn normalize_text(value: &str) -> String {
    value
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        .to_ascii_lowercase()
}
|
||||
|
||||
fn tokenize(value: &str) -> Vec<String> {
|
||||
value
|
||||
.split(|ch: char| !ch.is_alphanumeric() && ch != '-' && ch != '_')
|
||||
.map(normalize_text)
|
||||
.filter(|item| !item.is_empty())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Edit distance between two strings, measured in Unicode scalar values
/// (`char`s), not bytes.
///
/// Single-row dynamic programming: `row[j]` holds the distance between the
/// prefix of `left` consumed so far and the first `j` chars of `right`.
fn levenshtein_distance(left: &str, right: &str) -> usize {
    if left == right {
        return 0;
    }
    if left.is_empty() {
        return right.chars().count();
    }
    if right.is_empty() {
        return left.chars().count();
    }

    let target: Vec<char> = right.chars().collect();
    let mut row: Vec<usize> = (0..=target.len()).collect();

    for (i, lc) in left.chars().enumerate() {
        // `diagonal` carries prev-row[j] (the substitution ancestor) as the
        // row is overwritten in place.
        let mut diagonal = row[0];
        row[0] = i + 1;
        for (j, &rc) in target.iter().enumerate() {
            let substitution = diagonal + usize::from(lc != rc);
            diagonal = row[j + 1];
            row[j + 1] = substitution.min(row[j] + 1).min(diagonal + 1);
        }
    }

    row[target.len()]
}
|
||||
|
||||
fn parse_synonym_groups(value: &Option<Value>) -> Vec<Vec<String>> {
|
||||
value
|
||||
.as_ref()
|
||||
.and_then(Value::as_array)
|
||||
.cloned()
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.filter_map(|item| item.as_str().map(ToString::to_string))
|
||||
.map(|item| {
|
||||
let normalized = item.replace("=>", ",").replace('|', ",");
|
||||
normalized
|
||||
.split([',', ','])
|
||||
.map(normalize_text)
|
||||
.filter(|token| !token.is_empty())
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.filter(|group| !group.is_empty())
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn expand_search_terms(query: &str, synonym_groups: &[Vec<String>]) -> Vec<String> {
|
||||
let normalized_query = normalize_text(query);
|
||||
let query_tokens = tokenize(query);
|
||||
let mut expanded = Vec::new();
|
||||
let mut seen = HashSet::new();
|
||||
|
||||
if !normalized_query.is_empty() && seen.insert(normalized_query.clone()) {
|
||||
expanded.push(normalized_query.clone());
|
||||
}
|
||||
|
||||
for token in &query_tokens {
|
||||
if seen.insert(token.clone()) {
|
||||
expanded.push(token.clone());
|
||||
}
|
||||
}
|
||||
|
||||
for group in synonym_groups {
|
||||
let matched = group.iter().any(|item| {
|
||||
*item == normalized_query
|
||||
|| query_tokens.iter().any(|token| token == item)
|
||||
|| normalized_query.contains(item)
|
||||
});
|
||||
|
||||
if matched {
|
||||
for token in group {
|
||||
if seen.insert(token.clone()) {
|
||||
expanded.push(token.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
expanded
|
||||
}
|
||||
|
||||
fn candidate_terms(posts: &[posts::Model]) -> Vec<String> {
|
||||
let mut seen = HashSet::new();
|
||||
let mut candidates = Vec::new();
|
||||
|
||||
for post in posts {
|
||||
for source in [
|
||||
post.title.as_deref().unwrap_or_default(),
|
||||
post.category.as_deref().unwrap_or_default(),
|
||||
&post.slug,
|
||||
] {
|
||||
for token in tokenize(source) {
|
||||
if token.len() >= 3 && seen.insert(token.clone()) {
|
||||
candidates.push(token);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(tags) = post.tags.as_ref().and_then(Value::as_array) {
|
||||
for token in tags.iter().filter_map(Value::as_str).flat_map(tokenize) {
|
||||
if token.len() >= 2 && seen.insert(token.clone()) {
|
||||
candidates.push(token);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
candidates
|
||||
}
|
||||
|
||||
fn find_spelling_fallback(query: &str, posts: &[posts::Model], synonym_groups: &[Vec<String>]) -> Vec<String> {
|
||||
let primary_token = tokenize(query).into_iter().next().unwrap_or_default();
|
||||
if primary_token.len() < 3 {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let mut nearest = candidate_terms(posts)
|
||||
.into_iter()
|
||||
.map(|candidate| {
|
||||
let distance = levenshtein_distance(&primary_token, &candidate);
|
||||
(candidate, distance)
|
||||
})
|
||||
.filter(|(_, distance)| *distance <= 2)
|
||||
.collect::<Vec<_>>();
|
||||
nearest.sort_by(|left, right| left.1.cmp(&right.1).then_with(|| left.0.cmp(&right.0)));
|
||||
|
||||
nearest
|
||||
.into_iter()
|
||||
.take(3)
|
||||
.flat_map(|(candidate, _)| expand_search_terms(&candidate, synonym_groups))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn post_has_tag(post: &posts::Model, wanted_tag: &str) -> bool {
|
||||
let wanted = normalize_text(wanted_tag);
|
||||
|
||||
post.tags
|
||||
.as_ref()
|
||||
.and_then(Value::as_array)
|
||||
.map(|tags| {
|
||||
tags.iter()
|
||||
.filter_map(Value::as_str)
|
||||
.map(normalize_text)
|
||||
.any(|tag| tag == wanted)
|
||||
})
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn score_post(post: &posts::Model, query: &str, terms: &[String]) -> f64 {
|
||||
let normalized_query = normalize_text(query);
|
||||
let title = normalize_text(post.title.as_deref().unwrap_or_default());
|
||||
let description = normalize_text(post.description.as_deref().unwrap_or_default());
|
||||
let content_text = normalize_text(post.content.as_deref().unwrap_or_default());
|
||||
let category = normalize_text(post.category.as_deref().unwrap_or_default());
|
||||
let slug = normalize_text(&post.slug);
|
||||
let tags = post
|
||||
.tags
|
||||
.as_ref()
|
||||
.and_then(Value::as_array)
|
||||
.cloned()
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.filter_map(|item| item.as_str().map(normalize_text))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut score = 0.0;
|
||||
|
||||
if !normalized_query.is_empty() {
|
||||
if title.contains(&normalized_query) {
|
||||
score += 6.0;
|
||||
}
|
||||
if description.contains(&normalized_query) {
|
||||
score += 4.0;
|
||||
}
|
||||
if slug.contains(&normalized_query) {
|
||||
score += 4.0;
|
||||
}
|
||||
if category.contains(&normalized_query) {
|
||||
score += 3.0;
|
||||
}
|
||||
if tags.iter().any(|tag| tag.contains(&normalized_query)) {
|
||||
score += 4.0;
|
||||
}
|
||||
if content_text.contains(&normalized_query) {
|
||||
score += 2.0;
|
||||
}
|
||||
}
|
||||
|
||||
for term in terms {
|
||||
if term.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if title.contains(term) {
|
||||
score += 3.5;
|
||||
}
|
||||
if description.contains(term) {
|
||||
score += 2.2;
|
||||
}
|
||||
if slug.contains(term) {
|
||||
score += 2.0;
|
||||
}
|
||||
if category.contains(term) {
|
||||
score += 1.8;
|
||||
}
|
||||
if tags.iter().any(|tag| tag == term) {
|
||||
score += 2.5;
|
||||
} else if tags.iter().any(|tag| tag.contains(term)) {
|
||||
score += 1.5;
|
||||
}
|
||||
if content_text.contains(term) {
|
||||
score += 0.8;
|
||||
}
|
||||
}
|
||||
|
||||
score
|
||||
}
|
||||
|
||||
fn is_preview_search(query: &SearchQuery, headers: &HeaderMap) -> bool {
|
||||
query.preview.unwrap_or(false)
|
||||
|| headers
|
||||
@@ -39,11 +278,15 @@ fn is_preview_search(query: &SearchQuery, headers: &HeaderMap) -> bool {
|
||||
/// Query-string parameters accepted by the public search endpoint.
/// NOTE(review): the struct's derive attributes sit above the visible
/// region; the `serde` field attributes imply a `Deserialize` derive.
pub struct SearchQuery {
    /// Free-text search query; a missing or blank value yields no results.
    pub q: Option<String>,
    /// Maximum number of results; the handler clamps this to 1..=100.
    pub limit: Option<u64>,
    /// Optional category filter, matched case-insensitively.
    pub category: Option<String>,
    /// Optional tag filter, matched against the post's JSON tag array.
    pub tag: Option<String>,
    /// Optional post-type filter; `type` is accepted as a query-string alias.
    #[serde(alias = "type")]
    pub post_type: Option<String>,
    /// Preview-mode flag, parsed through the custom deserializer —
    /// presumably to accept truthy strings like "1"/"true"; confirm against
    /// `deserialize_boolish_option`.
    #[serde(default, deserialize_with = "deserialize_boolish_option")]
    pub preview: Option<bool>,
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, FromQueryResult)]
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct SearchResult {
|
||||
pub id: i32,
|
||||
pub title: Option<String>,
|
||||
@@ -59,131 +302,6 @@ pub struct SearchResult {
|
||||
pub rank: f64,
|
||||
}
|
||||
|
||||
/// Postgres full-text search statement.
///
/// Builds one weighted `tsvector` over title (weight A), description (B),
/// category and tags (C), and content (D); matches it against
/// `plainto_tsquery('simple', $1)`; and returns matching rows ranked by
/// `ts_rank_cd`, cast to `float8` so the rank maps to `f64`.
/// Bind parameters: `$1` = query text, `$2` = row limit.
fn search_sql() -> &'static str {
    r#"
    SELECT
        p.id,
        p.title,
        p.slug,
        p.description,
        p.content,
        p.category,
        p.tags,
        p.post_type,
        p.pinned,
        p.created_at,
        p.updated_at,
        ts_rank_cd(
            setweight(to_tsvector('simple', coalesce(p.title, '')), 'A') ||
            setweight(to_tsvector('simple', coalesce(p.description, '')), 'B') ||
            setweight(to_tsvector('simple', coalesce(p.category, '')), 'C') ||
            setweight(to_tsvector('simple', coalesce(p.tags::text, '')), 'C') ||
            setweight(to_tsvector('simple', coalesce(p.content, '')), 'D'),
            plainto_tsquery('simple', $1)
        )::float8 AS rank
    FROM posts p
    WHERE (
        setweight(to_tsvector('simple', coalesce(p.title, '')), 'A') ||
        setweight(to_tsvector('simple', coalesce(p.description, '')), 'B') ||
        setweight(to_tsvector('simple', coalesce(p.category, '')), 'C') ||
        setweight(to_tsvector('simple', coalesce(p.tags::text, '')), 'C') ||
        setweight(to_tsvector('simple', coalesce(p.content, '')), 'D')
    ) @@ plainto_tsquery('simple', $1)
    ORDER BY rank DESC, p.created_at DESC
    LIMIT $2
    "#
}
|
||||
|
||||
fn app_level_rank(post: &posts::Model, wanted: &str) -> f64 {
|
||||
let wanted_lower = wanted.to_lowercase();
|
||||
let mut rank = 0.0;
|
||||
|
||||
if post
|
||||
.title
|
||||
.as_deref()
|
||||
.unwrap_or_default()
|
||||
.to_lowercase()
|
||||
.contains(&wanted_lower)
|
||||
{
|
||||
rank += 4.0;
|
||||
}
|
||||
|
||||
if post
|
||||
.description
|
||||
.as_deref()
|
||||
.unwrap_or_default()
|
||||
.to_lowercase()
|
||||
.contains(&wanted_lower)
|
||||
{
|
||||
rank += 2.5;
|
||||
}
|
||||
|
||||
if post
|
||||
.content
|
||||
.as_deref()
|
||||
.unwrap_or_default()
|
||||
.to_lowercase()
|
||||
.contains(&wanted_lower)
|
||||
{
|
||||
rank += 1.0;
|
||||
}
|
||||
|
||||
if post
|
||||
.category
|
||||
.as_deref()
|
||||
.unwrap_or_default()
|
||||
.to_lowercase()
|
||||
.contains(&wanted_lower)
|
||||
{
|
||||
rank += 1.5;
|
||||
}
|
||||
|
||||
if post
|
||||
.tags
|
||||
.as_ref()
|
||||
.and_then(Value::as_array)
|
||||
.map(|tags| {
|
||||
tags.iter()
|
||||
.filter_map(Value::as_str)
|
||||
.any(|tag| tag.to_lowercase().contains(&wanted_lower))
|
||||
})
|
||||
.unwrap_or(false)
|
||||
{
|
||||
rank += 2.0;
|
||||
}
|
||||
|
||||
rank
|
||||
}
|
||||
|
||||
async fn fallback_search(ctx: &AppContext, q: &str, limit: u64) -> Result<Vec<SearchResult>> {
|
||||
let mut results = posts::Entity::find().all(&ctx.db).await?;
|
||||
results.sort_by(|left, right| right.created_at.cmp(&left.created_at));
|
||||
|
||||
Ok(results
|
||||
.into_iter()
|
||||
.map(|post| {
|
||||
let rank = app_level_rank(&post, q);
|
||||
(post, rank)
|
||||
})
|
||||
.filter(|(_, rank)| *rank > 0.0)
|
||||
.take(limit as usize)
|
||||
.map(|(post, rank)| SearchResult {
|
||||
id: post.id,
|
||||
title: post.title,
|
||||
slug: post.slug,
|
||||
description: post.description,
|
||||
content: post.content,
|
||||
category: post.category,
|
||||
tags: post.tags,
|
||||
post_type: post.post_type,
|
||||
pinned: post.pinned,
|
||||
created_at: post.created_at.into(),
|
||||
updated_at: post.updated_at.into(),
|
||||
rank,
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
#[debug_handler]
|
||||
pub async fn search(
|
||||
Query(query): Query<SearchQuery>,
|
||||
@@ -199,26 +317,107 @@ pub async fn search(
|
||||
return format::json(Vec::<SearchResult>::new());
|
||||
}
|
||||
|
||||
let limit = query.limit.unwrap_or(20).clamp(1, 100);
|
||||
if !preview_search {
|
||||
abuse_guard::enforce_public_scope(
|
||||
"search",
|
||||
abuse_guard::detect_client_ip(&headers).as_deref(),
|
||||
Some(&q),
|
||||
)?;
|
||||
}
|
||||
|
||||
let results = if ctx.db.get_database_backend() == DatabaseBackend::Postgres {
|
||||
let statement = Statement::from_sql_and_values(
|
||||
DbBackend::Postgres,
|
||||
search_sql(),
|
||||
[q.clone().into(), (limit as i64).into()],
|
||||
);
|
||||
let limit = query.limit.unwrap_or(20).clamp(1, 100) as usize;
|
||||
let settings = site_settings::load_current(&ctx).await.ok();
|
||||
let synonym_groups = settings
|
||||
.as_ref()
|
||||
.map(|item| parse_synonym_groups(&item.search_synonyms))
|
||||
.unwrap_or_default();
|
||||
|
||||
match SearchResult::find_by_statement(statement)
|
||||
.all(&ctx.db)
|
||||
.await
|
||||
{
|
||||
Ok(rows) if !rows.is_empty() => rows,
|
||||
Ok(_) => fallback_search(&ctx, &q, limit).await?,
|
||||
Err(_) => fallback_search(&ctx, &q, limit).await?,
|
||||
let mut all_posts = posts::Entity::find()
|
||||
.all(&ctx.db)
|
||||
.await?
|
||||
.into_iter()
|
||||
.filter(|post| {
|
||||
preview_search
|
||||
|| content::is_post_listed_publicly(post, chrono::Utc::now().fixed_offset())
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if let Some(category) = query.category.as_deref().map(str::trim).filter(|value| !value.is_empty()) {
|
||||
all_posts.retain(|post| {
|
||||
post.category
|
||||
.as_deref()
|
||||
.map(|value| value.eq_ignore_ascii_case(category))
|
||||
.unwrap_or(false)
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(tag) = query.tag.as_deref().map(str::trim).filter(|value| !value.is_empty()) {
|
||||
all_posts.retain(|post| post_has_tag(post, tag));
|
||||
}
|
||||
|
||||
if let Some(post_type) = query.post_type.as_deref().map(str::trim).filter(|value| !value.is_empty()) {
|
||||
all_posts.retain(|post| {
|
||||
post.post_type
|
||||
.as_deref()
|
||||
.map(|value| value.eq_ignore_ascii_case(post_type))
|
||||
.unwrap_or(false)
|
||||
});
|
||||
}
|
||||
|
||||
let mut expanded_terms = expand_search_terms(&q, &synonym_groups);
|
||||
let mut results = all_posts
|
||||
.iter()
|
||||
.map(|post| (post, score_post(post, &q, &expanded_terms)))
|
||||
.filter(|(_, rank)| *rank > 0.0)
|
||||
.map(|(post, rank)| SearchResult {
|
||||
id: post.id,
|
||||
title: post.title.clone(),
|
||||
slug: post.slug.clone(),
|
||||
description: post.description.clone(),
|
||||
content: post.content.clone(),
|
||||
category: post.category.clone(),
|
||||
tags: post.tags.clone(),
|
||||
post_type: post.post_type.clone(),
|
||||
pinned: post.pinned,
|
||||
created_at: post.created_at.into(),
|
||||
updated_at: post.updated_at.into(),
|
||||
rank,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if results.is_empty() {
|
||||
expanded_terms = find_spelling_fallback(&q, &all_posts, &synonym_groups);
|
||||
if !expanded_terms.is_empty() {
|
||||
results = all_posts
|
||||
.iter()
|
||||
.map(|post| (post, score_post(post, &q, &expanded_terms)))
|
||||
.filter(|(_, rank)| *rank > 0.0)
|
||||
.map(|(post, rank)| SearchResult {
|
||||
id: post.id,
|
||||
title: post.title.clone(),
|
||||
slug: post.slug.clone(),
|
||||
description: post.description.clone(),
|
||||
content: post.content.clone(),
|
||||
category: post.category.clone(),
|
||||
tags: post.tags.clone(),
|
||||
post_type: post.post_type.clone(),
|
||||
pinned: post.pinned,
|
||||
created_at: post.created_at.into(),
|
||||
updated_at: post.updated_at.into(),
|
||||
rank,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
}
|
||||
} else {
|
||||
fallback_search(&ctx, &q, limit).await?
|
||||
};
|
||||
}
|
||||
|
||||
results.sort_by(|left, right| {
|
||||
right
|
||||
.rank
|
||||
.partial_cmp(&left.rank)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
.then_with(|| right.created_at.cmp(&left.created_at))
|
||||
});
|
||||
results.truncate(limit);
|
||||
|
||||
if !preview_search {
|
||||
analytics::record_search_event(
|
||||
|
||||
Reference in New Issue
Block a user