feat: ship blog platform admin and deploy stack

This commit is contained in:
2026-03-31 21:48:39 +08:00
parent a9a05aa105
commit 313f174fbc
210 changed files with 25476 additions and 5803 deletions

View File

@@ -1,12 +1,14 @@
use axum::http::HeaderMap;
use loco_rs::prelude::*;
use sea_orm::{ConnectionTrait, DatabaseBackend, DbBackend, FromQueryResult, Statement};
use serde::{Deserialize, Deserializer, Serialize};
use serde_json::Value;
use std::time::Instant;
use std::{collections::HashSet, time::Instant};
use crate::models::_entities::posts;
use crate::services::{analytics, content};
use crate::{
controllers::site_settings,
models::_entities::posts,
services::{abuse_guard, analytics, content},
};
fn deserialize_boolish_option<'de, D>(
deserializer: D,
@@ -26,6 +28,243 @@ where
.transpose()
}
/// Collapses every run of whitespace in `value` to a single space and
/// lowercases ASCII letters, producing the canonical form used for all
/// search comparisons.
///
/// `split_whitespace` already discards leading and trailing whitespace, so
/// the joined string never needs trimming (the previous `.trim()` call was
/// a no-op and has been removed).
fn normalize_text(value: &str) -> String {
    value
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        .to_ascii_lowercase()
}
fn tokenize(value: &str) -> Vec<String> {
value
.split(|ch: char| !ch.is_alphanumeric() && ch != '-' && ch != '_')
.map(normalize_text)
.filter(|item| !item.is_empty())
.collect()
}
/// Computes the Levenshtein (edit) distance between `left` and `right`,
/// counted in Unicode scalar values.
///
/// Classic two-row dynamic-programming formulation. Both rows are allocated
/// once up front and swapped between iterations, instead of allocating a
/// fresh `Vec` for every character of `left` as the original did.
fn levenshtein_distance(left: &str, right: &str) -> usize {
    if left == right {
        return 0;
    }
    if left.is_empty() {
        return right.chars().count();
    }
    if right.is_empty() {
        return left.chars().count();
    }
    let right_chars = right.chars().collect::<Vec<_>>();
    // prev[j] = distance between the already-processed prefix of `left`
    // and the first j characters of `right`.
    let mut prev = (0..=right_chars.len()).collect::<Vec<usize>>();
    let mut curr = vec![0usize; right_chars.len() + 1];
    for (i, left_ch) in left.chars().enumerate() {
        // Every slot of `curr` is overwritten below, so reuse is safe.
        curr[0] = i + 1;
        for (j, right_ch) in right_chars.iter().enumerate() {
            let substitution_cost = usize::from(left_ch != *right_ch);
            curr[j + 1] = (curr[j] + 1) // insertion
                .min(prev[j + 1] + 1) // deletion
                .min(prev[j] + substitution_cost); // substitute / match
        }
        std::mem::swap(&mut prev, &mut curr);
    }
    prev[right_chars.len()]
}
/// Parses the configured synonym groups from the optional JSON settings
/// value.
///
/// Each array entry is one group encoded as a string; `=>`, `|`, ASCII `,`,
/// and the full-width comma `，` are all accepted as token separators.
/// Tokens are normalized, and empty tokens/groups are dropped.
///
/// NOTE(review): the original split on an *empty* char literal
/// (`.split([',', ''])`), which does not compile — the second character was
/// garbled in transit. The full-width comma `，` is the assumed intent
/// (CJK input is plausible for this codebase); confirm against the
/// settings UI / original commit.
fn parse_synonym_groups(value: &Option<Value>) -> Vec<Vec<String>> {
    value
        .as_ref()
        .and_then(Value::as_array)
        .cloned()
        .unwrap_or_default()
        .into_iter()
        .filter_map(|item| item.as_str().map(ToString::to_string))
        .map(|item| {
            // Canonicalize the alternative separators to an ASCII comma first.
            let normalized = item.replace("=>", ",").replace('|', ",");
            normalized
                .split([',', '，'])
                .map(normalize_text)
                .filter(|token| !token.is_empty())
                .collect::<Vec<_>>()
        })
        .filter(|group| !group.is_empty())
        .collect()
}
/// Builds the ordered, de-duplicated list of search terms for `query`:
/// the whole normalized query first, then its individual tokens, then every
/// entry of each synonym group that matches the query.
fn expand_search_terms(query: &str, synonym_groups: &[Vec<String>]) -> Vec<String> {
    let normalized_query = normalize_text(query);
    let query_tokens = tokenize(query);
    let mut seen = HashSet::new();
    let mut expanded = Vec::new();

    // Local helper: record `term` once, preserving first-seen order.
    fn push_unique(seen: &mut HashSet<String>, out: &mut Vec<String>, term: &str) {
        if seen.insert(term.to_string()) {
            out.push(term.to_string());
        }
    }

    if !normalized_query.is_empty() {
        push_unique(&mut seen, &mut expanded, &normalized_query);
    }
    for token in &query_tokens {
        push_unique(&mut seen, &mut expanded, token);
    }
    // A synonym group applies when any entry equals the full query, equals
    // one of the query tokens, or appears as a substring of the query.
    for group in synonym_groups {
        let group_applies = group.iter().any(|entry| {
            *entry == normalized_query
                || query_tokens.iter().any(|token| token == entry)
                || normalized_query.contains(entry)
        });
        if group_applies {
            for entry in group {
                push_unique(&mut seen, &mut expanded, entry);
            }
        }
    }
    expanded
}
/// Collects unique spelling-correction candidates from the post corpus:
/// tokens of 3+ chars from each post's title/category/slug, plus tokens of
/// 2+ chars from its tags. First-occurrence order is preserved.
fn candidate_terms(posts: &[posts::Model]) -> Vec<String> {
    let mut seen = HashSet::new();
    let mut candidates = Vec::new();
    for post in posts {
        let text_sources = [
            post.title.as_deref().unwrap_or_default(),
            post.category.as_deref().unwrap_or_default(),
            post.slug.as_str(),
        ];
        for token in text_sources.iter().flat_map(|source| tokenize(source)) {
            // Very short fragments make poor suggestions; require 3+ chars.
            if token.len() >= 3 && seen.insert(token.clone()) {
                candidates.push(token);
            }
        }
        let tag_tokens = post
            .tags
            .as_ref()
            .and_then(Value::as_array)
            .into_iter()
            .flatten()
            .filter_map(Value::as_str)
            .flat_map(tokenize);
        for token in tag_tokens {
            // Tags are curated, so shorter (2+ char) tokens are acceptable.
            if token.len() >= 2 && seen.insert(token.clone()) {
                candidates.push(token);
            }
        }
    }
    candidates
}
/// When the primary search produced nothing, derives fallback terms from
/// corpus tokens within edit distance 2 of the query's first token
/// (which must be at least 3 chars). The closest 3 candidates — ties broken
/// alphabetically for determinism — are each expanded through the synonym
/// groups.
fn find_spelling_fallback(
    query: &str,
    posts: &[posts::Model],
    synonym_groups: &[Vec<String>],
) -> Vec<String> {
    let primary_token = tokenize(query).into_iter().next().unwrap_or_default();
    if primary_token.len() < 3 {
        // Too short to correct meaningfully.
        return Vec::new();
    }
    let mut scored = Vec::new();
    for candidate in candidate_terms(posts) {
        let distance = levenshtein_distance(&primary_token, &candidate);
        if distance <= 2 {
            scored.push((candidate, distance));
        }
    }
    scored.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.cmp(&b.0)));
    scored.truncate(3);
    let mut terms = Vec::new();
    for (candidate, _) in scored {
        terms.extend(expand_search_terms(&candidate, synonym_groups));
    }
    terms
}
/// Returns `true` when `post` carries a tag equal (after normalization) to
/// `wanted_tag`. Posts whose `tags` field is absent or not a JSON array
/// never match.
fn post_has_tag(post: &posts::Model, wanted_tag: &str) -> bool {
    let wanted = normalize_text(wanted_tag);
    match post.tags.as_ref().and_then(Value::as_array) {
        Some(tags) => tags
            .iter()
            .filter_map(Value::as_str)
            .any(|tag| normalize_text(tag) == wanted),
        None => false,
    }
}
/// Ranks `post` against the raw query and the expanded term list.
///
/// Substring hits of the full normalized query earn the heaviest,
/// field-specific weights; each expanded term then adds smaller boosts.
/// An exact tag hit outweighs a substring tag hit and they are never both
/// awarded for the same term. A score of 0.0 means "no match".
fn score_post(post: &posts::Model, query: &str, terms: &[String]) -> f64 {
    let normalized_query = normalize_text(query);
    let title = normalize_text(post.title.as_deref().unwrap_or_default());
    let description = normalize_text(post.description.as_deref().unwrap_or_default());
    let content_text = normalize_text(post.content.as_deref().unwrap_or_default());
    let category = normalize_text(post.category.as_deref().unwrap_or_default());
    let slug = normalize_text(&post.slug);
    let tags = post
        .tags
        .as_ref()
        .and_then(Value::as_array)
        .cloned()
        .unwrap_or_default()
        .into_iter()
        .filter_map(|item| item.as_str().map(normalize_text))
        .collect::<Vec<_>>();

    let mut score = 0.0;

    // Whole-query matches carry the heaviest per-field weights.
    if !normalized_query.is_empty() {
        let whole_query_hits = [
            (title.contains(&normalized_query), 6.0),
            (description.contains(&normalized_query), 4.0),
            (slug.contains(&normalized_query), 4.0),
            (category.contains(&normalized_query), 3.0),
            (tags.iter().any(|tag| tag.contains(&normalized_query)), 4.0),
            (content_text.contains(&normalized_query), 2.0),
        ];
        for (hit, weight) in whole_query_hits.iter().copied() {
            if hit {
                score += weight;
            }
        }
    }

    // Each expanded term contributes smaller per-field boosts.
    for term in terms {
        if term.is_empty() {
            continue;
        }
        if title.contains(term) {
            score += 3.5;
        }
        if description.contains(term) {
            score += 2.2;
        }
        if slug.contains(term) {
            score += 2.0;
        }
        if category.contains(term) {
            score += 1.8;
        }
        // Exact tag match beats substring match; award at most one of them.
        if tags.iter().any(|tag| tag == term) {
            score += 2.5;
        } else if tags.iter().any(|tag| tag.contains(term)) {
            score += 1.5;
        }
        if content_text.contains(term) {
            score += 0.8;
        }
    }
    score
}
fn is_preview_search(query: &SearchQuery, headers: &HeaderMap) -> bool {
query.preview.unwrap_or(false)
|| headers
@@ -39,11 +278,15 @@ fn is_preview_search(query: &SearchQuery, headers: &HeaderMap) -> bool {
pub struct SearchQuery {
    // Free-text search query; an absent/blank query yields no results.
    pub q: Option<String>,
    // Maximum number of results; the handler clamps this to 1..=100
    // (default 20).
    pub limit: Option<u64>,
    // Optional filter: only posts whose category equals this value
    // (ASCII case-insensitive).
    pub category: Option<String>,
    // Optional filter: only posts carrying this tag (normalized match).
    pub tag: Option<String>,
    // Optional filter on the post type; the query param may also be
    // spelled `type` thanks to the serde alias.
    #[serde(alias = "type")]
    pub post_type: Option<String>,
    // Accepts boolean-ish strings ("1", "true", …) via the custom
    // deserializer; preview mode skips the public-listing filter.
    #[serde(default, deserialize_with = "deserialize_boolish_option")]
    pub preview: Option<bool>,
}
#[derive(Clone, Debug, Serialize, FromQueryResult)]
#[derive(Clone, Debug, Serialize)]
pub struct SearchResult {
pub id: i32,
pub title: Option<String>,
@@ -59,131 +302,6 @@ pub struct SearchResult {
pub rank: f64,
}
/// Returns the Postgres full-text search statement.
///
/// Bind parameters: `$1` = plain-text query (run through
/// `plainto_tsquery`), `$2` = row limit. Field weights for ranking:
/// title = A, description = B, category and tags = C, content = D.
/// `ts_rank_cd` is cast to `float8` so the `rank` column maps cleanly to
/// the `f64` field of `SearchResult`.
fn search_sql() -> &'static str {
r#"
SELECT
p.id,
p.title,
p.slug,
p.description,
p.content,
p.category,
p.tags,
p.post_type,
p.pinned,
p.created_at,
p.updated_at,
ts_rank_cd(
setweight(to_tsvector('simple', coalesce(p.title, '')), 'A') ||
setweight(to_tsvector('simple', coalesce(p.description, '')), 'B') ||
setweight(to_tsvector('simple', coalesce(p.category, '')), 'C') ||
setweight(to_tsvector('simple', coalesce(p.tags::text, '')), 'C') ||
setweight(to_tsvector('simple', coalesce(p.content, '')), 'D'),
plainto_tsquery('simple', $1)
)::float8 AS rank
FROM posts p
WHERE (
setweight(to_tsvector('simple', coalesce(p.title, '')), 'A') ||
setweight(to_tsvector('simple', coalesce(p.description, '')), 'B') ||
setweight(to_tsvector('simple', coalesce(p.category, '')), 'C') ||
setweight(to_tsvector('simple', coalesce(p.tags::text, '')), 'C') ||
setweight(to_tsvector('simple', coalesce(p.content, '')), 'D')
) @@ plainto_tsquery('simple', $1)
ORDER BY rank DESC, p.created_at DESC
LIMIT $2
"#
}
/// Legacy in-process ranking used by the non-Postgres fallback path:
/// weighted, case-insensitive substring hits across title, description,
/// content, category, and tags.
fn app_level_rank(post: &posts::Model, wanted: &str) -> f64 {
    let needle = wanted.to_lowercase();
    // Shared check for the four optional text fields.
    let field_hit = |field: Option<&str>| {
        field
            .unwrap_or_default()
            .to_lowercase()
            .contains(&needle)
    };
    let mut rank = 0.0;
    if field_hit(post.title.as_deref()) {
        rank += 4.0;
    }
    if field_hit(post.description.as_deref()) {
        rank += 2.5;
    }
    if field_hit(post.content.as_deref()) {
        rank += 1.0;
    }
    if field_hit(post.category.as_deref()) {
        rank += 1.5;
    }
    let tag_hit = post
        .tags
        .as_ref()
        .and_then(Value::as_array)
        .map(|tags| {
            tags.iter()
                .filter_map(Value::as_str)
                .any(|tag| tag.to_lowercase().contains(&needle))
        })
        .unwrap_or(false);
    if tag_hit {
        rank += 2.0;
    }
    rank
}
/// Application-level search used when the database backend is not Postgres
/// (or when the FTS statement fails): loads all posts, scores each with
/// `app_level_rank`, and returns the top `limit` matches.
///
/// Results are ordered by rank (descending), ties broken by recency — the
/// same ordering as the Postgres path's `ORDER BY rank DESC, created_at
/// DESC`. The original sorted only by recency and applied `take(limit)`
/// before any rank ordering, so a low-ranked recent post could displace a
/// higher-ranked older one.
async fn fallback_search(ctx: &AppContext, q: &str, limit: u64) -> Result<Vec<SearchResult>> {
    let posts = posts::Entity::find().all(&ctx.db).await?;
    let mut scored = posts
        .into_iter()
        .map(|post| {
            let rank = app_level_rank(&post, q);
            (post, rank)
        })
        .filter(|(_, rank)| *rank > 0.0)
        .collect::<Vec<_>>();
    // Mirror the SQL path: best rank first, newest first among equals.
    scored.sort_by(|left, right| {
        right
            .1
            .partial_cmp(&left.1)
            .unwrap_or(std::cmp::Ordering::Equal)
            .then_with(|| right.0.created_at.cmp(&left.0.created_at))
    });
    scored.truncate(limit as usize);
    Ok(scored
        .into_iter()
        .map(|(post, rank)| SearchResult {
            id: post.id,
            title: post.title,
            slug: post.slug,
            description: post.description,
            content: post.content,
            category: post.category,
            tags: post.tags,
            post_type: post.post_type,
            pinned: post.pinned,
            created_at: post.created_at.into(),
            updated_at: post.updated_at.into(),
            rank,
        })
        .collect())
}
#[debug_handler]
pub async fn search(
Query(query): Query<SearchQuery>,
@@ -199,26 +317,107 @@ pub async fn search(
return format::json(Vec::<SearchResult>::new());
}
let limit = query.limit.unwrap_or(20).clamp(1, 100);
if !preview_search {
abuse_guard::enforce_public_scope(
"search",
abuse_guard::detect_client_ip(&headers).as_deref(),
Some(&q),
)?;
}
let results = if ctx.db.get_database_backend() == DatabaseBackend::Postgres {
let statement = Statement::from_sql_and_values(
DbBackend::Postgres,
search_sql(),
[q.clone().into(), (limit as i64).into()],
);
let limit = query.limit.unwrap_or(20).clamp(1, 100) as usize;
let settings = site_settings::load_current(&ctx).await.ok();
let synonym_groups = settings
.as_ref()
.map(|item| parse_synonym_groups(&item.search_synonyms))
.unwrap_or_default();
match SearchResult::find_by_statement(statement)
.all(&ctx.db)
.await
{
Ok(rows) if !rows.is_empty() => rows,
Ok(_) => fallback_search(&ctx, &q, limit).await?,
Err(_) => fallback_search(&ctx, &q, limit).await?,
let mut all_posts = posts::Entity::find()
.all(&ctx.db)
.await?
.into_iter()
.filter(|post| {
preview_search
|| content::is_post_listed_publicly(post, chrono::Utc::now().fixed_offset())
})
.collect::<Vec<_>>();
if let Some(category) = query.category.as_deref().map(str::trim).filter(|value| !value.is_empty()) {
all_posts.retain(|post| {
post.category
.as_deref()
.map(|value| value.eq_ignore_ascii_case(category))
.unwrap_or(false)
});
}
if let Some(tag) = query.tag.as_deref().map(str::trim).filter(|value| !value.is_empty()) {
all_posts.retain(|post| post_has_tag(post, tag));
}
if let Some(post_type) = query.post_type.as_deref().map(str::trim).filter(|value| !value.is_empty()) {
all_posts.retain(|post| {
post.post_type
.as_deref()
.map(|value| value.eq_ignore_ascii_case(post_type))
.unwrap_or(false)
});
}
let mut expanded_terms = expand_search_terms(&q, &synonym_groups);
let mut results = all_posts
.iter()
.map(|post| (post, score_post(post, &q, &expanded_terms)))
.filter(|(_, rank)| *rank > 0.0)
.map(|(post, rank)| SearchResult {
id: post.id,
title: post.title.clone(),
slug: post.slug.clone(),
description: post.description.clone(),
content: post.content.clone(),
category: post.category.clone(),
tags: post.tags.clone(),
post_type: post.post_type.clone(),
pinned: post.pinned,
created_at: post.created_at.into(),
updated_at: post.updated_at.into(),
rank,
})
.collect::<Vec<_>>();
if results.is_empty() {
expanded_terms = find_spelling_fallback(&q, &all_posts, &synonym_groups);
if !expanded_terms.is_empty() {
results = all_posts
.iter()
.map(|post| (post, score_post(post, &q, &expanded_terms)))
.filter(|(_, rank)| *rank > 0.0)
.map(|(post, rank)| SearchResult {
id: post.id,
title: post.title.clone(),
slug: post.slug.clone(),
description: post.description.clone(),
content: post.content.clone(),
category: post.category.clone(),
tags: post.tags.clone(),
post_type: post.post_type.clone(),
pinned: post.pinned,
created_at: post.created_at.into(),
updated_at: post.updated_at.into(),
rank,
})
.collect::<Vec<_>>();
}
} else {
fallback_search(&ctx, &q, limit).await?
};
}
results.sort_by(|left, right| {
right
.rank
.partial_cmp(&left.rank)
.unwrap_or(std::cmp::Ordering::Equal)
.then_with(|| right.created_at.cmp(&left.created_at))
});
results.truncate(limit);
if !preview_search {
analytics::record_search_event(