Files
termi-blog/backend/src/controllers/search.rs
limitcool 7de4ddc3ee
All checks were successful
docker-images / build-and-push (admin, admin, termi-astro-admin, admin/Dockerfile) (push) Successful in 43s
docker-images / build-and-push (backend, backend, termi-astro-backend, backend/Dockerfile) (push) Successful in 25m9s
docker-images / build-and-push (frontend, frontend, termi-astro-frontend, frontend/Dockerfile) (push) Successful in 51s
feat: refresh content workflow and verification settings
2026-04-01 18:47:17 +08:00

617 lines
17 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
use axum::http::HeaderMap;
use loco_rs::prelude::*;
use serde::{Deserialize, Deserializer, Serialize};
use serde_json::Value;
use std::{collections::HashSet, time::Instant};
use crate::{
controllers::site_settings,
models::_entities::posts,
services::{abuse_guard, analytics, content},
};
/// Deserialize an optional "boolish" string into `Option<bool>`.
///
/// Accepts "1"/"true"/"yes"/"on" and "0"/"false"/"no"/"off" (case- and
/// whitespace-insensitive); any other non-missing value is a hard error.
fn deserialize_boolish_option<'de, D>(
    deserializer: D,
) -> std::result::Result<Option<bool>, D::Error>
where
    D: Deserializer<'de>,
{
    match Option::<String>::deserialize(deserializer)? {
        None => Ok(None),
        Some(text) => match text.trim().to_ascii_lowercase().as_str() {
            "1" | "true" | "yes" | "on" => Ok(Some(true)),
            "0" | "false" | "no" | "off" => Ok(Some(false)),
            other => Err(serde::de::Error::custom(format!(
                "invalid boolean value `{other}`"
            ))),
        },
    }
}
/// Collapse all whitespace runs to single spaces, drop leading/trailing
/// whitespace, and lowercase ASCII letters.
fn normalize_text(value: &str) -> String {
    let mut normalized = String::with_capacity(value.len());
    for word in value.split_whitespace() {
        if !normalized.is_empty() {
            normalized.push(' ');
        }
        normalized.push_str(word);
    }
    normalized.make_ascii_lowercase();
    normalized
}
/// Split `value` into normalized tokens. A separator is any character that
/// is neither alphanumeric nor `-` nor `_`; empty tokens are discarded.
fn tokenize(value: &str) -> Vec<String> {
    let is_separator = |ch: char| !(ch.is_alphanumeric() || ch == '-' || ch == '_');
    value
        .split(is_separator)
        .map(normalize_text)
        .filter(|token| !token.is_empty())
        .collect()
}
/// Edit distance (insert/delete/substitute, all cost 1) between two strings,
/// computed per Unicode scalar value with a single-row dynamic program.
fn levenshtein_distance(left: &str, right: &str) -> usize {
    if left == right {
        return 0;
    }
    if left.is_empty() {
        return right.chars().count();
    }
    if right.is_empty() {
        return left.chars().count();
    }
    let rhs: Vec<char> = right.chars().collect();
    // row[j] holds the distance for the prefix pair processed so far.
    let mut row: Vec<usize> = (0..=rhs.len()).collect();
    for (i, lc) in left.chars().enumerate() {
        // `diagonal` carries the previous row's value one column back.
        let mut diagonal = row[0];
        row[0] = i + 1;
        for (j, rc) in rhs.iter().enumerate() {
            let substitution = diagonal + usize::from(lc != *rc);
            let insertion = row[j] + 1;
            let deletion = row[j + 1] + 1;
            diagonal = row[j + 1];
            row[j + 1] = substitution.min(insertion).min(deletion);
        }
    }
    row[rhs.len()]
}
/// Parse the `search_synonyms` site setting (a JSON array of strings) into
/// groups of equivalent terms.
///
/// Each entry may separate terms with `,`, `|`, `=>`, or the fullwidth
/// (CJK) comma; tokens are normalized and empty tokens/groups are dropped.
fn parse_synonym_groups(value: &Option<Value>) -> Vec<Vec<String>> {
    value
        .as_ref()
        .and_then(Value::as_array)
        .cloned()
        .unwrap_or_default()
        .into_iter()
        .filter_map(|item| item.as_str().map(ToString::to_string))
        .map(|item| {
            let normalized = item.replace("=>", ",").replace('|', ",");
            normalized
                // '\u{ff0c}' is the fullwidth comma "，". The original char
                // was an invisible/ambiguous Unicode literal (the raw source
                // showed an empty char literal, which does not compile), so
                // it is written as an explicit escape here.
                .split([',', '\u{ff0c}'])
                .map(normalize_text)
                .filter(|token| !token.is_empty())
                .collect::<Vec<_>>()
        })
        .filter(|group| !group.is_empty())
        .collect()
}
/// Build the ordered, de-duplicated list of search terms: the normalized
/// whole query first, then its individual tokens, then every synonym group
/// that matches the query or one of its tokens.
fn expand_search_terms(query: &str, synonym_groups: &[Vec<String>]) -> Vec<String> {
    let normalized_query = normalize_text(query);
    let query_tokens = tokenize(query);
    let mut seen = HashSet::new();
    let mut terms = Vec::new();
    {
        // Push a term only the first time it is encountered.
        let mut push_unique = |term: &String| {
            if seen.insert(term.clone()) {
                terms.push(term.clone());
            }
        };
        if !normalized_query.is_empty() {
            push_unique(&normalized_query);
        }
        for token in &query_tokens {
            push_unique(token);
        }
        for group in synonym_groups {
            let group_applies = group.iter().any(|entry| {
                entry == &normalized_query
                    || query_tokens.contains(entry)
                    || normalized_query.contains(entry.as_str())
            });
            if group_applies {
                group.iter().for_each(&mut push_unique);
            }
        }
    }
    terms
}
/// Collect distinct tokens across all posts, preserving first-seen order:
/// tokens of length >= 3 from title/category/slug, and of length >= 2 from
/// the JSON `tags` array. Used as spelling-correction candidates.
fn candidate_terms(posts: &[posts::Model]) -> Vec<String> {
    let mut seen = HashSet::new();
    let mut candidates = Vec::new();
    for post in posts {
        let text_sources = [
            post.title.as_deref().unwrap_or_default(),
            post.category.as_deref().unwrap_or_default(),
            post.slug.as_str(),
        ];
        for token in text_sources.iter().flat_map(|source| tokenize(source)) {
            if token.len() >= 3 && seen.insert(token.clone()) {
                candidates.push(token);
            }
        }
        // Tags allow shorter tokens since they are curated labels.
        let tag_tokens = post
            .tags
            .as_ref()
            .and_then(Value::as_array)
            .into_iter()
            .flatten()
            .filter_map(Value::as_str)
            .flat_map(tokenize);
        for token in tag_tokens {
            if token.len() >= 2 && seen.insert(token.clone()) {
                candidates.push(token);
            }
        }
    }
    candidates
}
/// When a query produced no hits, derive fallback terms: take the first
/// query token, keep candidate terms within edit distance 2 (closest first,
/// ties alphabetical), take the top three, and expand each via the synonyms.
fn find_spelling_fallback(
    query: &str,
    posts: &[posts::Model],
    synonym_groups: &[Vec<String>],
) -> Vec<String> {
    let primary = tokenize(query).into_iter().next().unwrap_or_default();
    // Very short (or absent) tokens would match too loosely.
    if primary.len() < 3 {
        return Vec::new();
    }
    let mut scored: Vec<(String, usize)> = candidate_terms(posts)
        .into_iter()
        .filter_map(|candidate| {
            let distance = levenshtein_distance(&primary, &candidate);
            (distance <= 2).then_some((candidate, distance))
        })
        .collect();
    scored.sort_by(|a, b| a.1.cmp(&b.1).then_with(|| a.0.cmp(&b.0)));
    scored.truncate(3);
    scored
        .into_iter()
        .flat_map(|(candidate, _)| expand_search_terms(&candidate, synonym_groups))
        .collect()
}
/// True when the post's JSON `tags` array contains `wanted_tag`, compared
/// after whitespace/case normalization. Posts without tags never match.
fn post_has_tag(post: &posts::Model, wanted_tag: &str) -> bool {
    let target = normalize_text(wanted_tag);
    match post.tags.as_ref().and_then(Value::as_array) {
        Some(tags) => tags
            .iter()
            .filter_map(Value::as_str)
            .any(|tag| normalize_text(tag) == target),
        None => false,
    }
}
/// Rank a post against the raw query and its expanded search terms.
///
/// Whole-query substring matches in prominent fields carry the largest
/// weights; each expanded term adds a smaller bonus per matching field.
/// Returns 0.0 when nothing matches (callers drop such posts).
fn score_post(post: &posts::Model, query: &str, terms: &[String]) -> f64 {
    let needle = normalize_text(query);
    let title = normalize_text(post.title.as_deref().unwrap_or_default());
    let description = normalize_text(post.description.as_deref().unwrap_or_default());
    let body = normalize_text(post.content.as_deref().unwrap_or_default());
    let category = normalize_text(post.category.as_deref().unwrap_or_default());
    let slug = normalize_text(&post.slug);
    let tags: Vec<String> = post
        .tags
        .as_ref()
        .and_then(Value::as_array)
        .map(|items| {
            items
                .iter()
                .filter_map(Value::as_str)
                .map(normalize_text)
                .collect()
        })
        .unwrap_or_default();
    // (haystack, whole-query weight, per-term weight); tags and body are
    // handled separately below to keep the original accumulation order.
    let fields = [
        (&title, 6.0, 3.5),
        (&description, 4.0, 2.2),
        (&slug, 4.0, 2.0),
        (&category, 3.0, 1.8),
    ];
    let mut total = 0.0;
    if !needle.is_empty() {
        for (haystack, query_weight, _) in fields {
            if haystack.contains(&needle) {
                total += query_weight;
            }
        }
        if tags.iter().any(|tag| tag.contains(&needle)) {
            total += 4.0;
        }
        if body.contains(&needle) {
            total += 2.0;
        }
    }
    for term in terms {
        if term.is_empty() {
            continue;
        }
        for (haystack, _, term_weight) in fields {
            if haystack.contains(term.as_str()) {
                total += term_weight;
            }
        }
        // Exact tag match beats a substring tag match.
        if tags.iter().any(|tag| tag == term) {
            total += 2.5;
        } else if tags.iter().any(|tag| tag.contains(term.as_str())) {
            total += 1.5;
        }
        if body.contains(term.as_str()) {
            total += 0.8;
        }
    }
    total
}
/// A search runs in preview mode when the `preview` query flag is set or
/// the `x-termi-search-mode: preview` header is present (case-insensitive).
fn is_preview_search(query: &SearchQuery, headers: &HeaderMap) -> bool {
    if query.preview == Some(true) {
        return true;
    }
    headers
        .get("x-termi-search-mode")
        .and_then(|value| value.to_str().ok())
        .map(|value| value.eq_ignore_ascii_case("preview"))
        .unwrap_or(false)
}
/// Map a raw `sort_by` request value onto one of the supported modes
/// ("newest", "oldest", "title"), defaulting to "relevance".
fn normalize_search_sort_by(value: Option<&str>) -> String {
    let normalized = value.unwrap_or("").trim().to_ascii_lowercase();
    let canonical = match normalized.as_str() {
        "newest" | "created_at" => "newest",
        "oldest" => "oldest",
        "title" => "title",
        _ => "relevance",
    };
    canonical.to_string()
}
/// Map a raw `sort_order` onto "asc"/"desc". When unspecified or invalid,
/// the default depends on the sort mode: "asc" for title, otherwise "desc".
fn normalize_sort_order(value: Option<&str>, sort_by: &str) -> String {
    let normalized = value.unwrap_or("").trim().to_ascii_lowercase();
    if normalized == "asc" || normalized == "desc" {
        normalized
    } else if sort_by == "title" {
        "asc".to_string()
    } else {
        "desc".to_string()
    }
}
/// Order results in place according to `sort_by` / `sort_order`.
///
/// "relevance", "newest" and "oldest" each have a fixed direction and
/// ignore `sort_order`; any other mode (in practice "title") honors the
/// direction and breaks ties by slug.
fn sort_search_results(items: &mut [SearchResult], sort_by: &str, sort_order: &str) {
    items.sort_by(|a, b| {
        let base = match sort_by {
            "newest" => b.created_at.cmp(&a.created_at),
            "oldest" => a.created_at.cmp(&b.created_at),
            "title" => {
                // Case-insensitive title, falling back to the slug when absent.
                let a_key = a.title.as_deref().unwrap_or(&a.slug).to_ascii_lowercase();
                let b_key = b.title.as_deref().unwrap_or(&b.slug).to_ascii_lowercase();
                a_key.cmp(&b_key)
            }
            // Relevance: higher rank first, newest first on equal rank.
            _ => b
                .rank
                .partial_cmp(&a.rank)
                .unwrap_or(std::cmp::Ordering::Equal)
                .then_with(|| b.created_at.cmp(&a.created_at)),
        };
        if matches!(sort_by, "relevance" | "newest" | "oldest") {
            return base;
        }
        let directed = if sort_order == "asc" { base } else { base.reverse() };
        directed.then_with(|| a.slug.cmp(&b.slug))
    });
}
/// Query-string parameters shared by both search endpoints.
#[derive(Clone, Debug, Default, Deserialize)]
pub struct SearchQuery {
    // Free-text query; empty or missing short-circuits to no results.
    pub q: Option<String>,
    // Max results for the flat endpoint; handler clamps to 1..=100.
    pub limit: Option<u64>,
    // Filter: category name, compared case-insensitively.
    pub category: Option<String>,
    // Filter: tag, compared after normalization (see `post_has_tag`).
    pub tag: Option<String>,
    // Filter: post type; accepts `type` as the query key via the alias.
    #[serde(alias = "type")]
    pub post_type: Option<String>,
    // Preview flag; accepts boolish strings ("1"/"yes"/"on"...). Preview
    // searches skip abuse checks and analytics recording.
    #[serde(default, deserialize_with = "deserialize_boolish_option")]
    pub preview: Option<bool>,
}
/// Query parameters for the paginated search endpoint.
#[derive(Clone, Debug, Default, Deserialize)]
pub struct SearchPageQuery {
    // Common search filters, flattened into the same query string.
    #[serde(flatten)]
    pub search: SearchQuery,
    // 1-based page number; clamped to 1..=total_pages by the handler.
    pub page: Option<u64>,
    // Items per page; clamped to 1..=100 by the handler.
    // NOTE(review): the alias duplicates the field name — possibly meant
    // "pageSize"; confirm against frontend callers.
    #[serde(alias = "page_size")]
    pub page_size: Option<u64>,
    // Requested sort mode (see `normalize_search_sort_by`).
    pub sort_by: Option<String>,
    // Requested sort direction (see `normalize_sort_order`).
    pub sort_order: Option<String>,
}
/// One matched post plus its relevance rank, serialized to API clients.
/// Fields mirror `posts::Model` columns.
#[derive(Clone, Debug, Serialize)]
pub struct SearchResult {
    pub id: i32,
    pub title: Option<String>,
    pub slug: String,
    pub description: Option<String>,
    // Full post body is included in responses; presumably the frontend
    // uses it for snippet/highlight rendering — confirm before trimming.
    pub content: Option<String>,
    pub category: Option<String>,
    // JSON array of tag strings (see `post_has_tag` for the expected shape).
    pub tags: Option<Value>,
    pub post_type: Option<String>,
    pub image: Option<String>,
    pub pinned: Option<bool>,
    pub created_at: chrono::DateTime<chrono::Utc>,
    pub updated_at: chrono::DateTime<chrono::Utc>,
    // Relevance score from `score_post`; higher ranks sort first.
    pub rank: f64,
}
/// Response envelope for the paginated search endpoint.
#[derive(Clone, Debug, Serialize)]
pub struct PagedSearchResponse {
    // The trimmed query string that was searched.
    pub query: String,
    // The results for the requested page only.
    pub items: Vec<SearchResult>,
    // 1-based page number actually served (after clamping).
    pub page: u64,
    pub page_size: u64,
    // Total matches across all pages.
    pub total: usize,
    // Always at least 1, even when `total` is 0.
    pub total_pages: u64,
    // Effective (normalized) sort mode and direction.
    pub sort_by: String,
    pub sort_order: String,
}
/// Convert a matched post and its computed rank into an API `SearchResult`.
fn to_search_result(post: &posts::Model, rank: f64) -> SearchResult {
    SearchResult {
        id: post.id,
        title: post.title.clone(),
        slug: post.slug.clone(),
        description: post.description.clone(),
        content: post.content.clone(),
        category: post.category.clone(),
        tags: post.tags.clone(),
        post_type: post.post_type.clone(),
        image: post.image.clone(),
        pinned: post.pinned,
        created_at: post.created_at.into(),
        updated_at: post.updated_at.into(),
        rank,
    }
}

/// Score every post against `q`/`terms`, keeping only positive-rank matches.
fn rank_posts(posts: &[posts::Model], q: &str, terms: &[String]) -> Vec<SearchResult> {
    posts
        .iter()
        .filter_map(|post| {
            let rank = score_post(post, q, terms);
            if rank > 0.0 {
                Some(to_search_result(post, rank))
            } else {
                None
            }
        })
        .collect()
}

/// Shared pipeline for both search endpoints: trims the query, enforces
/// abuse limits for public searches, applies category/tag/type filters,
/// scores posts, retries with spelling-fallback terms when nothing matched,
/// and returns `(trimmed query, preview flag, relevance-sorted results)`.
async fn build_search_results(
    ctx: &AppContext,
    query: &SearchQuery,
    headers: &HeaderMap,
) -> Result<(String, bool, Vec<SearchResult>)> {
    let preview_search = is_preview_search(query, headers);
    let q = query.q.clone().unwrap_or_default().trim().to_string();
    if q.is_empty() {
        return Ok((q, preview_search, Vec::new()));
    }
    if !preview_search {
        // Abuse/rate-limit checks apply only to real public searches.
        abuse_guard::enforce_public_scope(
            "search",
            abuse_guard::detect_client_ip(headers).as_deref(),
            Some(&q),
        )?;
    }
    let settings = site_settings::load_current(ctx).await.ok();
    let synonym_groups = settings
        .as_ref()
        .map(|item| parse_synonym_groups(&item.search_synonyms))
        .unwrap_or_default();
    // Previews see every post; public searches only publicly-listed ones.
    let mut all_posts = posts::Entity::find()
        .all(&ctx.db)
        .await?
        .into_iter()
        .filter(|post| {
            preview_search
                || content::is_post_listed_publicly(post, chrono::Utc::now().fixed_offset())
        })
        .collect::<Vec<_>>();
    // Optional filters: category (case-insensitive), tag (normalized), type.
    if let Some(category) = query
        .category
        .as_deref()
        .map(str::trim)
        .filter(|value| !value.is_empty())
    {
        all_posts.retain(|post| {
            post.category
                .as_deref()
                .map(|value| value.eq_ignore_ascii_case(category))
                .unwrap_or(false)
        });
    }
    if let Some(tag) = query
        .tag
        .as_deref()
        .map(str::trim)
        .filter(|value| !value.is_empty())
    {
        all_posts.retain(|post| post_has_tag(post, tag));
    }
    if let Some(post_type) = query
        .post_type
        .as_deref()
        .map(str::trim)
        .filter(|value| !value.is_empty())
    {
        all_posts.retain(|post| {
            post.post_type
                .as_deref()
                .map(|value| value.eq_ignore_ascii_case(post_type))
                .unwrap_or(false)
        });
    }
    let expanded_terms = expand_search_terms(&q, &synonym_groups);
    let mut results = rank_posts(&all_posts, &q, &expanded_terms);
    if results.is_empty() {
        // Nothing matched: retry with closest-spelling candidate terms.
        let fallback_terms = find_spelling_fallback(&q, &all_posts, &synonym_groups);
        if !fallback_terms.is_empty() {
            results = rank_posts(&all_posts, &q, &fallback_terms);
        }
    }
    sort_search_results(&mut results, "relevance", "desc");
    Ok((q, preview_search, results))
}
/// GET handler for the flat search endpoint: returns up to `limit`
/// (default 20, capped at 100) ranked results as a JSON array.
#[debug_handler]
pub async fn search(
    Query(query): Query<SearchQuery>,
    State(ctx): State<AppContext>,
    headers: HeaderMap,
) -> Result<Response> {
    let started_at = Instant::now();
    let (q, preview_search, mut results) = build_search_results(&ctx, &query, &headers).await?;
    if q.is_empty() {
        // Empty query: respond with an empty list and record nothing.
        return format::json(Vec::<SearchResult>::new());
    }
    let max_results = query.limit.unwrap_or(20).clamp(1, 100) as usize;
    results.truncate(max_results);
    if !preview_search {
        // Record timing and hit count for real (non-preview) searches only.
        analytics::record_search_event(
            &ctx,
            &q,
            results.len(),
            &headers,
            started_at.elapsed().as_millis() as i64,
        )
        .await;
    }
    format::json(results)
}
#[debug_handler]
pub async fn search_page(
Query(query): Query<SearchPageQuery>,
State(ctx): State<AppContext>,
headers: HeaderMap,
) -> Result<Response> {
let started_at = Instant::now();
let page_size = query.page_size.unwrap_or(20).clamp(1, 100);
let sort_by = normalize_search_sort_by(query.sort_by.as_deref());
let sort_order = normalize_sort_order(query.sort_order.as_deref(), &sort_by);
let (q, preview_search, mut results) =
build_search_results(&ctx, &query.search, &headers).await?;
if q.is_empty() {
return format::json(PagedSearchResponse {
query: q,
items: Vec::new(),
page: 1,
page_size,
total: 0,
total_pages: 1,
sort_by,
sort_order,
});
}
sort_search_results(&mut results, &sort_by, &sort_order);
let total = results.len();
let total_pages = std::cmp::max(1, ((total as u64) + page_size - 1) / page_size);
let page = query.page.unwrap_or(1).clamp(1, total_pages);
let start = ((page - 1) * page_size) as usize;
let end = std::cmp::min(start + page_size as usize, total);
let items = if start >= total {
Vec::new()
} else {
results[start..end].to_vec()
};
if !preview_search {
analytics::record_search_event(
&ctx,
&q,
total,
&headers,
started_at.elapsed().as_millis() as i64,
)
.await;
}
format::json(PagedSearchResponse {
query: q,
items,
page,
page_size,
total,
total_pages,
sort_by,
sort_order,
})
}
/// Mount the search endpoints under the `api/search/` prefix:
/// `page` (paginated) and `/` (flat list).
pub fn routes() -> Routes {
    let base = Routes::new().prefix("api/search/");
    base.add("page", get(search_page)).add("/", get(search))
}