feat: ship blog platform admin and deploy stack
This commit is contained in:
@@ -1,12 +1,14 @@
|
||||
use axum::http::HeaderMap;
|
||||
use loco_rs::prelude::*;
|
||||
use sea_orm::{ConnectionTrait, DatabaseBackend, DbBackend, FromQueryResult, Statement};
|
||||
use serde::{Deserialize, Deserializer, Serialize};
|
||||
use serde_json::Value;
|
||||
use std::time::Instant;
|
||||
use std::{collections::HashSet, time::Instant};
|
||||
|
||||
use crate::models::_entities::posts;
|
||||
use crate::services::{analytics, content};
|
||||
use crate::{
|
||||
controllers::site_settings,
|
||||
models::_entities::posts,
|
||||
services::{abuse_guard, analytics, content},
|
||||
};
|
||||
|
||||
fn deserialize_boolish_option<'de, D>(
|
||||
deserializer: D,
|
||||
@@ -26,6 +28,243 @@ where
|
||||
.transpose()
|
||||
}
|
||||
|
||||
/// Collapse every run of whitespace to a single space and ASCII-lowercase
/// the result, producing the canonical form used for all text comparisons.
///
/// `split_whitespace` already discards leading and trailing whitespace, so
/// the joined string never has edge spaces — the original's trailing
/// `.trim()` was a no-op and has been removed.
fn normalize_text(value: &str) -> String {
    value
        .split_whitespace()
        .collect::<Vec<_>>()
        .join(" ")
        .to_ascii_lowercase()
}
|
||||
|
||||
fn tokenize(value: &str) -> Vec<String> {
|
||||
value
|
||||
.split(|ch: char| !ch.is_alphanumeric() && ch != '-' && ch != '_')
|
||||
.map(normalize_text)
|
||||
.filter(|item| !item.is_empty())
|
||||
.collect()
|
||||
}
|
||||
|
||||
/// Edit distance between two strings, measured in Unicode scalar values
/// (`char`s), not bytes.
///
/// Single-row dynamic programming: `row[j]` holds the distance between the
/// prefix of `left` consumed so far and the first `j` chars of `right`.
fn levenshtein_distance(left: &str, right: &str) -> usize {
    if left == right {
        return 0;
    }
    if left.is_empty() {
        return right.chars().count();
    }
    if right.is_empty() {
        return left.chars().count();
    }

    let target: Vec<char> = right.chars().collect();
    let mut row: Vec<usize> = (0..=target.len()).collect();

    for (i, lc) in left.chars().enumerate() {
        // `diagonal` carries prev-row[j] (the substitution ancestor) as the
        // row is overwritten in place.
        let mut diagonal = row[0];
        row[0] = i + 1;
        for (j, &rc) in target.iter().enumerate() {
            let substitution = diagonal + usize::from(lc != rc);
            diagonal = row[j + 1];
            row[j + 1] = substitution.min(row[j] + 1).min(diagonal + 1);
        }
    }

    row[target.len()]
}
|
||||
|
||||
fn parse_synonym_groups(value: &Option<Value>) -> Vec<Vec<String>> {
|
||||
value
|
||||
.as_ref()
|
||||
.and_then(Value::as_array)
|
||||
.cloned()
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.filter_map(|item| item.as_str().map(ToString::to_string))
|
||||
.map(|item| {
|
||||
let normalized = item.replace("=>", ",").replace('|', ",");
|
||||
normalized
|
||||
.split([',', ','])
|
||||
.map(normalize_text)
|
||||
.filter(|token| !token.is_empty())
|
||||
.collect::<Vec<_>>()
|
||||
})
|
||||
.filter(|group| !group.is_empty())
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn expand_search_terms(query: &str, synonym_groups: &[Vec<String>]) -> Vec<String> {
|
||||
let normalized_query = normalize_text(query);
|
||||
let query_tokens = tokenize(query);
|
||||
let mut expanded = Vec::new();
|
||||
let mut seen = HashSet::new();
|
||||
|
||||
if !normalized_query.is_empty() && seen.insert(normalized_query.clone()) {
|
||||
expanded.push(normalized_query.clone());
|
||||
}
|
||||
|
||||
for token in &query_tokens {
|
||||
if seen.insert(token.clone()) {
|
||||
expanded.push(token.clone());
|
||||
}
|
||||
}
|
||||
|
||||
for group in synonym_groups {
|
||||
let matched = group.iter().any(|item| {
|
||||
*item == normalized_query
|
||||
|| query_tokens.iter().any(|token| token == item)
|
||||
|| normalized_query.contains(item)
|
||||
});
|
||||
|
||||
if matched {
|
||||
for token in group {
|
||||
if seen.insert(token.clone()) {
|
||||
expanded.push(token.clone());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
expanded
|
||||
}
|
||||
|
||||
fn candidate_terms(posts: &[posts::Model]) -> Vec<String> {
|
||||
let mut seen = HashSet::new();
|
||||
let mut candidates = Vec::new();
|
||||
|
||||
for post in posts {
|
||||
for source in [
|
||||
post.title.as_deref().unwrap_or_default(),
|
||||
post.category.as_deref().unwrap_or_default(),
|
||||
&post.slug,
|
||||
] {
|
||||
for token in tokenize(source) {
|
||||
if token.len() >= 3 && seen.insert(token.clone()) {
|
||||
candidates.push(token);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(tags) = post.tags.as_ref().and_then(Value::as_array) {
|
||||
for token in tags.iter().filter_map(Value::as_str).flat_map(tokenize) {
|
||||
if token.len() >= 2 && seen.insert(token.clone()) {
|
||||
candidates.push(token);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
candidates
|
||||
}
|
||||
|
||||
fn find_spelling_fallback(query: &str, posts: &[posts::Model], synonym_groups: &[Vec<String>]) -> Vec<String> {
|
||||
let primary_token = tokenize(query).into_iter().next().unwrap_or_default();
|
||||
if primary_token.len() < 3 {
|
||||
return Vec::new();
|
||||
}
|
||||
|
||||
let mut nearest = candidate_terms(posts)
|
||||
.into_iter()
|
||||
.map(|candidate| {
|
||||
let distance = levenshtein_distance(&primary_token, &candidate);
|
||||
(candidate, distance)
|
||||
})
|
||||
.filter(|(_, distance)| *distance <= 2)
|
||||
.collect::<Vec<_>>();
|
||||
nearest.sort_by(|left, right| left.1.cmp(&right.1).then_with(|| left.0.cmp(&right.0)));
|
||||
|
||||
nearest
|
||||
.into_iter()
|
||||
.take(3)
|
||||
.flat_map(|(candidate, _)| expand_search_terms(&candidate, synonym_groups))
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn post_has_tag(post: &posts::Model, wanted_tag: &str) -> bool {
|
||||
let wanted = normalize_text(wanted_tag);
|
||||
|
||||
post.tags
|
||||
.as_ref()
|
||||
.and_then(Value::as_array)
|
||||
.map(|tags| {
|
||||
tags.iter()
|
||||
.filter_map(Value::as_str)
|
||||
.map(normalize_text)
|
||||
.any(|tag| tag == wanted)
|
||||
})
|
||||
.unwrap_or(false)
|
||||
}
|
||||
|
||||
fn score_post(post: &posts::Model, query: &str, terms: &[String]) -> f64 {
|
||||
let normalized_query = normalize_text(query);
|
||||
let title = normalize_text(post.title.as_deref().unwrap_or_default());
|
||||
let description = normalize_text(post.description.as_deref().unwrap_or_default());
|
||||
let content_text = normalize_text(post.content.as_deref().unwrap_or_default());
|
||||
let category = normalize_text(post.category.as_deref().unwrap_or_default());
|
||||
let slug = normalize_text(&post.slug);
|
||||
let tags = post
|
||||
.tags
|
||||
.as_ref()
|
||||
.and_then(Value::as_array)
|
||||
.cloned()
|
||||
.unwrap_or_default()
|
||||
.into_iter()
|
||||
.filter_map(|item| item.as_str().map(normalize_text))
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut score = 0.0;
|
||||
|
||||
if !normalized_query.is_empty() {
|
||||
if title.contains(&normalized_query) {
|
||||
score += 6.0;
|
||||
}
|
||||
if description.contains(&normalized_query) {
|
||||
score += 4.0;
|
||||
}
|
||||
if slug.contains(&normalized_query) {
|
||||
score += 4.0;
|
||||
}
|
||||
if category.contains(&normalized_query) {
|
||||
score += 3.0;
|
||||
}
|
||||
if tags.iter().any(|tag| tag.contains(&normalized_query)) {
|
||||
score += 4.0;
|
||||
}
|
||||
if content_text.contains(&normalized_query) {
|
||||
score += 2.0;
|
||||
}
|
||||
}
|
||||
|
||||
for term in terms {
|
||||
if term.is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
if title.contains(term) {
|
||||
score += 3.5;
|
||||
}
|
||||
if description.contains(term) {
|
||||
score += 2.2;
|
||||
}
|
||||
if slug.contains(term) {
|
||||
score += 2.0;
|
||||
}
|
||||
if category.contains(term) {
|
||||
score += 1.8;
|
||||
}
|
||||
if tags.iter().any(|tag| tag == term) {
|
||||
score += 2.5;
|
||||
} else if tags.iter().any(|tag| tag.contains(term)) {
|
||||
score += 1.5;
|
||||
}
|
||||
if content_text.contains(term) {
|
||||
score += 0.8;
|
||||
}
|
||||
}
|
||||
|
||||
score
|
||||
}
|
||||
|
||||
fn is_preview_search(query: &SearchQuery, headers: &HeaderMap) -> bool {
|
||||
query.preview.unwrap_or(false)
|
||||
|| headers
|
||||
@@ -39,11 +278,15 @@ fn is_preview_search(query: &SearchQuery, headers: &HeaderMap) -> bool {
|
||||
/// Query-string parameters accepted by the public search endpoint.
/// NOTE(review): the struct's derive attributes sit above the visible
/// region; the `serde` field attributes imply a `Deserialize` derive.
pub struct SearchQuery {
    /// Free-text search query; a missing or blank value yields no results.
    pub q: Option<String>,
    /// Maximum number of results; the handler clamps this to 1..=100.
    pub limit: Option<u64>,
    /// Optional category filter, matched case-insensitively.
    pub category: Option<String>,
    /// Optional tag filter, matched against the post's JSON tag array.
    pub tag: Option<String>,
    /// Optional post-type filter; `type` is accepted as a query-string alias.
    #[serde(alias = "type")]
    pub post_type: Option<String>,
    /// Preview-mode flag, parsed through the custom deserializer —
    /// presumably to accept truthy strings like "1"/"true"; confirm against
    /// `deserialize_boolish_option`.
    #[serde(default, deserialize_with = "deserialize_boolish_option")]
    pub preview: Option<bool>,
}
|
||||
|
||||
#[derive(Clone, Debug, Serialize, FromQueryResult)]
|
||||
#[derive(Clone, Debug, Serialize)]
|
||||
pub struct SearchResult {
|
||||
pub id: i32,
|
||||
pub title: Option<String>,
|
||||
@@ -59,131 +302,6 @@ pub struct SearchResult {
|
||||
pub rank: f64,
|
||||
}
|
||||
|
||||
/// Postgres full-text search statement.
///
/// Builds one weighted `tsvector` over title (weight A), description (B),
/// category and tags (C), and content (D); matches it against
/// `plainto_tsquery('simple', $1)`; and returns matching rows ranked by
/// `ts_rank_cd`, cast to `float8` so the rank maps to `f64`.
/// Bind parameters: `$1` = query text, `$2` = row limit.
fn search_sql() -> &'static str {
    r#"
    SELECT
        p.id,
        p.title,
        p.slug,
        p.description,
        p.content,
        p.category,
        p.tags,
        p.post_type,
        p.pinned,
        p.created_at,
        p.updated_at,
        ts_rank_cd(
            setweight(to_tsvector('simple', coalesce(p.title, '')), 'A') ||
            setweight(to_tsvector('simple', coalesce(p.description, '')), 'B') ||
            setweight(to_tsvector('simple', coalesce(p.category, '')), 'C') ||
            setweight(to_tsvector('simple', coalesce(p.tags::text, '')), 'C') ||
            setweight(to_tsvector('simple', coalesce(p.content, '')), 'D'),
            plainto_tsquery('simple', $1)
        )::float8 AS rank
    FROM posts p
    WHERE (
        setweight(to_tsvector('simple', coalesce(p.title, '')), 'A') ||
        setweight(to_tsvector('simple', coalesce(p.description, '')), 'B') ||
        setweight(to_tsvector('simple', coalesce(p.category, '')), 'C') ||
        setweight(to_tsvector('simple', coalesce(p.tags::text, '')), 'C') ||
        setweight(to_tsvector('simple', coalesce(p.content, '')), 'D')
    ) @@ plainto_tsquery('simple', $1)
    ORDER BY rank DESC, p.created_at DESC
    LIMIT $2
    "#
}
|
||||
|
||||
fn app_level_rank(post: &posts::Model, wanted: &str) -> f64 {
|
||||
let wanted_lower = wanted.to_lowercase();
|
||||
let mut rank = 0.0;
|
||||
|
||||
if post
|
||||
.title
|
||||
.as_deref()
|
||||
.unwrap_or_default()
|
||||
.to_lowercase()
|
||||
.contains(&wanted_lower)
|
||||
{
|
||||
rank += 4.0;
|
||||
}
|
||||
|
||||
if post
|
||||
.description
|
||||
.as_deref()
|
||||
.unwrap_or_default()
|
||||
.to_lowercase()
|
||||
.contains(&wanted_lower)
|
||||
{
|
||||
rank += 2.5;
|
||||
}
|
||||
|
||||
if post
|
||||
.content
|
||||
.as_deref()
|
||||
.unwrap_or_default()
|
||||
.to_lowercase()
|
||||
.contains(&wanted_lower)
|
||||
{
|
||||
rank += 1.0;
|
||||
}
|
||||
|
||||
if post
|
||||
.category
|
||||
.as_deref()
|
||||
.unwrap_or_default()
|
||||
.to_lowercase()
|
||||
.contains(&wanted_lower)
|
||||
{
|
||||
rank += 1.5;
|
||||
}
|
||||
|
||||
if post
|
||||
.tags
|
||||
.as_ref()
|
||||
.and_then(Value::as_array)
|
||||
.map(|tags| {
|
||||
tags.iter()
|
||||
.filter_map(Value::as_str)
|
||||
.any(|tag| tag.to_lowercase().contains(&wanted_lower))
|
||||
})
|
||||
.unwrap_or(false)
|
||||
{
|
||||
rank += 2.0;
|
||||
}
|
||||
|
||||
rank
|
||||
}
|
||||
|
||||
async fn fallback_search(ctx: &AppContext, q: &str, limit: u64) -> Result<Vec<SearchResult>> {
|
||||
let mut results = posts::Entity::find().all(&ctx.db).await?;
|
||||
results.sort_by(|left, right| right.created_at.cmp(&left.created_at));
|
||||
|
||||
Ok(results
|
||||
.into_iter()
|
||||
.map(|post| {
|
||||
let rank = app_level_rank(&post, q);
|
||||
(post, rank)
|
||||
})
|
||||
.filter(|(_, rank)| *rank > 0.0)
|
||||
.take(limit as usize)
|
||||
.map(|(post, rank)| SearchResult {
|
||||
id: post.id,
|
||||
title: post.title,
|
||||
slug: post.slug,
|
||||
description: post.description,
|
||||
content: post.content,
|
||||
category: post.category,
|
||||
tags: post.tags,
|
||||
post_type: post.post_type,
|
||||
pinned: post.pinned,
|
||||
created_at: post.created_at.into(),
|
||||
updated_at: post.updated_at.into(),
|
||||
rank,
|
||||
})
|
||||
.collect())
|
||||
}
|
||||
|
||||
#[debug_handler]
|
||||
pub async fn search(
|
||||
Query(query): Query<SearchQuery>,
|
||||
@@ -199,26 +317,107 @@ pub async fn search(
|
||||
return format::json(Vec::<SearchResult>::new());
|
||||
}
|
||||
|
||||
let limit = query.limit.unwrap_or(20).clamp(1, 100);
|
||||
if !preview_search {
|
||||
abuse_guard::enforce_public_scope(
|
||||
"search",
|
||||
abuse_guard::detect_client_ip(&headers).as_deref(),
|
||||
Some(&q),
|
||||
)?;
|
||||
}
|
||||
|
||||
let results = if ctx.db.get_database_backend() == DatabaseBackend::Postgres {
|
||||
let statement = Statement::from_sql_and_values(
|
||||
DbBackend::Postgres,
|
||||
search_sql(),
|
||||
[q.clone().into(), (limit as i64).into()],
|
||||
);
|
||||
let limit = query.limit.unwrap_or(20).clamp(1, 100) as usize;
|
||||
let settings = site_settings::load_current(&ctx).await.ok();
|
||||
let synonym_groups = settings
|
||||
.as_ref()
|
||||
.map(|item| parse_synonym_groups(&item.search_synonyms))
|
||||
.unwrap_or_default();
|
||||
|
||||
match SearchResult::find_by_statement(statement)
|
||||
.all(&ctx.db)
|
||||
.await
|
||||
{
|
||||
Ok(rows) if !rows.is_empty() => rows,
|
||||
Ok(_) => fallback_search(&ctx, &q, limit).await?,
|
||||
Err(_) => fallback_search(&ctx, &q, limit).await?,
|
||||
let mut all_posts = posts::Entity::find()
|
||||
.all(&ctx.db)
|
||||
.await?
|
||||
.into_iter()
|
||||
.filter(|post| {
|
||||
preview_search
|
||||
|| content::is_post_listed_publicly(post, chrono::Utc::now().fixed_offset())
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if let Some(category) = query.category.as_deref().map(str::trim).filter(|value| !value.is_empty()) {
|
||||
all_posts.retain(|post| {
|
||||
post.category
|
||||
.as_deref()
|
||||
.map(|value| value.eq_ignore_ascii_case(category))
|
||||
.unwrap_or(false)
|
||||
});
|
||||
}
|
||||
|
||||
if let Some(tag) = query.tag.as_deref().map(str::trim).filter(|value| !value.is_empty()) {
|
||||
all_posts.retain(|post| post_has_tag(post, tag));
|
||||
}
|
||||
|
||||
if let Some(post_type) = query.post_type.as_deref().map(str::trim).filter(|value| !value.is_empty()) {
|
||||
all_posts.retain(|post| {
|
||||
post.post_type
|
||||
.as_deref()
|
||||
.map(|value| value.eq_ignore_ascii_case(post_type))
|
||||
.unwrap_or(false)
|
||||
});
|
||||
}
|
||||
|
||||
let mut expanded_terms = expand_search_terms(&q, &synonym_groups);
|
||||
let mut results = all_posts
|
||||
.iter()
|
||||
.map(|post| (post, score_post(post, &q, &expanded_terms)))
|
||||
.filter(|(_, rank)| *rank > 0.0)
|
||||
.map(|(post, rank)| SearchResult {
|
||||
id: post.id,
|
||||
title: post.title.clone(),
|
||||
slug: post.slug.clone(),
|
||||
description: post.description.clone(),
|
||||
content: post.content.clone(),
|
||||
category: post.category.clone(),
|
||||
tags: post.tags.clone(),
|
||||
post_type: post.post_type.clone(),
|
||||
pinned: post.pinned,
|
||||
created_at: post.created_at.into(),
|
||||
updated_at: post.updated_at.into(),
|
||||
rank,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
if results.is_empty() {
|
||||
expanded_terms = find_spelling_fallback(&q, &all_posts, &synonym_groups);
|
||||
if !expanded_terms.is_empty() {
|
||||
results = all_posts
|
||||
.iter()
|
||||
.map(|post| (post, score_post(post, &q, &expanded_terms)))
|
||||
.filter(|(_, rank)| *rank > 0.0)
|
||||
.map(|(post, rank)| SearchResult {
|
||||
id: post.id,
|
||||
title: post.title.clone(),
|
||||
slug: post.slug.clone(),
|
||||
description: post.description.clone(),
|
||||
content: post.content.clone(),
|
||||
category: post.category.clone(),
|
||||
tags: post.tags.clone(),
|
||||
post_type: post.post_type.clone(),
|
||||
pinned: post.pinned,
|
||||
created_at: post.created_at.into(),
|
||||
updated_at: post.updated_at.into(),
|
||||
rank,
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
}
|
||||
} else {
|
||||
fallback_search(&ctx, &q, limit).await?
|
||||
};
|
||||
}
|
||||
|
||||
results.sort_by(|left, right| {
|
||||
right
|
||||
.rank
|
||||
.partial_cmp(&left.rank)
|
||||
.unwrap_or(std::cmp::Ordering::Equal)
|
||||
.then_with(|| right.created_at.cmp(&left.created_at))
|
||||
});
|
||||
results.truncate(limit);
|
||||
|
||||
if !preview_search {
|
||||
analytics::record_search_event(
|
||||
|
||||
Reference in New Issue
Block a user