chore: checkpoint ai search comments and i18n foundation
This commit is contained in:
993
backend/src/services/ai.rs
Normal file
993
backend/src/services/ai.rs
Normal file
@@ -0,0 +1,993 @@
|
||||
use chrono::{DateTime, Utc};
|
||||
use fastembed::{
|
||||
InitOptionsUserDefined, Pooling, TextEmbedding, TokenizerFiles, UserDefinedEmbeddingModel,
|
||||
};
|
||||
use loco_rs::prelude::*;
|
||||
use reqwest::Client;
|
||||
use sea_orm::{
|
||||
ActiveModelTrait, ConnectionTrait, DbBackend, EntityTrait, FromQueryResult, IntoActiveModel,
|
||||
PaginatorTrait, QueryOrder, Set, Statement,
|
||||
};
|
||||
use serde::Serialize;
|
||||
use serde_json::{json, Value};
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
use std::sync::{Mutex, OnceLock};
|
||||
|
||||
use crate::{
|
||||
models::_entities::{ai_chunks, site_settings},
|
||||
services::content,
|
||||
};
|
||||
|
||||
// Fallback configuration used when the corresponding site_settings columns
// are empty or blank; every value below can be overridden from the backend.
const DEFAULT_AI_PROVIDER: &str = "newapi";
const DEFAULT_AI_API_BASE: &str = "http://localhost:8317/v1";
// NOTE(review): placeholder credential shipped as a default — confirm it is
// always overridden in real deployments.
const DEFAULT_AI_API_KEY: &str = "your-api-key-1";
const DEFAULT_CHAT_MODEL: &str = "gpt-5.4";
// Responses-API only: reasoning effort and whether the provider may store
// the response server-side (`store` is sent as the negation of this flag).
const DEFAULT_REASONING_EFFORT: &str = "medium";
const DEFAULT_DISABLE_RESPONSE_STORAGE: bool = true;
// Retrieval defaults: chunks fed to the chat model per question, and the
// target chunk length (in characters) used when splitting posts.
const DEFAULT_TOP_K: usize = 4;
const DEFAULT_CHUNK_SIZE: usize = 1200;
const DEFAULT_SYSTEM_PROMPT: &str =
    "你是这个博客的站内 AI 助手。请严格基于提供的博客上下文回答,优先给出准确结论,再补充细节;如果上下文不足,请明确说明。";
// Local embedding model (all-MiniLM-L6-v2 via fastembed): batch size per
// embed() call, output vector width, display label, and the on-disk cache.
const EMBEDDING_BATCH_SIZE: usize = 32;
const EMBEDDING_DIMENSION: usize = 384;
const LOCAL_EMBEDDING_MODEL_LABEL: &str = "fastembed / local all-MiniLM-L6-v2";
const LOCAL_EMBEDDING_CACHE_DIR: &str = "storage/ai_embedding_models/all-minilm-l6-v2";
const LOCAL_EMBEDDING_BASE_URL: &str =
    "https://huggingface.co/Qdrant/all-MiniLM-L6-v2-onnx/resolve/main";
// Artifacts downloaded from LOCAL_EMBEDDING_BASE_URL into the cache dir on
// first use (see `ensure_local_embedding_files`).
const LOCAL_EMBEDDING_FILES: [&str; 5] = [
    "model.onnx",
    "tokenizer.json",
    "config.json",
    "special_tokens_map.json",
    "tokenizer_config.json",
];

// Process-wide, lazily-initialized embedding model. Wrapped in a Mutex
// because fastembed's `embed` takes `&mut self`.
static TEXT_EMBEDDING_MODEL: OnceLock<Mutex<TextEmbedding>> = OnceLock::new();
|
||||
|
||||
/// Effective AI configuration: the raw `site_settings` row plus every option
/// resolved to a usable value (trimmed, clamped, or defaulted).
#[derive(Clone, Debug)]
struct AiRuntimeSettings {
    // Original settings row, kept so the index timestamp can be written back.
    raw: site_settings::Model,
    provider: String,
    // `None` when unset/blank; without both base and key the retrieval-only
    // fallback answer is used instead of calling a chat provider.
    api_base: Option<String>,
    api_key: Option<String>,
    chat_model: String,
    system_prompt: String,
    // Chunks retrieved per question (clamped to 1..=12).
    top_k: usize,
    // Target chunk length in characters (clamped to 400..=4000).
    chunk_size: usize,
}
|
||||
|
||||
/// An index-ready chunk of a post, produced by `build_chunks` before being
/// embedded and inserted into `ai_chunks`.
#[derive(Clone, Debug)]
struct ChunkDraft {
    source_slug: String,
    source_title: Option<String>,
    source_path: Option<String>,
    // Currently always "post".
    source_type: String,
    // Position of this chunk within its source document.
    chunk_index: i32,
    content: String,
    // Up to ~180 chars of content with whitespace collapsed (see preview_text).
    content_preview: Option<String>,
    word_count: Option<i32>,
}
|
||||
|
||||
/// A retrieved chunk paired with its similarity score (higher = closer).
#[derive(Clone, Debug)]
struct ScoredChunk {
    score: f64,
    row: ai_chunks::Model,
}
|
||||
|
||||
/// Row shape returned by the pgvector similarity query in `retrieve_matches`.
#[derive(Clone, Debug, FromQueryResult)]
struct SimilarChunkRow {
    source_slug: String,
    source_title: Option<String>,
    chunk_index: i32,
    content: String,
    content_preview: Option<String>,
    word_count: Option<i32>,
    // (1 - cosine distance), computed in SQL.
    score: f64,
}
|
||||
|
||||
/// Distinguishes indexed passages from user queries when preparing text for
/// embedding. Both are handled identically today (trim only); the variants
/// keep call sites explicit should the two paths ever diverge.
#[derive(Clone, Copy, Debug)]
enum EmbeddingKind {
    Passage,
    Query,
}
|
||||
|
||||
/// A citation entry returned alongside an answer (serialized for the client).
#[derive(Clone, Debug, Serialize)]
pub struct AiSource {
    pub slug: String,
    pub title: String,
    pub excerpt: String,
    // Similarity score rounded to four decimal places.
    pub score: f64,
    pub chunk_index: i32,
}
|
||||
|
||||
/// Final result of `answer_question`: the answer text plus citations and
/// index statistics.
#[derive(Clone, Debug)]
pub struct AiAnswer {
    pub answer: String,
    pub sources: Vec<AiSource>,
    pub indexed_chunks: usize,
    pub last_indexed_at: Option<DateTime<Utc>>,
}
|
||||
|
||||
/// Everything needed to call the chat provider once retrieval is done.
/// Kept as a standalone value — presumably so callers can execute it
/// themselves (e.g. with `build_provider_payload(request, true)` for
/// streaming); confirm against the route handlers.
#[derive(Clone, Debug)]
pub(crate) struct AiProviderRequest {
    pub(crate) provider: String,
    pub(crate) api_base: String,
    pub(crate) api_key: String,
    pub(crate) chat_model: String,
    pub(crate) system_prompt: String,
    // Fully rendered user prompt including the retrieved context blocks.
    pub(crate) prompt: String,
}
|
||||
|
||||
/// Outcome of the retrieval stage (`prepare_answer`). Exactly one of
/// `provider_request` / `immediate_answer` is `Some`: either the caller still
/// has to call the chat provider, or the answer is already final.
#[derive(Clone, Debug)]
pub(crate) struct PreparedAiAnswer {
    pub(crate) question: String,
    pub(crate) provider_request: Option<AiProviderRequest>,
    pub(crate) immediate_answer: Option<String>,
    pub(crate) sources: Vec<AiSource>,
    pub(crate) indexed_chunks: usize,
    pub(crate) last_indexed_at: Option<DateTime<Utc>>,
}
|
||||
|
||||
/// Result of an index rebuild: how many chunks were written and when.
#[derive(Clone, Debug)]
pub struct AiIndexSummary {
    pub indexed_chunks: usize,
    pub last_indexed_at: Option<DateTime<Utc>>,
}
|
||||
|
||||
/// Normalizes an optional string: trims surrounding whitespace and maps
/// absent or blank values to `None`.
fn trim_to_option(value: Option<String>) -> Option<String> {
    value
        .map(|item| item.trim().to_string())
        .filter(|trimmed| !trimmed.is_empty())
}
|
||||
|
||||
/// Collapses all whitespace runs in `content` to single spaces and returns
/// at most `limit` characters of the result; `None` when nothing remains.
fn preview_text(content: &str, limit: usize) -> Option<String> {
    let mut flattened = String::new();
    for word in content.split_whitespace() {
        if !flattened.is_empty() {
            flattened.push(' ');
        }
        flattened.push_str(word);
    }

    if flattened.is_empty() {
        None
    } else {
        // Truncate by Unicode scalar values, not bytes, so multi-byte text
        // (e.g. CJK) is never cut mid-character.
        Some(flattened.chars().take(limit).collect())
    }
}
|
||||
|
||||
/// Joins an API base URL and a path with exactly one `/` between them,
/// regardless of trailing/leading slashes on either side.
fn build_endpoint(api_base: &str, path: &str) -> String {
    let base = api_base.trim_end_matches('/');
    let suffix = path.trim_start_matches('/');
    format!("{base}/{suffix}")
}
|
||||
|
||||
fn local_embedding_dir() -> PathBuf {
|
||||
PathBuf::from(LOCAL_EMBEDDING_CACHE_DIR)
|
||||
}
|
||||
|
||||
/// Downloads one model artifact into `directory`, skipping the network call
/// entirely when the file is already cached on disk.
///
/// Uses the blocking reqwest client because this runs inside the
/// `spawn_blocking` model-initialization path, not on the async runtime.
fn download_embedding_file(
    client: &reqwest::blocking::Client,
    directory: &Path,
    file_name: &str,
) -> Result<()> {
    let target_path = directory.join(file_name);
    // Already present from a previous run — nothing to do.
    if target_path.exists() {
        return Ok(());
    }

    let url = format!("{LOCAL_EMBEDDING_BASE_URL}/{file_name}");
    let bytes = client
        .get(url)
        .send()
        // Treat HTTP error statuses (4xx/5xx) as failures, not payloads.
        .and_then(reqwest::blocking::Response::error_for_status)
        .map_err(|error| Error::BadRequest(format!("下载本地 embedding 文件失败: {error}")))?
        .bytes()
        .map_err(|error| Error::BadRequest(format!("读取本地 embedding 文件失败: {error}")))?;

    fs::write(&target_path, &bytes)
        .map_err(|error| Error::BadRequest(format!("写入本地 embedding 文件失败: {error}")))?;

    Ok(())
}
|
||||
|
||||
/// Ensures every artifact of the local embedding model exists in the cache
/// directory, downloading any missing files, and returns the directory path.
fn ensure_local_embedding_files() -> Result<PathBuf> {
    let directory = local_embedding_dir();
    fs::create_dir_all(&directory)
        .map_err(|error| Error::BadRequest(format!("创建本地 embedding 目录失败: {error}")))?;

    let client = reqwest::blocking::Client::builder()
        .build()
        .map_err(|error| {
            Error::BadRequest(format!("创建本地 embedding 下载客户端失败: {error}"))
        })?;

    for file_name in LOCAL_EMBEDDING_FILES {
        download_embedding_file(&client, &directory, file_name)?;
    }

    Ok(directory)
}
|
||||
|
||||
/// Loads the local ONNX embedding model and its tokenizer files from the
/// cache (downloading them first if needed) and builds a fastembed
/// `TextEmbedding`.
fn load_local_embedding_model() -> Result<TextEmbedding> {
    let directory = ensure_local_embedding_files()?;
    let tokenizer_files = TokenizerFiles {
        tokenizer_file: fs::read(directory.join("tokenizer.json"))
            .map_err(|error| Error::BadRequest(format!("读取 tokenizer.json 失败: {error}")))?,
        config_file: fs::read(directory.join("config.json"))
            .map_err(|error| Error::BadRequest(format!("读取 config.json 失败: {error}")))?,
        special_tokens_map_file: fs::read(directory.join("special_tokens_map.json")).map_err(
            |error| Error::BadRequest(format!("读取 special_tokens_map.json 失败: {error}")),
        )?,
        tokenizer_config_file: fs::read(directory.join("tokenizer_config.json")).map_err(
            |error| Error::BadRequest(format!("读取 tokenizer_config.json 失败: {error}")),
        )?,
    };

    let model = UserDefinedEmbeddingModel::new(
        fs::read(directory.join("model.onnx"))
            .map_err(|error| Error::BadRequest(format!("读取 model.onnx 失败: {error}")))?,
        tokenizer_files,
    )
    // Apply mean pooling over the token embeddings.
    .with_pooling(Pooling::Mean);

    TextEmbedding::try_new_from_user_defined(model, InitOptionsUserDefined::default())
        .map_err(|error| Error::BadRequest(format!("本地 embedding 模型初始化失败: {error}")))
}
|
||||
|
||||
/// Returns the process-wide embedding model, initializing it on first use.
///
/// If two threads race past `get()`, both may load a model; `set` keeps the
/// winner and the loser's copy is dropped. Subsequent calls hit the cache.
fn local_embedding_engine() -> Result<&'static Mutex<TextEmbedding>> {
    if let Some(model) = TEXT_EMBEDDING_MODEL.get() {
        return Ok(model);
    }

    let model = load_local_embedding_model()?;

    // Ignore the result: losing the race just means another thread already
    // cached its instance.
    let _ = TEXT_EMBEDDING_MODEL.set(Mutex::new(model));

    TEXT_EMBEDDING_MODEL
        .get()
        .ok_or_else(|| Error::BadRequest("本地 embedding 模型未能成功缓存".to_string()))
}
|
||||
|
||||
fn vector_literal(embedding: &[f64]) -> Result<String> {
|
||||
if embedding.len() != EMBEDDING_DIMENSION {
|
||||
return Err(Error::BadRequest(format!(
|
||||
"embedding 维度异常,期望 {EMBEDDING_DIMENSION},实际 {}",
|
||||
embedding.len()
|
||||
)));
|
||||
}
|
||||
|
||||
Ok(format!(
|
||||
"[{}]",
|
||||
embedding
|
||||
.iter()
|
||||
.map(|value| value.to_string())
|
||||
.collect::<Vec<_>>()
|
||||
.join(",")
|
||||
))
|
||||
}
|
||||
|
||||
fn prepare_embedding_text(kind: EmbeddingKind, text: &str) -> String {
|
||||
match kind {
|
||||
EmbeddingKind::Passage | EmbeddingKind::Query => text.trim().to_string(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits `text` into trimmed pieces of at most roughly `chunk_size`
/// characters, breaking only at line boundaries. A single line longer than
/// `chunk_size` is kept intact, so the bound is not strict.
fn split_long_text(text: &str, chunk_size: usize) -> Vec<String> {
    let mut pieces = Vec::new();
    let mut pending = String::new();

    for line in text.lines() {
        let candidate = if pending.is_empty() {
            line.to_string()
        } else {
            format!("{pending}\n{line}")
        };

        let overflows = candidate.chars().count() > chunk_size;
        if overflows && !pending.is_empty() {
            // Close the current piece and start over with this line.
            pieces.push(pending.trim().to_string());
            pending = line.to_string();
        } else {
            pending = candidate;
        }
    }

    let tail = pending.trim();
    if !tail.is_empty() {
        pieces.push(tail.to_string());
    }

    pieces
}
|
||||
|
||||
fn build_chunks(posts: &[content::MarkdownPost], chunk_size: usize) -> Vec<ChunkDraft> {
|
||||
let mut chunks = Vec::new();
|
||||
|
||||
for post in posts.iter().filter(|post| post.published) {
|
||||
let mut sections = Vec::new();
|
||||
sections.push(format!("# {}", post.title));
|
||||
if let Some(description) = post
|
||||
.description
|
||||
.as_deref()
|
||||
.filter(|value| !value.trim().is_empty())
|
||||
{
|
||||
sections.push(description.trim().to_string());
|
||||
}
|
||||
sections.push(post.content.trim().to_string());
|
||||
|
||||
let source_text = sections
|
||||
.into_iter()
|
||||
.filter(|item| !item.trim().is_empty())
|
||||
.collect::<Vec<_>>()
|
||||
.join("\n\n");
|
||||
|
||||
let paragraphs = source_text
|
||||
.split("\n\n")
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())
|
||||
.collect::<Vec<_>>();
|
||||
|
||||
let mut buffer = String::new();
|
||||
let mut chunk_index = 0_i32;
|
||||
|
||||
for paragraph in paragraphs {
|
||||
if paragraph.chars().count() > chunk_size {
|
||||
if !buffer.trim().is_empty() {
|
||||
chunks.push(ChunkDraft {
|
||||
source_slug: post.slug.clone(),
|
||||
source_title: Some(post.title.clone()),
|
||||
source_path: Some(post.file_path.clone()),
|
||||
source_type: "post".to_string(),
|
||||
chunk_index,
|
||||
content: buffer.trim().to_string(),
|
||||
content_preview: preview_text(&buffer, 180),
|
||||
word_count: Some(buffer.split_whitespace().count() as i32),
|
||||
});
|
||||
chunk_index += 1;
|
||||
buffer.clear();
|
||||
}
|
||||
|
||||
for part in split_long_text(paragraph, chunk_size) {
|
||||
if part.trim().is_empty() {
|
||||
continue;
|
||||
}
|
||||
|
||||
chunks.push(ChunkDraft {
|
||||
source_slug: post.slug.clone(),
|
||||
source_title: Some(post.title.clone()),
|
||||
source_path: Some(post.file_path.clone()),
|
||||
source_type: "post".to_string(),
|
||||
chunk_index,
|
||||
content_preview: preview_text(&part, 180),
|
||||
word_count: Some(part.split_whitespace().count() as i32),
|
||||
content: part,
|
||||
});
|
||||
chunk_index += 1;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
let candidate = if buffer.is_empty() {
|
||||
paragraph.to_string()
|
||||
} else {
|
||||
format!("{buffer}\n\n{paragraph}")
|
||||
};
|
||||
|
||||
if candidate.chars().count() > chunk_size && !buffer.trim().is_empty() {
|
||||
chunks.push(ChunkDraft {
|
||||
source_slug: post.slug.clone(),
|
||||
source_title: Some(post.title.clone()),
|
||||
source_path: Some(post.file_path.clone()),
|
||||
source_type: "post".to_string(),
|
||||
chunk_index,
|
||||
content_preview: preview_text(&buffer, 180),
|
||||
word_count: Some(buffer.split_whitespace().count() as i32),
|
||||
content: buffer.trim().to_string(),
|
||||
});
|
||||
chunk_index += 1;
|
||||
buffer = paragraph.to_string();
|
||||
} else {
|
||||
buffer = candidate;
|
||||
}
|
||||
}
|
||||
|
||||
if !buffer.trim().is_empty() {
|
||||
chunks.push(ChunkDraft {
|
||||
source_slug: post.slug.clone(),
|
||||
source_title: Some(post.title.clone()),
|
||||
source_path: Some(post.file_path.clone()),
|
||||
source_type: "post".to_string(),
|
||||
chunk_index,
|
||||
content_preview: preview_text(&buffer, 180),
|
||||
word_count: Some(buffer.split_whitespace().count() as i32),
|
||||
content: buffer.trim().to_string(),
|
||||
});
|
||||
}
|
||||
}
|
||||
|
||||
chunks
|
||||
}
|
||||
|
||||
/// POSTs a JSON payload to the AI provider with bearer auth and returns the
/// parsed JSON body, mapping transport, HTTP-status, and parse failures to
/// `Error::BadRequest`.
async fn request_json(client: &Client, url: &str, api_key: &str, payload: Value) -> Result<Value> {
    let response = client
        .post(url)
        .bearer_auth(api_key)
        .header("Accept", "application/json")
        .json(&payload)
        .send()
        .await
        .map_err(|error| Error::BadRequest(format!("AI request failed: {error}")))?;

    // Read the body before checking the status so error responses can be
    // echoed back in the error message.
    let status = response.status();
    let body = response
        .text()
        .await
        .map_err(|error| Error::BadRequest(format!("AI response read failed: {error}")))?;

    if !status.is_success() {
        return Err(Error::BadRequest(format!(
            "AI provider returned {status}: {body}"
        )));
    }

    serde_json::from_str(&body)
        .map_err(|error| Error::BadRequest(format!("AI response parse failed: {error}")))
}
|
||||
|
||||
/// Whether this provider speaks the OpenAI Responses API (`/responses`)
/// rather than classic chat completions. Currently only "newapi" does.
fn provider_uses_responses(provider: &str) -> bool {
    provider.to_ascii_lowercase() == "newapi"
}
|
||||
|
||||
/// Embeds `inputs` with the local model on a blocking thread (fastembed's
/// API is synchronous and CPU-bound), returning one `f64` vector per input
/// in the same order.
async fn embed_texts_locally(inputs: Vec<String>, kind: EmbeddingKind) -> Result<Vec<Vec<f64>>> {
    tokio::task::spawn_blocking(move || {
        let model = local_embedding_engine()?;
        let prepared = inputs
            .iter()
            .map(|item| prepare_embedding_text(kind, item))
            .collect::<Vec<_>>();

        // A poisoned mutex (a previous embed panicked) is surfaced as a
        // retryable error rather than propagating the panic.
        let mut guard = model.lock().map_err(|_| {
            Error::BadRequest("本地 embedding 模型当前不可用,请稍后重试".to_string())
        })?;

        let embeddings = guard
            .embed(prepared, Some(EMBEDDING_BATCH_SIZE))
            .map_err(|error| Error::BadRequest(format!("本地 embedding 生成失败: {error}")))?;

        // Widen each component to f64 (lossless `From`) for the pgvector
        // literal builder downstream.
        Ok(embeddings
            .into_iter()
            .map(|embedding| embedding.into_iter().map(f64::from).collect::<Vec<_>>())
            .collect::<Vec<_>>())
    })
    .await
    // spawn_blocking join failure (panic/cancel) is reported separately.
    .map_err(|error| Error::BadRequest(format!("本地 embedding 任务执行失败: {error}")))?
}
|
||||
|
||||
/// Extracts assistant text from a chat-completions style response:
/// `choices[0].message.content`, which may be either a plain string or an
/// array of parts carrying a `text` field.
fn extract_message_content(value: &Value) -> Option<String> {
    if let Some(content) = value
        .get("choices")
        .and_then(Value::as_array)
        .and_then(|choices| choices.first())
        .and_then(|choice| choice.get("message"))
        .and_then(|message| message.get("content"))
    {
        // Simple case: content is a single string.
        if let Some(text) = content.as_str() {
            return Some(text.trim().to_string());
        }

        // Multi-part case: concatenate each part's "text" with newlines.
        if let Some(parts) = content.as_array() {
            let merged = parts
                .iter()
                .filter_map(|part| part.get("text").and_then(Value::as_str))
                .collect::<Vec<_>>()
                .join("\n");

            if !merged.trim().is_empty() {
                return Some(merged.trim().to_string());
            }
        }
    }

    None
}
|
||||
|
||||
/// Trims every segment, drops blank ones, and joins the rest with newlines.
/// Returns `None` when nothing survives.
fn merge_text_segments(parts: Vec<String>) -> Option<String> {
    let mut merged = String::new();
    for part in parts {
        let trimmed = part.trim();
        if trimmed.is_empty() {
            continue;
        }
        if !merged.is_empty() {
            merged.push('\n');
        }
        merged.push_str(trimmed);
    }

    if merged.is_empty() {
        None
    } else {
        Some(merged)
    }
}
|
||||
|
||||
/// Extracts assistant text from an OpenAI Responses-API style reply.
///
/// Prefers the convenience `output_text` string; otherwise walks
/// `output[*].content[*]`, accepting either a direct `text` string or a
/// nested `output_text.text`, and merges the pieces with newlines.
fn extract_response_output(value: &Value) -> Option<String> {
    if let Some(text) = value.get("output_text").and_then(Value::as_str) {
        let trimmed = text.trim();
        if !trimmed.is_empty() {
            return Some(trimmed.to_string());
        }
    }

    // No `output` array at all means this isn't a Responses-API payload.
    let output_items = value.get("output").and_then(Value::as_array)?;
    let mut segments = Vec::new();

    for item in output_items {
        let Some(content_items) = item.get("content").and_then(Value::as_array) else {
            continue;
        };

        for content in content_items {
            if let Some(text) = content.get("text").and_then(Value::as_str) {
                segments.push(text.to_string());
                continue;
            }

            if let Some(text) = content
                .get("output_text")
                .and_then(|output_text| output_text.get("text"))
                .and_then(Value::as_str)
            {
                segments.push(text.to_string());
            }
        }
    }

    merge_text_segments(segments)
}
|
||||
|
||||
/// Assembles the user prompt sent to the chat model: numbered context blocks
/// (title, slug, similarity, content) followed by answering instructions and
/// the user's question.
fn build_chat_prompt(question: &str, matches: &[ScoredChunk]) -> String {
    let context_blocks = matches
        .iter()
        .enumerate()
        .map(|(index, item)| {
            format!(
                "[资料 {}]\n标题: {}\nSlug: {}\n相似度: {:.4}\n内容:\n{}",
                index + 1,
                // Fall back to a placeholder title when the stored one is blank.
                item.row
                    .source_title
                    .as_deref()
                    .filter(|value| !value.trim().is_empty())
                    .unwrap_or("未命名内容"),
                item.row.source_slug,
                item.score,
                item.row.content
            )
        })
        .collect::<Vec<_>>()
        .join("\n\n");

    format!(
        "请仅根据下面提供的资料回答用户问题。\n\
        如果资料不足以支撑结论,请直接说明“我在当前博客资料里没有找到足够信息”。\n\
        回答要求:\n\
        1. 使用中文。\n\
        2. 使用 Markdown 输出,必要时用短列表或小标题,不要输出 HTML。\n\
        3. 先给直接结论,再补充关键点,整体尽量精炼。\n\
        4. 不要编造未在资料中出现的事实。\n\
        5. 如果回答引用了具体资料,可自然地提及文章标题。\n\n\
        用户问题:{question}\n\n\
        可用资料:\n{context_blocks}"
    )
}
|
||||
|
||||
fn build_sources(matches: &[ScoredChunk]) -> Vec<AiSource> {
|
||||
matches
|
||||
.iter()
|
||||
.map(|item| AiSource {
|
||||
slug: item.row.source_slug.clone(),
|
||||
title: item
|
||||
.row
|
||||
.source_title
|
||||
.as_deref()
|
||||
.filter(|value| !value.trim().is_empty())
|
||||
.unwrap_or("未命名内容")
|
||||
.to_string(),
|
||||
excerpt: item
|
||||
.row
|
||||
.content_preview
|
||||
.clone()
|
||||
.unwrap_or_else(|| preview_text(&item.row.content, 180).unwrap_or_default()),
|
||||
score: (item.score * 10000.0).round() / 10000.0,
|
||||
chunk_index: item.row.chunk_index,
|
||||
})
|
||||
.collect::<Vec<_>>()
|
||||
}
|
||||
|
||||
/// Builds the request body for the configured provider.
///
/// Responses-API providers get the `input`/`reasoning` shape; everything
/// else gets a classic chat-completions `messages` payload.
pub(crate) fn build_provider_payload(request: &AiProviderRequest, stream: bool) -> Value {
    if provider_uses_responses(&request.provider) {
        json!({
            "model": request.chat_model,
            "input": [
                {
                    "role": "system",
                    "content": [
                        {
                            "type": "input_text",
                            "text": request.system_prompt
                        }
                    ]
                },
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "input_text",
                            "text": request.prompt
                        }
                    ]
                }
            ],
            "reasoning": {
                "effort": DEFAULT_REASONING_EFFORT
            },
            // Cap answer length; opt out of provider-side response storage
            // when DEFAULT_DISABLE_RESPONSE_STORAGE is set.
            "max_output_tokens": 520,
            "store": !DEFAULT_DISABLE_RESPONSE_STORAGE,
            "stream": stream
        })
    } else {
        json!({
            "model": request.chat_model,
            // Low sampling temperature — presumably to keep answers close to
            // the retrieved context; confirm intent before tuning.
            "temperature": 0.2,
            "stream": stream,
            "messages": [
                {
                    "role": "system",
                    "content": request.system_prompt,
                },
                {
                    "role": "user",
                    "content": request.prompt,
                }
            ]
        })
    }
}
|
||||
|
||||
pub(crate) fn build_provider_url(request: &AiProviderRequest) -> String {
|
||||
let path = if provider_uses_responses(&request.provider) {
|
||||
"/responses"
|
||||
} else {
|
||||
"/chat/completions"
|
||||
};
|
||||
|
||||
build_endpoint(&request.api_base, path)
|
||||
}
|
||||
|
||||
pub(crate) fn extract_provider_text(value: &Value) -> Option<String> {
|
||||
extract_response_output(value).or_else(|| extract_message_content(value))
|
||||
}
|
||||
|
||||
/// Fires a single non-streaming chat request against the provider and
/// returns the extracted answer text.
async fn request_chat_answer(request: &AiProviderRequest) -> Result<String> {
    let client = Client::new();
    let response = request_json(
        &client,
        &build_provider_url(request),
        &request.api_key,
        // `stream: false` — this path waits for the full response body.
        build_provider_payload(request, false),
    )
    .await?;

    extract_provider_text(&response).ok_or_else(|| {
        Error::BadRequest("AI chat response did not contain readable content".to_string())
    })
}
|
||||
|
||||
/// Runs the retrieval stage for `question` and decides how the answer will
/// be produced.
///
/// Returns either an `immediate_answer` (no matches, or no chat API
/// configured) or a `provider_request` the caller must still execute —
/// exactly one of the two is `Some`.
pub(crate) async fn prepare_answer(ctx: &AppContext, question: &str) -> Result<PreparedAiAnswer> {
    let trimmed_question = question.trim();
    if trimmed_question.is_empty() {
        return Err(Error::BadRequest("问题不能为空".to_string()));
    }

    // `true`: this public-facing path requires the AI feature to be enabled.
    let settings = load_runtime_settings(ctx, true).await?;
    let (matches, indexed_chunks, last_indexed_at) =
        retrieve_matches(ctx, &settings, trimmed_question).await?;

    // Nothing relevant retrieved: answer directly, skip the provider.
    if matches.is_empty() {
        return Ok(PreparedAiAnswer {
            question: trimmed_question.to_string(),
            provider_request: None,
            immediate_answer: Some(
                "我在当前博客资料里没有找到足够信息。你可以换个更具体的问题,或者先去后台重建一下 AI 索引。"
                    .to_string(),
            ),
            sources: Vec::new(),
            indexed_chunks,
            last_indexed_at,
        });
    }

    let sources = build_sources(&matches);
    // A provider request is only possible when both API base and key are set.
    let provider_request = match (settings.api_base.clone(), settings.api_key.clone()) {
        (Some(api_base), Some(api_key)) => Some(AiProviderRequest {
            provider: settings.provider.clone(),
            api_base,
            api_key,
            chat_model: settings.chat_model.clone(),
            system_prompt: settings.system_prompt.clone(),
            prompt: build_chat_prompt(trimmed_question, &matches),
        }),
        _ => None,
    };

    // Without a configured chat API, fall back to a retrieval-only summary.
    let immediate_answer = provider_request
        .is_none()
        .then(|| retrieval_only_answer(&matches));

    Ok(PreparedAiAnswer {
        question: trimmed_question.to_string(),
        provider_request,
        immediate_answer,
        sources,
        indexed_chunks,
        last_indexed_at,
    })
}
|
||||
|
||||
/// Fallback answer used when no chat model API is configured: a digest of
/// the top three retrieved chunks plus a hint to configure the API.
fn retrieval_only_answer(matches: &[ScoredChunk]) -> String {
    let summary = matches
        .iter()
        .take(3)
        .map(|item| {
            // Blank stored titles fall back to a placeholder.
            let title = item
                .row
                .source_title
                .as_deref()
                .filter(|value| !value.trim().is_empty())
                .unwrap_or("未命名内容");
            // Prefer the stored preview; otherwise derive a short one.
            let excerpt = item
                .row
                .content_preview
                .clone()
                .unwrap_or_else(|| preview_text(&item.row.content, 120).unwrap_or_default());

            format!("《{title}》: {excerpt}")
        })
        .collect::<Vec<_>>()
        .join("\n");

    format!(
        "本地知识检索已经完成,但后台还没有配置聊天模型 API,所以我先返回最相关的资料摘要:\n{summary}\n\n\
        如果你希望得到完整的自然语言回答,请在后台补上聊天模型的 API Base / API Key。"
    )
}
|
||||
|
||||
/// Loads the first `site_settings` row and resolves every AI option to an
/// effective value (stored value trimmed/clamped, else the default).
///
/// With `require_enabled`, a disabled AI feature is reported as `NotFound`,
/// so the endpoint behaves as if it did not exist.
async fn load_runtime_settings(
    ctx: &AppContext,
    require_enabled: bool,
) -> Result<AiRuntimeSettings> {
    let raw = site_settings::Entity::find()
        .order_by_asc(site_settings::Column::Id)
        .one(&ctx.db)
        .await?
        .ok_or(Error::NotFound)?;

    if require_enabled && !raw.ai_enabled.unwrap_or(false) {
        return Err(Error::NotFound);
    }

    Ok(AiRuntimeSettings {
        provider: provider_name(raw.ai_provider.as_deref()),
        api_base: trim_to_option(raw.ai_api_base.clone()),
        api_key: trim_to_option(raw.ai_api_key.clone()),
        chat_model: trim_to_option(raw.ai_chat_model.clone())
            .unwrap_or_else(|| DEFAULT_CHAT_MODEL.to_string()),
        system_prompt: trim_to_option(raw.ai_system_prompt.clone())
            .unwrap_or_else(|| DEFAULT_SYSTEM_PROMPT.to_string()),
        // Clamp stored values into sane operational ranges.
        top_k: raw
            .ai_top_k
            .map(|value| value.clamp(1, 12) as usize)
            .unwrap_or(DEFAULT_TOP_K),
        chunk_size: raw
            .ai_chunk_size
            .map(|value| value.clamp(400, 4000) as usize)
            .unwrap_or(DEFAULT_CHUNK_SIZE),
        raw,
    })
}
|
||||
|
||||
async fn update_indexed_at(
|
||||
ctx: &AppContext,
|
||||
settings: &site_settings::Model,
|
||||
) -> Result<DateTime<Utc>> {
|
||||
let now = Utc::now();
|
||||
let mut model = settings.clone().into_active_model();
|
||||
model.ai_last_indexed_at = Set(Some(now.into()));
|
||||
let _ = model.update(&ctx.db).await?;
|
||||
Ok(now)
|
||||
}
|
||||
|
||||
/// Retrieves the `top_k` chunks most similar to `question` via pgvector
/// cosine distance, lazily (re)building the index when it is empty.
///
/// Returns `(matches, total_indexed_chunks, last_indexed_at)`.
async fn retrieve_matches(
    ctx: &AppContext,
    settings: &AiRuntimeSettings,
    question: &str,
) -> Result<(Vec<ScoredChunk>, usize, Option<DateTime<Utc>>)> {
    let mut indexed_chunks = ai_chunks::Entity::find().count(&ctx.db).await? as usize;
    let mut last_indexed_at = settings.raw.ai_last_indexed_at.map(Into::into);

    // First query after a deploy/truncate: build the index on demand.
    if indexed_chunks == 0 {
        let summary = rebuild_index(ctx).await?;
        indexed_chunks = summary.indexed_chunks;
        last_indexed_at = summary.last_indexed_at;
    }

    // Still nothing to search (e.g. no published posts) — bail out early.
    if indexed_chunks == 0 {
        return Ok((Vec::new(), 0, last_indexed_at));
    }

    let question_embedding =
        embed_texts_locally(vec![question.trim().to_string()], EmbeddingKind::Query)
            .await?
            .into_iter()
            .next()
            // An empty vector here is rejected by vector_literal's
            // dimension check below.
            .unwrap_or_default();
    let query_vector = vector_literal(&question_embedding)?;

    // `1 - (a <=> b)` turns pgvector's cosine distance into a similarity
    // score where higher is better; ordering by distance keeps index use.
    let statement = Statement::from_sql_and_values(
        DbBackend::Postgres,
        r#"
        SELECT
            source_slug,
            source_title,
            chunk_index,
            content,
            content_preview,
            word_count,
            (1 - (embedding <=> $1::vector))::float8 AS score
        FROM ai_chunks
        WHERE embedding IS NOT NULL
        ORDER BY embedding <=> $1::vector
        LIMIT $2
        "#,
        [query_vector.into(), (settings.top_k as i64).into()],
    );

    let matches = SimilarChunkRow::find_by_statement(statement)
        .all(&ctx.db)
        .await?
        .into_iter()
        // Rehydrate a partial ai_chunks::Model: fields the query did not
        // select (id, source_path, embedding, timestamps) get placeholders.
        .map(|row| ScoredChunk {
            score: row.score,
            row: ai_chunks::Model {
                created_at: Utc::now().into(),
                updated_at: Utc::now().into(),
                id: 0,
                source_slug: row.source_slug,
                source_title: row.source_title,
                source_path: None,
                source_type: "post".to_string(),
                chunk_index: row.chunk_index,
                content: row.content,
                content_preview: row.content_preview,
                embedding: None,
                word_count: row.word_count,
            },
        })
        .collect::<Vec<_>>();

    Ok((matches, indexed_chunks, last_indexed_at))
}
|
||||
|
||||
pub async fn rebuild_index(ctx: &AppContext) -> Result<AiIndexSummary> {
|
||||
let settings = load_runtime_settings(ctx, false).await?;
|
||||
let posts = content::sync_markdown_posts(ctx).await?;
|
||||
let chunk_drafts = build_chunks(&posts, settings.chunk_size);
|
||||
let embeddings = if chunk_drafts.is_empty() {
|
||||
Vec::new()
|
||||
} else {
|
||||
embed_texts_locally(
|
||||
chunk_drafts
|
||||
.iter()
|
||||
.map(|chunk| chunk.content.clone())
|
||||
.collect::<Vec<_>>(),
|
||||
EmbeddingKind::Passage,
|
||||
)
|
||||
.await?
|
||||
};
|
||||
|
||||
ctx.db
|
||||
.execute(Statement::from_string(
|
||||
DbBackend::Postgres,
|
||||
"TRUNCATE TABLE ai_chunks RESTART IDENTITY".to_string(),
|
||||
))
|
||||
.await?;
|
||||
|
||||
for (draft, embedding) in chunk_drafts.iter().zip(embeddings.into_iter()) {
|
||||
let embedding_literal = vector_literal(&embedding)?;
|
||||
let statement = Statement::from_sql_and_values(
|
||||
DbBackend::Postgres,
|
||||
r#"
|
||||
INSERT INTO ai_chunks (
|
||||
source_slug,
|
||||
source_title,
|
||||
source_path,
|
||||
source_type,
|
||||
chunk_index,
|
||||
content,
|
||||
content_preview,
|
||||
embedding,
|
||||
word_count
|
||||
) VALUES (
|
||||
$1, $2, $3, $4, $5, $6, $7, $8::vector, $9
|
||||
)
|
||||
"#,
|
||||
vec![
|
||||
draft.source_slug.clone().into(),
|
||||
draft.source_title.clone().into(),
|
||||
draft.source_path.clone().into(),
|
||||
draft.source_type.clone().into(),
|
||||
draft.chunk_index.into(),
|
||||
draft.content.clone().into(),
|
||||
draft.content_preview.clone().into(),
|
||||
embedding_literal.into(),
|
||||
draft.word_count.into(),
|
||||
],
|
||||
);
|
||||
ctx.db.execute(statement).await?;
|
||||
}
|
||||
|
||||
let last_indexed_at = update_indexed_at(ctx, &settings.raw).await?;
|
||||
|
||||
Ok(AiIndexSummary {
|
||||
indexed_chunks: chunk_drafts.len(),
|
||||
last_indexed_at: Some(last_indexed_at),
|
||||
})
|
||||
}
|
||||
|
||||
/// Public entry point: retrieves context for `question` and produces the
/// full answer, calling the chat provider when one is configured.
pub async fn answer_question(ctx: &AppContext, question: &str) -> Result<AiAnswer> {
    let prepared = prepare_answer(ctx, question).await?;
    let answer = if let Some(immediate_answer) = prepared.immediate_answer.clone() {
        // Retrieval-only or "nothing found" path — no provider round-trip.
        immediate_answer
    } else {
        // `prepare_answer` always supplies a provider request when there is
        // no immediate answer; this error guards that invariant.
        let request = prepared.provider_request.as_ref().ok_or_else(|| {
            Error::BadRequest("AI provider request was not prepared".to_string())
        })?;
        request_chat_answer(request).await?
    };

    Ok(AiAnswer {
        answer,
        sources: prepared.sources,
        indexed_chunks: prepared.indexed_chunks,
        last_indexed_at: prepared.last_indexed_at,
    })
}
|
||||
|
||||
pub fn provider_name(value: Option<&str>) -> String {
|
||||
trim_to_option(value.map(ToString::to_string))
|
||||
.unwrap_or_else(|| DEFAULT_AI_PROVIDER.to_string())
|
||||
}
|
||||
|
||||
/// Default API base URL used when none is configured in site settings.
pub fn default_api_base() -> &'static str {
    DEFAULT_AI_API_BASE
}
|
||||
|
||||
/// Default (placeholder) API key used when none is configured.
pub fn default_api_key() -> &'static str {
    DEFAULT_AI_API_KEY
}
|
||||
|
||||
/// Default chat model name used when none is configured.
pub fn default_chat_model() -> &'static str {
    DEFAULT_CHAT_MODEL
}
|
||||
|
||||
/// Human-readable label of the local embedding model (for display).
pub fn local_embedding_label() -> &'static str {
    LOCAL_EMBEDDING_MODEL_LABEL
}
|
||||
@@ -1,13 +1,14 @@
|
||||
use loco_rs::prelude::*;
|
||||
use sea_orm::{
|
||||
ActiveModelTrait, ColumnTrait, EntityTrait, IntoActiveModel, QueryFilter, QueryOrder, Set,
|
||||
ActiveModelTrait, ColumnTrait, Condition, EntityTrait, IntoActiveModel, QueryFilter,
|
||||
QueryOrder, Set,
|
||||
};
|
||||
use serde::{Deserialize, Serialize};
|
||||
use serde_json::Value;
|
||||
use std::fs;
|
||||
use std::path::{Path, PathBuf};
|
||||
|
||||
use crate::models::_entities::{categories, posts, tags};
|
||||
use crate::models::_entities::{categories, comments, posts, tags};
|
||||
|
||||
pub const MARKDOWN_POSTS_DIR: &str = "content/posts";
|
||||
const FIXTURE_POSTS_FILE: &str = "src/fixtures/posts.yaml";
|
||||
@@ -120,6 +121,19 @@ fn slugify(value: &str) -> String {
|
||||
slug.trim_matches('-').to_string()
|
||||
}
|
||||
|
||||
/// Builds the canonical comparison key for loose text matching:
/// surrounding whitespace removed, then lowercased.
fn normalized_match_key(value: &str) -> String {
    let stripped = value.trim();
    stripped.to_lowercase()
}
|
||||
|
||||
/// Whether two strings are equal after trim + lowercase normalization.
fn same_text(left: &str, right: &str) -> bool {
    normalized_match_key(left) == normalized_match_key(right)
}
|
||||
|
||||
fn text_matches_any(value: &str, keys: &[String]) -> bool {
|
||||
let current = normalized_match_key(value);
|
||||
!current.is_empty() && keys.iter().any(|key| current == *key)
|
||||
}
|
||||
|
||||
fn excerpt_from_content(content: &str) -> Option<String> {
|
||||
let mut in_code_block = false;
|
||||
|
||||
@@ -135,7 +149,11 @@ fn excerpt_from_content(content: &str) -> Option<String> {
|
||||
}
|
||||
|
||||
let excerpt = trimmed.chars().take(180).collect::<String>();
|
||||
return if excerpt.is_empty() { None } else { Some(excerpt) };
|
||||
return if excerpt.is_empty() {
|
||||
None
|
||||
} else {
|
||||
Some(excerpt)
|
||||
};
|
||||
}
|
||||
|
||||
None
|
||||
@@ -188,7 +206,8 @@ fn parse_markdown_source(file_stem: &str, raw: &str, file_path: &str) -> Result<
|
||||
let title = trim_to_option(frontmatter.title.clone())
|
||||
.or_else(|| title_from_content(&content))
|
||||
.unwrap_or_else(|| slug.clone());
|
||||
let description = trim_to_option(frontmatter.description.clone()).or_else(|| excerpt_from_content(&content));
|
||||
let description =
|
||||
trim_to_option(frontmatter.description.clone()).or_else(|| excerpt_from_content(&content));
|
||||
let category = trim_to_option(frontmatter.category.clone());
|
||||
let tags = frontmatter
|
||||
.tags
|
||||
@@ -205,7 +224,8 @@ fn parse_markdown_source(file_stem: &str, raw: &str, file_path: &str) -> Result<
|
||||
content: content.trim_start_matches('\n').to_string(),
|
||||
category,
|
||||
tags,
|
||||
post_type: trim_to_option(frontmatter.post_type.clone()).unwrap_or_else(|| "article".to_string()),
|
||||
post_type: trim_to_option(frontmatter.post_type.clone())
|
||||
.unwrap_or_else(|| "article".to_string()),
|
||||
image: trim_to_option(frontmatter.image.clone()),
|
||||
pinned: frontmatter.pinned.unwrap_or(false),
|
||||
published: frontmatter.published.unwrap_or(true),
|
||||
@@ -216,7 +236,12 @@ fn parse_markdown_source(file_stem: &str, raw: &str, file_path: &str) -> Result<
|
||||
fn build_markdown_document(post: &MarkdownPost) -> String {
|
||||
let mut lines = vec![
|
||||
"---".to_string(),
|
||||
format!("title: {}", serde_yaml::to_string(&post.title).unwrap_or_else(|_| format!("{:?}", post.title)).trim()),
|
||||
format!(
|
||||
"title: {}",
|
||||
serde_yaml::to_string(&post.title)
|
||||
.unwrap_or_else(|_| format!("{:?}", post.title))
|
||||
.trim()
|
||||
),
|
||||
format!("slug: {}", post.slug),
|
||||
];
|
||||
|
||||
@@ -284,10 +309,16 @@ fn ensure_markdown_posts_bootstrapped() -> Result<()> {
|
||||
image: None,
|
||||
pinned: fixture.pinned.unwrap_or(false),
|
||||
published: fixture.published.unwrap_or(true),
|
||||
file_path: markdown_post_path(&fixture.slug).to_string_lossy().to_string(),
|
||||
file_path: markdown_post_path(&fixture.slug)
|
||||
.to_string_lossy()
|
||||
.to_string(),
|
||||
};
|
||||
|
||||
fs::write(markdown_post_path(&fixture.slug), build_markdown_document(&post)).map_err(io_error)?;
|
||||
fs::write(
|
||||
markdown_post_path(&fixture.slug),
|
||||
build_markdown_document(&post),
|
||||
)
|
||||
.map_err(io_error)?;
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -312,14 +343,19 @@ async fn sync_tags_from_posts(ctx: &AppContext, posts: &[MarkdownPost]) -> Resul
|
||||
for post in posts {
|
||||
for tag_name in &post.tags {
|
||||
let slug = slugify(tag_name);
|
||||
let trimmed = tag_name.trim();
|
||||
let existing = tags::Entity::find()
|
||||
.filter(tags::Column::Slug.eq(&slug))
|
||||
.filter(
|
||||
Condition::any()
|
||||
.add(tags::Column::Slug.eq(&slug))
|
||||
.add(tags::Column::Name.eq(trimmed)),
|
||||
)
|
||||
.one(&ctx.db)
|
||||
.await?;
|
||||
|
||||
if existing.is_none() {
|
||||
let item = tags::ActiveModel {
|
||||
name: Set(Some(tag_name.clone())),
|
||||
name: Set(Some(trimmed.to_string())),
|
||||
slug: Set(slug),
|
||||
..Default::default()
|
||||
};
|
||||
@@ -339,12 +375,21 @@ async fn ensure_category(ctx: &AppContext, raw_name: &str) -> Result<Option<Stri
|
||||
|
||||
let slug = slugify(name);
|
||||
let existing = categories::Entity::find()
|
||||
.filter(categories::Column::Slug.eq(&slug))
|
||||
.filter(
|
||||
Condition::any()
|
||||
.add(categories::Column::Slug.eq(&slug))
|
||||
.add(categories::Column::Name.eq(name)),
|
||||
)
|
||||
.one(&ctx.db)
|
||||
.await?;
|
||||
|
||||
if let Some(category) = existing {
|
||||
if let Some(existing_name) = category.name.as_deref().map(str::trim).filter(|value| !value.is_empty()) {
|
||||
if let Some(existing_name) = category
|
||||
.name
|
||||
.as_deref()
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())
|
||||
{
|
||||
return Ok(Some(existing_name.to_string()));
|
||||
}
|
||||
|
||||
@@ -381,12 +426,21 @@ async fn canonicalize_tags(ctx: &AppContext, raw_tags: &[String]) -> Result<Vec<
|
||||
}
|
||||
|
||||
let existing = tags::Entity::find()
|
||||
.filter(tags::Column::Slug.eq(&slug))
|
||||
.filter(
|
||||
Condition::any()
|
||||
.add(tags::Column::Slug.eq(&slug))
|
||||
.add(tags::Column::Name.eq(trimmed)),
|
||||
)
|
||||
.one(&ctx.db)
|
||||
.await?;
|
||||
|
||||
let canonical_name = if let Some(tag) = existing {
|
||||
if let Some(existing_name) = tag.name.as_deref().map(str::trim).filter(|value| !value.is_empty()) {
|
||||
if let Some(existing_name) = tag
|
||||
.name
|
||||
.as_deref()
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())
|
||||
{
|
||||
existing_name.to_string()
|
||||
} else {
|
||||
let mut tag_model = tag.into_active_model();
|
||||
@@ -415,6 +469,132 @@ async fn canonicalize_tags(ctx: &AppContext, raw_tags: &[String]) -> Result<Vec<
|
||||
Ok(canonical_tags)
|
||||
}
|
||||
|
||||
fn write_markdown_post_to_disk(post: &MarkdownPost) -> Result<()> {
|
||||
fs::write(markdown_post_path(&post.slug), build_markdown_document(post)).map_err(io_error)
|
||||
}
|
||||
|
||||
pub fn rewrite_category_references(
|
||||
current_name: Option<&str>,
|
||||
current_slug: &str,
|
||||
next_name: Option<&str>,
|
||||
) -> Result<usize> {
|
||||
ensure_markdown_posts_bootstrapped()?;
|
||||
|
||||
let mut match_keys = Vec::new();
|
||||
if let Some(name) = current_name {
|
||||
let normalized = normalized_match_key(name);
|
||||
if !normalized.is_empty() {
|
||||
match_keys.push(normalized);
|
||||
}
|
||||
}
|
||||
|
||||
let normalized_slug = normalized_match_key(current_slug);
|
||||
if !normalized_slug.is_empty() {
|
||||
match_keys.push(normalized_slug);
|
||||
}
|
||||
|
||||
if match_keys.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let next_category = next_name
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())
|
||||
.map(ToString::to_string);
|
||||
let mut changed = 0_usize;
|
||||
let mut posts = load_markdown_posts_from_disk()?;
|
||||
|
||||
for post in &mut posts {
|
||||
let Some(category) = post.category.as_deref() else {
|
||||
continue;
|
||||
};
|
||||
|
||||
if !text_matches_any(category, &match_keys) {
|
||||
continue;
|
||||
}
|
||||
|
||||
match &next_category {
|
||||
Some(updated_name) if same_text(category, updated_name) => {}
|
||||
Some(updated_name) => {
|
||||
post.category = Some(updated_name.clone());
|
||||
write_markdown_post_to_disk(post)?;
|
||||
changed += 1;
|
||||
}
|
||||
None => {
|
||||
post.category = None;
|
||||
write_markdown_post_to_disk(post)?;
|
||||
changed += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(changed)
|
||||
}
|
||||
|
||||
pub fn rewrite_tag_references(
|
||||
current_name: Option<&str>,
|
||||
current_slug: &str,
|
||||
next_name: Option<&str>,
|
||||
) -> Result<usize> {
|
||||
ensure_markdown_posts_bootstrapped()?;
|
||||
|
||||
let mut match_keys = Vec::new();
|
||||
if let Some(name) = current_name {
|
||||
let normalized = normalized_match_key(name);
|
||||
if !normalized.is_empty() {
|
||||
match_keys.push(normalized);
|
||||
}
|
||||
}
|
||||
|
||||
let normalized_slug = normalized_match_key(current_slug);
|
||||
if !normalized_slug.is_empty() {
|
||||
match_keys.push(normalized_slug);
|
||||
}
|
||||
|
||||
if match_keys.is_empty() {
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let next_tag = next_name
|
||||
.map(str::trim)
|
||||
.filter(|value| !value.is_empty())
|
||||
.map(ToString::to_string);
|
||||
let mut changed = 0_usize;
|
||||
let mut posts = load_markdown_posts_from_disk()?;
|
||||
|
||||
for post in &mut posts {
|
||||
let mut updated_tags = Vec::new();
|
||||
let mut seen = std::collections::HashSet::new();
|
||||
let mut post_changed = false;
|
||||
|
||||
for tag in &post.tags {
|
||||
if text_matches_any(tag, &match_keys) {
|
||||
post_changed = true;
|
||||
if let Some(next_tag_name) = &next_tag {
|
||||
let normalized = normalized_match_key(next_tag_name);
|
||||
if seen.insert(normalized) {
|
||||
updated_tags.push(next_tag_name.clone());
|
||||
}
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
let normalized = normalized_match_key(tag);
|
||||
if seen.insert(normalized) {
|
||||
updated_tags.push(tag.clone());
|
||||
}
|
||||
}
|
||||
|
||||
if post_changed {
|
||||
post.tags = updated_tags;
|
||||
write_markdown_post_to_disk(post)?;
|
||||
changed += 1;
|
||||
}
|
||||
}
|
||||
|
||||
Ok(changed)
|
||||
}
|
||||
|
||||
async fn dedupe_tags(ctx: &AppContext) -> Result<()> {
|
||||
let existing_tags = tags::Entity::find()
|
||||
.order_by_asc(tags::Column::Id)
|
||||
@@ -425,10 +605,7 @@ async fn dedupe_tags(ctx: &AppContext) -> Result<()> {
|
||||
|
||||
for tag in existing_tags {
|
||||
let key = if tag.slug.trim().is_empty() {
|
||||
tag.name
|
||||
.as_deref()
|
||||
.map(slugify)
|
||||
.unwrap_or_default()
|
||||
tag.name.as_deref().map(slugify).unwrap_or_default()
|
||||
} else {
|
||||
slugify(&tag.slug)
|
||||
};
|
||||
@@ -453,11 +630,7 @@ async fn dedupe_categories(ctx: &AppContext) -> Result<()> {
|
||||
|
||||
for category in existing_categories {
|
||||
let key = if category.slug.trim().is_empty() {
|
||||
category
|
||||
.name
|
||||
.as_deref()
|
||||
.map(slugify)
|
||||
.unwrap_or_default()
|
||||
category.name.as_deref().map(slugify).unwrap_or_default()
|
||||
} else {
|
||||
slugify(&category.slug)
|
||||
};
|
||||
@@ -474,6 +647,28 @@ async fn dedupe_categories(ctx: &AppContext) -> Result<()> {
|
||||
|
||||
pub async fn sync_markdown_posts(ctx: &AppContext) -> Result<Vec<MarkdownPost>> {
|
||||
let markdown_posts = load_markdown_posts_from_disk()?;
|
||||
let markdown_slugs = markdown_posts
|
||||
.iter()
|
||||
.map(|post| post.slug.clone())
|
||||
.collect::<std::collections::HashSet<_>>();
|
||||
let existing_posts = posts::Entity::find().all(&ctx.db).await?;
|
||||
|
||||
for stale_post in existing_posts
|
||||
.into_iter()
|
||||
.filter(|post| !markdown_slugs.contains(&post.slug))
|
||||
{
|
||||
let stale_slug = stale_post.slug.clone();
|
||||
let related_comments = comments::Entity::find()
|
||||
.filter(comments::Column::PostSlug.eq(&stale_slug))
|
||||
.all(&ctx.db)
|
||||
.await?;
|
||||
|
||||
for comment in related_comments {
|
||||
let _ = comment.delete(&ctx.db).await;
|
||||
}
|
||||
|
||||
let _ = stale_post.delete(&ctx.db).await;
|
||||
}
|
||||
|
||||
for post in &markdown_posts {
|
||||
let canonical_category = match post.category.as_deref() {
|
||||
@@ -545,6 +740,18 @@ pub async fn write_markdown_document(
|
||||
Ok(updated)
|
||||
}
|
||||
|
||||
pub async fn delete_markdown_post(ctx: &AppContext, slug: &str) -> Result<()> {
|
||||
ensure_markdown_posts_bootstrapped()?;
|
||||
let path = markdown_post_path(slug);
|
||||
if !path.exists() {
|
||||
return Err(Error::NotFound);
|
||||
}
|
||||
|
||||
fs::remove_file(&path).map_err(io_error)?;
|
||||
sync_markdown_posts(ctx).await?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
pub async fn create_markdown_post(
|
||||
ctx: &AppContext,
|
||||
draft: MarkdownPostDraft,
|
||||
@@ -594,9 +801,16 @@ pub async fn create_markdown_post(
|
||||
file_path: markdown_post_path(&slug).to_string_lossy().to_string(),
|
||||
};
|
||||
|
||||
fs::write(markdown_post_path(&slug), build_markdown_document(&post)).map_err(io_error)?;
|
||||
let path = markdown_post_path(&slug);
|
||||
if path.exists() {
|
||||
return Err(Error::BadRequest(format!(
|
||||
"markdown post already exists for slug: {slug}"
|
||||
)));
|
||||
}
|
||||
|
||||
fs::write(&path, build_markdown_document(&post)).map_err(io_error)?;
|
||||
sync_markdown_posts(ctx).await?;
|
||||
parse_markdown_post(&markdown_post_path(&slug))
|
||||
parse_markdown_post(&path)
|
||||
}
|
||||
|
||||
pub async fn import_markdown_documents(
|
||||
@@ -635,7 +849,8 @@ pub async fn import_markdown_documents(
|
||||
continue;
|
||||
}
|
||||
|
||||
fs::write(markdown_post_path(&slug), normalize_newlines(&file.content)).map_err(io_error)?;
|
||||
fs::write(markdown_post_path(&slug), normalize_newlines(&file.content))
|
||||
.map_err(io_error)?;
|
||||
imported_slugs.push(slug);
|
||||
}
|
||||
|
||||
|
||||
@@ -1 +1,2 @@
|
||||
pub mod ai;
|
||||
pub mod content;
|
||||
|
||||
Reference in New Issue
Block a user