Skip to content

Commit edb87cd

Browse files
committed
feat: support semantic search in AI chat and embedding ability
1 parent: ff997cb · commit: edb87cd

22 files changed

Lines changed: 1307 additions & 24 deletions

File tree

cmd/wire_gen.go

Lines changed: 13 additions & 9 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

docs/docs.go

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11712,13 +11712,36 @@ const docTemplate = `{
1171211712
"type": "string",
1171311713
"maxLength": 256
1171411714
},
11715+
"embedding_crontab": {
11716+
"type": "string",
11717+
"maxLength": 100
11718+
},
11719+
"embedding_dimensions": {
11720+
"type": "integer"
11721+
},
11722+
"embedding_level": {
11723+
"type": "string",
11724+
"enum": [
11725+
"question",
11726+
"answer"
11727+
]
11728+
},
11729+
"embedding_model": {
11730+
"type": "string",
11731+
"maxLength": 100
11732+
},
1171511733
"model": {
1171611734
"type": "string",
1171711735
"maxLength": 100
1171811736
},
1171911737
"provider": {
1172011738
"type": "string",
1172111739
"maxLength": 50
11740+
},
11741+
"similarity_threshold": {
11742+
"type": "number",
11743+
"maximum": 1,
11744+
"minimum": 0
1172211745
}
1172311746
}
1172411747
},

docs/swagger.json

Lines changed: 23 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -11685,13 +11685,36 @@
1168511685
"type": "string",
1168611686
"maxLength": 256
1168711687
},
11688+
"embedding_crontab": {
11689+
"type": "string",
11690+
"maxLength": 100
11691+
},
11692+
"embedding_dimensions": {
11693+
"type": "integer"
11694+
},
11695+
"embedding_level": {
11696+
"type": "string",
11697+
"enum": [
11698+
"question",
11699+
"answer"
11700+
]
11701+
},
11702+
"embedding_model": {
11703+
"type": "string",
11704+
"maxLength": 100
11705+
},
1168811706
"model": {
1168911707
"type": "string",
1169011708
"maxLength": 100
1169111709
},
1169211710
"provider": {
1169311711
"type": "string",
1169411712
"maxLength": 50
11713+
},
11714+
"similarity_threshold": {
11715+
"type": "number",
11716+
"maximum": 1,
11717+
"minimum": 0
1169511718
}
1169611719
}
1169711720
},

docs/swagger.yaml

Lines changed: 17 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2250,12 +2250,29 @@ definitions:
22502250
api_key:
22512251
maxLength: 256
22522252
type: string
2253+
embedding_crontab:
2254+
maxLength: 100
2255+
type: string
2256+
embedding_dimensions:
2257+
type: integer
2258+
embedding_level:
2259+
enum:
2260+
- question
2261+
- answer
2262+
type: string
2263+
embedding_model:
2264+
maxLength: 100
2265+
type: string
22532266
model:
22542267
maxLength: 100
22552268
type: string
22562269
provider:
22572270
maxLength: 50
22582271
type: string
2272+
similarity_threshold:
2273+
maximum: 1
2274+
minimum: 0
2275+
type: number
22592276
type: object
22602277
schema.SiteAIReq:
22612278
properties:

i18n/en_US.yaml

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2355,6 +2355,24 @@ ui:
23552355
label: Model
23562356
msg: Model is required
23572357
add_success: AI settings updated successfully.
2358+
embedding_settings: Embedding Settings
2359+
embedding_model:
2360+
label: Embedding model
2361+
text: "The model used to generate vector embeddings for semantic search (e.g. text-embedding-3-small)."
2362+
embedding_dimensions:
2363+
label: Embedding dimensions
2364+
text: "The number of dimensions for the embedding vectors (e.g. 1536 for text-embedding-3-small)."
2365+
embedding_level:
2366+
label: Embedding level
2367+
text: "Choose whether to create embeddings at the question level (question + all answers + comments) or answer level (each answer separately)."
2368+
question: Question level
2369+
answer: Answer level
2370+
embedding_crontab:
2371+
label: Embedding schedule
2372+
text: "Cron expression for periodic embedding calculation (e.g. '0 */6 * * *' for every 6 hours). Leave empty to disable automatic indexing."
2373+
similarity_threshold:
2374+
label: Similarity threshold
2375+
text: "Minimum cosine similarity score (0-1) for semantic search results. Only results with a score above this threshold will be returned. Default is 0 (no filtering)."
23582376
conversations:
23592377
topic: Topic
23602378
helpful: Helpful

i18n/zh_CN.yaml

Lines changed: 18 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2319,6 +2319,24 @@ ui:
23192319
label: 模型
23202320
msg: 模型是必需的
23212321
add_success: AI 设置更新成功。
2322+
embedding_settings: Embedding 设置
2323+
embedding_model:
2324+
label: Embedding 模型
2325+
text: "用于生成语义搜索向量 Embedding 的模型(例如 text-embedding-3-small)。"
2326+
embedding_dimensions:
2327+
label: Embedding 维度
2328+
text: "Embedding 向量的维度数(例如 text-embedding-3-small 为 1536)。"
2329+
embedding_level:
2330+
label: Embedding 级别
2331+
text: "选择在问题级别(问题 + 所有回答 + 评论)还是回答级别(每个回答单独)创建 Embedding。"
2332+
question: 问题级别
2333+
answer: 回答级别
2334+
embedding_crontab:
2335+
label: Embedding 计划
2336+
text: "定期计算 Embedding 的 Cron 表达式(例如 '0 */6 * * *' 表示每 6 小时)。留空则禁用自动索引。"
2337+
similarity_threshold:
2338+
label: 相似度阈值
2339+
text: "语义搜索结果的最低余弦相似度分数(0-1)。只有分数高于此阈值的结果才会被返回。默认值为 0(不过滤)。"
23222340
conversations:
23232341
topic: 主题
23242342
helpful: 有帮助

internal/base/constant/ai_config.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ const (
3333
- get_tags: 搜索标签信息
3434
- get_tag_detail: 获取特定标签的详细信息
3535
- get_user: 搜索用户信息
36+
- semantic_search: 通过语义相似度搜索问题和答案。当用户的问题与现有内容概念相关但可能不匹配确切关键词时使用此工具。当 get_questions 关键词搜索返回较差结果时,请使用 semantic_search。
3637
3738
请根据用户的问题智能地使用这些工具来提供准确的答案。如果需要查询系统信息,请先使用相应的工具获取数据。`
3839
DefaultAIPromptConfigEnUS = `You are an intelligent assistant that can help users query information in the system. User question: %s
@@ -44,6 +45,7 @@ You can use the following tools to query system information:
4445
- get_tags: Search for tag information
4546
- get_tag_detail: Get detailed information about a specific tag
4647
- get_user: Search for user information
48+
- semantic_search: Search questions and answers by semantic meaning. Use this when the user's question relates conceptually to existing content but may not match exact keywords. When get_questions keyword search returns poor results, use semantic_search instead.
4749
4850
Please intelligently use these tools based on the user's question to provide accurate answers. If you need to query system information, please use the appropriate tools to get the data first.`
4951
)

internal/controller/ai_controller.go

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -446,6 +446,7 @@ func (c *AIController) handleAIConversation(ctx *gin.Context, w http.ResponseWri
446446
toolCalls, newMessages, finished, aiResponse := c.processAIStream(ctx, w, id, conversationCtx.Model, client, aiReq, messages)
447447
messages = newMessages
448448

449+
log.Debugf("Round %d: toolCalls=%v", round+1, toolCalls)
449450
if aiResponse != "" {
450451
conversationCtx.Messages = append(conversationCtx.Messages, &ai_conversation.ConversationMessage{
451452
Role: "assistant",
@@ -497,6 +498,10 @@ func (c *AIController) processAIStream(
497498
break
498499
}
499500

501+
if len(response.Choices) == 0 {
502+
continue
503+
}
504+
500505
choice := response.Choices[0]
501506

502507
if len(choice.Delta.ToolCalls) > 0 {
@@ -735,6 +740,8 @@ func (c *AIController) callMCPTool(ctx context.Context, toolName string, argumen
735740
result, err = c.mcpController.MCPTagDetailsHandler()(ctx, request)
736741
case "get_user":
737742
result, err = c.mcpController.MCPUserDetailsHandler()(ctx, request)
743+
case "semantic_search":
744+
result, err = c.mcpController.MCPSemanticSearchHandler()(ctx, request)
738745
default:
739746
return "", fmt.Errorf("unknown tool: %s", toolName)
740747
}

0 commit comments

Comments
 (0)