File tree: 3 files changed (+20 -5 lines changed)
components/chatbot/assistantEngine: 3 files changed (+20 -5 lines changed)
Original file line number | Diff line number | Diff line change
@@ -43,10 +43,10 @@ NVIDIA_API_KEY=your_nvidia_api_key_here
4343NVIDIA_BASE_URL = https://integrate.api.nvidia.com/v1
4444NVIDIA_MODEL = moonshotai/kimi-k2-instruct
4545NVIDIA_MODEL_FALLBACKS =
46- NVIDIA_TIMEOUT_MS = 15000
46+ NVIDIA_TIMEOUT_MS = 7000
4747NVIDIA_TEMPERATURE = 0.6
4848NVIDIA_TOP_P = 0.9
49- NVIDIA_MAX_TOKENS = 1024
49+ NVIDIA_MAX_TOKENS = 512
5050
5151# Chat moderation and CORS controls
5252NEXT_PUBLIC_CHAT_ABUSE_THRESHOLD = 3
Original file line number | Diff line number | Diff line change
@@ -9,10 +9,10 @@ const MODEL_FALLBACKS = (process.env.NVIDIA_MODEL_FALLBACKS || '')
99 . map ( model => model . trim ( ) )
1010 . filter ( Boolean ) ;
1111
12- const REQUEST_TIMEOUT_MS = Number ( process . env . NVIDIA_TIMEOUT_MS || 15000 ) ;
12+ const REQUEST_TIMEOUT_MS = Number ( process . env . NVIDIA_TIMEOUT_MS || 7000 ) ;
1313const TEMPERATURE = Number ( process . env . NVIDIA_TEMPERATURE || 0.6 ) ;
1414const TOP_P = Number ( process . env . NVIDIA_TOP_P || 0.9 ) ;
15- const MAX_TOKENS = Number ( process . env . NVIDIA_MAX_TOKENS || 1024 ) ;
15+ const MAX_TOKENS = Math . min ( Number ( process . env . NVIDIA_MAX_TOKENS || 512 ) , 512 ) ;
1616const ABUSE_THRESHOLD = Number ( process . env . CHAT_ABUSE_THRESHOLD || 3 ) ;
1717const ABUSE_WINDOW_MS = Number (
1818 process . env . CHAT_ABUSE_WINDOW_MS || 10 * 60 * 1000
@@ -256,8 +256,12 @@ export async function POST(req) {
256256 const modelsToTry = getModelsToTry ( ) ;
257257 let completion = null ;
258258 let lastError = null ;
259+ const requestStart = Date . now ( ) ;
259260
260261 for ( const model of modelsToTry ) {
262+ if ( Date . now ( ) - requestStart > REQUEST_TIMEOUT_MS - 500 ) {
263+ break ;
264+ }
261265 try {
262266 completion = await client . chat . completions . create ( {
263267 model,
@@ -276,6 +280,14 @@ export async function POST(req) {
276280 error ?. message || error
277281 ) ;
278282 }
283+ const isTimeoutError =
284+ error ?. name ?. toLowerCase ?. ( ) . includes ( 'timeout' ) ||
285+ String ( error ?. message || '' )
286+ . toLowerCase ( )
287+ . includes ( 'timeout' ) ;
288+ if ( isTimeoutError ) {
289+ break ;
290+ }
279291 }
280292 }
281293
Original file line number | Diff line number | Diff line change
@@ -58,7 +58,7 @@ Current sorting context:
5858
5959 try {
6060 const controller = new AbortController ( ) ;
61- const timeoutId = setTimeout ( ( ) => controller . abort ( ) , 10000 ) ;
61+ const timeoutId = setTimeout ( ( ) => controller . abort ( ) , 8500 ) ;
6262
6363 const res = await fetch ( CHAT_API_ENDPOINT , {
6464 method : 'POST' ,
@@ -114,6 +114,9 @@ Current sorting context:
114114 if ( err . message . includes ( 'API Error: 500' ) ) {
115115 throw new Error ( 'SERVER_ERROR' , { cause : err } ) ;
116116 }
117+ if ( err . message . includes ( 'API Error: 504' ) ) {
118+ throw new Error ( 'TIMEOUT_ERROR' , { cause : err } ) ;
119+ }
117120 if ( err . message . includes ( 'API Error: 429' ) ) {
118121 throw new Error ( 'RATE_LIMIT' , { cause : err } ) ;
119122 }
You can’t perform that action at this time.
0 commit comments