From fa57ebe52704f0785c51201c14a3c7fa32f8866c Mon Sep 17 00:00:00 2001
From: "bobo.yang"
Date: Mon, 19 Aug 2024 18:29:57 +0800
Subject: [PATCH] fix: Improve LLM stream handling in devchatComplete function

- Refactor stream processing for better chunk handling
- Add max_tokens parameter to API payload
- Implement simulated stream receive for testing purposes
---
 src/contributes/codecomplete/llm.ts | 95 +++++++++++++++++------------
 1 file changed, 57 insertions(+), 38 deletions(-)

diff --git a/src/contributes/codecomplete/llm.ts b/src/contributes/codecomplete/llm.ts
index 2e93407..f5763de 100644
--- a/src/contributes/codecomplete/llm.ts
+++ b/src/contributes/codecomplete/llm.ts
@@ -184,6 +184,7 @@ export async function * ollamaDeepseekComplete(prompt: string) : AsyncGenerator
 }
 
+
 export async function * devchatComplete(prompt: string) : AsyncGenerator {
     const devchatEndpoint = DevChatConfig.getInstance().get("providers.devchat.api_base");
     const llmApiBase = DevChatConfig.getInstance().get("complete_api_base");
@@ -199,24 +200,26 @@ export async function * devchatComplete(prompt: string) : AsyncGenerator
-    const headers = {
-        'Content-Type': 'application/json'
-    };
-    const payload = {
-        model: model,
-        prompt: prompt,
-        stream: true,
-        stop: ["<|endoftext|>", "<|EOT|>", "", "\n\n"],
-        temperature: 0.2
-    };
+    const headers = {
+        'Content-Type': 'application/json'
+    };
+    const payload = {
+        model: model,
+        prompt: prompt,
+        stream: true,
+        stop: ["<|endoftext|>", "<|EOT|>", "", "\n\n"],
+        temperature: 0.2,
+        max_tokens: 200
+    };
 
-    let idResponse = undefined;
+    // Internally implemented sleep helper
+    const sleep = (ms: number) => new Promise(resolve => setTimeout(resolve, ms));
 
     try {
         const response = await fetch(completionApiBase, {
             method: 'POST',
-            headers,
-            body: JSON.stringify(payload),
+            headers,
+            body: JSON.stringify(payload),
         });
 
         if (response.ok && response.body) {
@@ -227,8 +230,27 @@ export async function * devchatComplete(prompt: string) : AsyncGenerator
+            // Simulated stream receive for testing
+            async function* simulateStreamReceive(stream: AsyncIterable<Buffer>): AsyncGenerator<Buffer> {
+                for await (const chunk of stream) {
+                    const chunkSize = chunk.length;
+                    const numParts = Math.ceil(Math.random() * 3) + 1; // randomly split the chunk into 1-4 parts
+                    const partSize = Math.ceil(chunkSize / numParts);
+
+                    for (let i = 0; i < chunkSize; i += partSize) {
+                        const part = chunk.slice(i, Math.min(i + partSize, chunkSize));
+                        logger.channel()?.debug(`Simulated receiving part ${i / partSize + 1}/${numParts} of chunk, size: ${part.length} bytes`);
+                        yield part;
+                        await sleep(Math.random() * 100); // simulate network latency, 0-100ms
+                    }
+                }
+            }
+
             for await (const chunk of stream) {
                 if (!hasFirstChunk) {
                     hasFirstChunk = true;
@@ -236,33 +258,25 @@ export async function * devchatComplete(prompt: string) : AsyncGenerator
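
The patch is truncated after the final hunk header, so the point where the simulated receiver is wired into the real read loop is not visible here. The sketch below is a minimal, self-contained illustration of the technique the hunk above introduces: wrapping an async byte stream so each chunk is re-delivered in several randomly sized parts with random delay. The names simulateStreamReceive and sleep are carried over from the diff; fixedChunks, demo, and the Buffer-based typing are hypothetical test scaffolding, not code from llm.ts.

    import { Buffer } from 'buffer';

    // Sleep helper, mirroring the one the patch adds to llm.ts.
    const sleep = (ms: number) => new Promise<void>(resolve => setTimeout(resolve, ms));

    // Wrap any async byte stream so each chunk is re-emitted in 1-4 randomly
    // sized parts, with 0-100ms of simulated network latency between parts.
    async function* simulateStreamReceive(stream: AsyncIterable<Buffer>): AsyncGenerator<Buffer> {
        for await (const chunk of stream) {
            const numParts = Math.ceil(Math.random() * 3) + 1;
            const partSize = Math.ceil(chunk.length / numParts);
            for (let i = 0; i < chunk.length; i += partSize) {
                yield chunk.slice(i, Math.min(i + partSize, chunk.length));
                await sleep(Math.random() * 100);
            }
        }
    }

    // Hypothetical test source: one fixed SSE-style chunk.
    async function* fixedChunks(): AsyncGenerator<Buffer> {
        yield Buffer.from('data: {"choices":[{"text":"hello"}]}\n\n');
    }

    // Collect the fragments and confirm they reassemble to the original bytes.
    async function demo(): Promise<void> {
        const parts: Buffer[] = [];
        for await (const part of simulateStreamReceive(fixedChunks())) {
            parts.push(part);
        }
        console.log(`received ${parts.length} parts:`, Buffer.concat(parts).toString());
    }

    demo().catch(console.error);

Exercising the consumer against input fragmented this way is useful because SSE "data:" frames can be split across network reads at arbitrary byte boundaries, which is exactly the condition the refactored chunk handling in devchatComplete has to tolerate.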