From 9924472c1aafa4442110e46429c80189c07ecf30 Mon Sep 17 00:00:00 2001 From: "bobo.yang" Date: Mon, 8 Apr 2024 22:10:56 +0800 Subject: [PATCH] stage changes --- package.json | 17 +- src/contributes/codecomplete/README.md | 3 + src/contributes/codecomplete/cache.ts | 74 +++++++++ src/contributes/codecomplete/chunkFilter.ts | 148 +++++++++++++++++ src/contributes/codecomplete/codecomplete.ts | 154 ++++++++++++++++++ src/contributes/codecomplete/debouncer.ts | 31 ++++ src/contributes/codecomplete/llm.ts | 86 ++++++++++ src/contributes/codecomplete/modelConfig.ts | 23 +++ src/contributes/codecomplete/promptCreator.ts | 69 ++++++++ src/contributes/codecomplete/recentEdits.ts | 4 + src/contributes/codecomplete/status.ts | 0 src/contributes/codecomplete/symbols.ts | 5 + src/extension.ts | 6 + 13 files changed, 619 insertions(+), 1 deletion(-) create mode 100644 src/contributes/codecomplete/README.md create mode 100644 src/contributes/codecomplete/cache.ts create mode 100644 src/contributes/codecomplete/chunkFilter.ts create mode 100644 src/contributes/codecomplete/codecomplete.ts create mode 100644 src/contributes/codecomplete/debouncer.ts create mode 100644 src/contributes/codecomplete/llm.ts create mode 100644 src/contributes/codecomplete/modelConfig.ts create mode 100644 src/contributes/codecomplete/promptCreator.ts create mode 100644 src/contributes/codecomplete/recentEdits.ts create mode 100644 src/contributes/codecomplete/status.ts create mode 100644 src/contributes/codecomplete/symbols.ts diff --git a/package.json b/package.json index e5182e6..c92b527 100644 --- a/package.json +++ b/package.json @@ -724,6 +724,16 @@ { "command": "devchat.fix_chinese", "title": "Devchat:修复此" + }, + { + "command": "DevChat.codecomplete", + "title": "Codecomplete", + "category": "DevChat" + }, + { + "command": "DevChat.codecomplete_callback", + "title": "Codecomplete Callback", + "category": "DevChat" } ], "keybindings": [ @@ -731,7 +741,12 @@ "command": "devchat.openChatPanel", 
/*
 * In-memory store for the most recent code-completion results.
 */

/** A single cached entry. */
type CacheItem<T> = {
    value: T;
};

/**
 * Small least-recently-used (LRU) cache keyed by string.
 *
 * Recency is tracked through the insertion order of the underlying `Map`:
 * every write or successful read moves the entry to the end, so the first
 * key of the map is always the least recently used one. This replaces the
 * earlier `Date.now()` bookkeeping, whose millisecond resolution made
 * entries written and read within the same millisecond indistinguishable.
 *
 * The value type defaults to `any` so that existing untyped call sites
 * (`new MemoryCacheManager()`) keep compiling unchanged.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any -- backward-compatible default
class MemoryCacheManager<T = any> {
    private static readonly defaultCapacity = 5;
    private readonly maxCapacity: number;
    private readonly cache = new Map<string, CacheItem<T>>();

    /**
     * @param maxCapacity Maximum number of entries kept; defaults to 5.
     * @throws RangeError when `maxCapacity` is not a positive integer.
     */
    constructor(maxCapacity: number = MemoryCacheManager.defaultCapacity) {
        if (!Number.isInteger(maxCapacity) || maxCapacity < 1) {
            throw new RangeError(`maxCapacity must be a positive integer, got ${maxCapacity}`);
        }
        this.maxCapacity = maxCapacity;
    }

    /**
     * Adds or updates an entry and marks it as the most recently used.
     * When a new key would exceed the capacity, the least recently used
     * entry is evicted first.
     */
    set(key: string, value: T): void {
        // Removing first guarantees the re-insert lands at the end of the map.
        this.cache.delete(key);
        if (this.cache.size >= this.maxCapacity) {
            this.evict();
        }
        this.cache.set(key, { value });
    }

    /**
     * Returns the cached value for `key`, or `undefined` when absent.
     * A hit marks the entry as the most recently used.
     */
    get(key: string): T | undefined {
        const item = this.cache.get(key);
        if (item === undefined) {
            return undefined;
        }
        // Re-insert to refresh the entry's position in the recency order.
        this.cache.delete(key);
        this.cache.set(key, item);
        return item.value;
    }

    /**
     * Removes the entry for `key`.
     * @returns `true` if an entry existed and was removed.
     */
    delete(key: string): boolean {
        return this.cache.delete(key);
    }

    /** Drops the least recently used entry (the first key in the map). */
    private evict(): void {
        const oldest = this.cache.keys().next();
        if (!oldest.done) {
            this.cache.delete(oldest.value);
        }
    }
}

export default MemoryCacheManager;
/** Result of one code-completion request. */
export interface CodeCompleteResult {
    /** Prompt that was sent to the model. */
    prompt: string;
    /** Concatenated completion text that survived the stream filters. */
    code: string;
    /** Identifier reported by the model stream (last non-empty chunk id). */
    id: string;
}

/**
 * Filters the chunks streamed back by the LLM and decides when enough text
 * has been received, so that time is not spent on code that would be
 * discarded anyway.
 */
export class LLMStreamComplete {
    private readonly token: vscode.CancellationToken;
    private readonly curlineIndent: number;

    /**
     * @param token Cancellation token of the completion request.
     * @param curlineIndent Indentation (column of the first non-blank
     *   character) of the line the cursor is on.
     */
    constructor(token: vscode.CancellationToken, curlineIndent: number) {
        this.token = token;
        this.curlineIndent = curlineIndent;
    }

    /** Passes chunks through until the request is cancelled. */
    async *chunkStopCanceled(chunks: AsyncIterable<CodeCompletionChunk>): AsyncGenerator<CodeCompletionChunk> {
        for await (const chunk of chunks) {
            if (this.token.isCancellationRequested) {
                break;
            }
            yield chunk;
        }
    }

    /**
     * Stops the stream when the first non-empty chunk starts with a line
     * break, so that a completion never opens with an empty line.
     */
    async *stopWhenFirstCharIsNewLine(chunks: AsyncIterable<CodeCompletionChunk>): AsyncGenerator<CodeCompletionChunk> {
        let isFirst = true;
        for await (const chunk of chunks) {
            if (chunk.text.length === 0) {
                // Empty chunks carry no text: forward them once and keep
                // waiting for the first real character.
                yield chunk;
                continue;
            }

            if (isFirst && chunk.text[0] === "\n") {
                break;
            }
            isFirst = false;
            yield chunk;
        }
    }

    /**
     * Re-groups arbitrarily split chunks into one chunk per line. Every
     * emitted chunk except possibly the last ends with "\n"; the id of the
     * most recent chunk that had one is attached to each line.
     */
    async *toLines(chunks: AsyncIterable<CodeCompletionChunk>): AsyncGenerator<CodeCompletionChunk> {
        let pending = "";
        let id = "";
        for await (const chunk of chunks) {
            if (chunk.id) {
                id = chunk.id;
            }

            pending += chunk.text;
            let newline = pending.indexOf("\n");
            while (newline !== -1) {
                yield { text: pending.slice(0, newline + 1), id };
                pending = pending.slice(newline + 1);
                newline = pending.indexOf("\n");
            }
        }

        // Flush a trailing line that was not terminated by "\n".
        if (pending.length > 0) {
            yield { text: pending, id };
        }
    }

    /**
     * Indentation heuristic that ends the completion once the generated
     * lines appear to leave the block the cursor is in. Expects one line
     * per chunk (see `toLines`). The first line is always accepted and is
     * treated as having the cursor line's indentation.
     */
    async *stopAtSameBlock(chunks: AsyncIterable<CodeCompletionChunk>): AsyncGenerator<CodeCompletionChunk> {
        let index = 0;
        let preIndent = -1;
        let hasIndentBigger = false;
        let sameIndentTimes = 0;
        for await (const chunk of chunks) {
            // -1 for a line that contains only whitespace.
            let lineIndent = chunk.text.search(/\S/);
            if (index === 0) {
                lineIndent = this.curlineIndent;
            }

            // A non-blank line indented less than the cursor line.
            if (index > 0 && chunk.text.trim().length > 0 && lineIndent < this.curlineIndent) {
                break;
            }
            // Two consecutive lines at column 0.
            if (index > 0 && preIndent === 0 && lineIndent === 0) {
                break;
            }
            // Back at the cursor's indentation after a deeper block, with a
            // line longer than a short closer such as "}" or ");".
            if (index > 0 && hasIndentBigger && lineIndent === this.curlineIndent && chunk.text.trim().length > 3) {
                break;
            }
            if (index > 0 && preIndent === lineIndent) {
                sameIndentTimes += 1;
            } else {
                sameIndentTimes = 0;
            }

            // Third consecutive line with the same indentation.
            if (sameIndentTimes > 1) {
                break;
            }
            if (lineIndent > this.curlineIndent) {
                hasIndentBigger = true;
            }

            preIndent = lineIndent;

            index += 1;
            yield chunk;
        }
    }

    /**
     * Streams a completion for `prompt` through the cancellation, line
     * splitting and block-boundary filters and collects the result.
     *
     * @returns The collected completion, or `undefined` when the stream
     *   fails; errors are logged instead of propagating so that a model
     *   failure does not break the completion provider.
     */
    async llmStreamComplete(prompt: string): Promise<CodeCompleteResult | undefined> {
        try {
            const rawChunks = streamComplete(prompt);
            const liveChunks = this.chunkStopCanceled(rawChunks);
            const lineChunks = this.toLines(liveChunks);
            const blockChunks = this.stopAtSameBlock(lineChunks);

            let id = "";
            let completionCode = "";
            for await (const chunk of blockChunks) {
                completionCode += chunk.text;
                if (chunk.id) {
                    id = chunk.id;
                }
            }

            logger.channel()?.info("code:", completionCode);
            return { prompt, code: completionCode, id };
        } catch (err: unknown) {
            const message = err instanceof Error ? err.message : String(err);
            logger.channel()?.error("Error during code completion:", message);
            return undefined;
        }
    }
}
/**
 * Inline completion provider that asks the LLM for a completion at the
 * cursor, with debouncing of rapid requests and a small cache of recent
 * results keyed by prompt.
 */
export class InlineCompletionProvider implements vscode.InlineCompletionItemProvider {
    private debouncer: Debouncer;
    private cache: MemoryCacheManager;

    constructor() {
        // TODO: read the delay time from configuration.
        this.debouncer = new Debouncer(500);
        this.cache = new MemoryCacheManager();
    }

    /**
     * Builds the prompt for the cursor position and returns a completion,
     * served from the cache when the same prompt was completed before.
     *
     * @returns The completion, or `undefined` when the model produced
     *   nothing or the request was cancelled meanwhile.
     */
    async codeComplete(document: vscode.TextDocument, position: vscode.Position, context: vscode.InlineCompletionContext, token: vscode.CancellationToken): Promise<CodeCompleteResult | undefined> {
        const fsPath = document.uri.fsPath;
        const fileContent = document.getText();
        const prompt = await createPrompt(fsPath, fileContent, position.line, position.character);

        // Serve repeated prompts from the cache.
        const cached: CodeCompleteResult | undefined = this.cache.get(prompt);
        if (cached) {
            return cached;
        }

        // Indentation of the cursor line; 0 when the line is blank.
        const curlineIndent = Math.max(document.lineAt(position.line).text.search(/\S/), 0);

        const completor = new LLMStreamComplete(token, curlineIndent);
        const response = await completor.llmStreamComplete(prompt);
        if (!response || response.code.length === 0) {
            return undefined;
        }

        if (token.isCancellationRequested) {
            return undefined;
        }

        this.cache.set(prompt, response);
        return response;
    }

    /**
     * VS Code entry point. Returns at most one inline completion item whose
     * acceptance is reported through the "DevChat.codecomplete_callback"
     * command.
     */
    async provideInlineCompletionItems(document: vscode.TextDocument, position: vscode.Position, context: vscode.InlineCompletionContext, token: vscode.CancellationToken): Promise<vscode.InlineCompletionItem[]> {
        const shouldRun = await this.debouncer.debounce();
        if (!shouldRun) {
            return [];
        }

        // The debounce may have waited; skip the model call for a request
        // that was cancelled in the meantime.
        if (token.isCancellationRequested) {
            return [];
        }

        const response = await this.codeComplete(document, position, context, token);
        if (!response) {
            return [];
        }

        if (token.isCancellationRequested) {
            return [];
        }

        // TODO: whether the suggestion was really shown to the user needs a
        // more precise way to be detected.
        logger.channel()?.info("code complete show.");

        // Logged unless the accept callback below clears the timer first.
        const logRejectionTimeout = setTimeout(() => {
            logger.channel()?.info("code complete not accept.");
        }, 10_000);

        // Invoked through the callback command when the item is accepted.
        const callback = () => {
            logger.channel()?.info("accept:", response.id);
            // An accepted completion changes the document, so drop it.
            this.cache.delete(response.prompt);
            clearTimeout(logRejectionTimeout);
        };

        return [
            new vscode.InlineCompletionItem(
                response.code,
                new vscode.Range(position, position),
                {
                    title: "code complete accept",
                    command: "DevChat.codecomplete_callback",
                    arguments: [callback],
                }
            ),
        ];
    }
}
/**
 * Leading-edge debouncer for completion requests.
 *
 * The first call while idle passes immediately and opens a quiet window of
 * `debounceDelay` milliseconds. Every call made while the window is open
 * restarts the window, waits `debounceDelay` milliseconds and then passes
 * only if no later call arrived in the meantime.
 *
 * Calls are told apart by a monotonically increasing sequence number rather
 * than by `Date.now()`, because several calls can share one millisecond
 * timestamp and would then all be treated as "the latest".
 */
export class Debouncer {
    private debouncing = false;
    private debounceTimeout?: ReturnType<typeof setTimeout>;
    private latestRequest = 0;

    /** @param debounceDelay Length of the quiet window in milliseconds. */
    constructor(private debounceDelay: number) { }

    /**
     * @returns `true` when the caller should proceed, `false` when a newer
     *   call superseded this one.
     */
    async debounce(): Promise<boolean> {
        const request = ++this.latestRequest;

        if (!this.debouncing) {
            this.debouncing = true;
            this.restartQuietWindow();
            return true;
        }

        this.restartQuietWindow();
        await new Promise<void>((resolve) => setTimeout(resolve, this.debounceDelay));
        return request === this.latestRequest;
    }

    /** (Re)starts the timer after which the debouncer becomes idle again. */
    private restartQuietWindow(): void {
        clearTimeout(this.debounceTimeout);
        this.debounceTimeout = setTimeout(() => {
            this.debouncing = false;
        }, this.debounceDelay);
    }
}

export default Debouncer;
"temperature": 0.2, + "top_p": 0.7, + "max_tokens": 1024, + "seed": 42, + "bad": null, + "stop": [""], + "stream": true + }; + + try { + const response = await fetch(invokeUrl, { + method: 'POST', + headers, + body: JSON.stringify(payload), + }); + + if (response.ok && response.body) { + const stream = response.body as any; + const decoder = new TextDecoder("utf-8"); + + for await (const chunk of stream) { + const chunkText = decoder.decode(chunk).trim(); + // data: {"id":"5d3376e0-2abc-4230-b796-c6fc9ae91cd4","choices":[{"index":0,"delta":"-","finish_reason":null}]}\n\n + if (!chunkText.startsWith("data:")) { + // log unexpected data + logger.channel()?.info("Unexpected data: " + chunkText); + return; + } + + const jsonData = chunkText.substring(5).trim(); + if (jsonData === "[DONE]") { + return; + } + + + try { + const data = JSON.parse(chunkText.substring(5).trim()); + yield { + text: data.choices[0].delta, + id: data.id + }; + } catch (e: any) { + logger.channel()?.info("receve:", chunkText); + logger.channel()?.error("JSON Parsing Error:", e.message); + } + } + } else { + logger.channel()?.error("Error making request:", response.statusText); + } + } catch (error: any) { + logger.channel()?.error("Error making request:", error.message); + } +} diff --git a/src/contributes/codecomplete/modelConfig.ts b/src/contributes/codecomplete/modelConfig.ts new file mode 100644 index 0000000..ce543e3 --- /dev/null +++ b/src/contributes/codecomplete/modelConfig.ts @@ -0,0 +1,23 @@ + + +export interface ModelConfigTemplate { + template: string; + stop: string[]; +} + +const stableCodeTemplate: ModelConfigTemplate = { + template: "{{{prefix}}}{{{suffix}}}", + stop: ["", "", "", "<|endoftext|>"], +}; + +const MODLE_COMPLETE_CONFIG = { + 'starcoder': stableCodeTemplate, + 'starcoder2': stableCodeTemplate, +}; + +export function getModelConfigTemplate(modelName: string): ModelConfigTemplate | undefined { + if (modelName in MODLE_COMPLETE_CONFIG) { + return 
/*
 * Builds the prompt for code completion.
 *
 * The quality of a prompt depends on the context it carries. Extracting the
 * context from an AST might be more accurate, but is also heavier; for now
 * the context is simply the text around the cursor.
 */

/** Soft limit (characters) for the text taken before the cursor. */
const PREFIX_MAX_SIZE = 600;
/** Soft limit (characters) for the text taken after the cursor. */
const SUFFIX_MAX_SIZE = 400;

// StarCoder fill-in-the-middle sentinels. Without them the model receives
// the prefix and suffix glued together and cannot tell where the cursor is.
const FIM_PREFIX = "<fim_prefix>";
const FIM_SUFFIX = "<fim_suffix>";
const FIM_MIDDLE = "<fim_middle>";

/**
 * Collects the text around the cursor, growing line by line away from it.
 *
 * Both limits are soft: whole lines are added until the collected text
 * exceeds the limit, so the result can overshoot by up to one line.
 *
 * @param filePath Path of the file being edited (currently unused).
 * @param fileContent Full text of the file.
 * @param line Zero-based cursor line; clamped to the lines of the file.
 * @param column Zero-based cursor column within that line.
 * @returns `prefix` ends exactly at the cursor; `suffix` starts at the
 *   cursor and every line in it, including the last, is followed by "\n".
 */
async function curfilePrompt(filePath: string, fileContent: string, line: number, column: number): Promise<{ prefix: string; suffix: string }> {
    const lines = fileContent.split('\n');
    // `split` always yields at least one element, so the clamp is safe.
    const cursorLine = Math.min(Math.max(line, 0), lines.length - 1);

    // Walk upwards from the cursor line to build the prefix.
    let prefix = lines[cursorLine].substring(0, column);
    for (let i = cursorLine - 1; i >= 0 && prefix.length <= PREFIX_MAX_SIZE; i--) {
        prefix = lines[i] + '\n' + prefix;
    }

    // Walk downwards, starting with the rest of the cursor line.
    let suffix = lines[cursorLine].substring(column) + '\n';
    for (let i = cursorLine + 1; i < lines.length && suffix.length <= SUFFIX_MAX_SIZE; i++) {
        suffix += lines[i] + '\n';
    }

    return { prefix, suffix };
}

/**
 * Creates the fill-in-the-middle prompt for the given cursor position:
 * `<fim_prefix>` + text before the cursor + `<fim_suffix>` + text after the
 * cursor + `<fim_middle>`.
 */
export async function createPrompt(filePath: string, fileContent: string, line: number, column: number): Promise<string> {
    const { prefix, suffix } = await curfilePrompt(filePath, fileContent, line, column);
    return FIM_PREFIX + prefix + FIM_SUFFIX + suffix + FIM_MIDDLE;
}
a/src/contributes/codecomplete/recentEdits.ts b/src/contributes/codecomplete/recentEdits.ts new file mode 100644 index 0000000..0525821 --- /dev/null +++ b/src/contributes/codecomplete/recentEdits.ts @@ -0,0 +1,4 @@ +/* + 记录最近修改的内容,用于代码补全 +*/ + diff --git a/src/contributes/codecomplete/status.ts b/src/contributes/codecomplete/status.ts new file mode 100644 index 0000000..e69de29 diff --git a/src/contributes/codecomplete/symbols.ts b/src/contributes/codecomplete/symbols.ts new file mode 100644 index 0000000..d66a430 --- /dev/null +++ b/src/contributes/codecomplete/symbols.ts @@ -0,0 +1,5 @@ +/* + 获取符号名称、符号类型(函数、变量、类等)、符号所在文件路径、符号所在行数等信息 + 猜想这些信息会有助于代码补全功能的准确率提升 +*/ + diff --git a/src/extension.ts b/src/extension.ts index ef34ae7..c974462 100644 --- a/src/extension.ts +++ b/src/extension.ts @@ -130,6 +130,12 @@ async function activate(context: vscode.ExtensionContext) { regLanguageContext(); regDevChatView(context); + + const provider = new InlineCompletionProvider(); + const selector = { pattern: "**" }; + context.subscriptions.push(vscode.languages.registerInlineCompletionItemProvider(selector, provider)); + registerCodeCompleteCallbackCommand(context); + registerOpenChatPanelCommand(context); registerAddContextCommand(context);