code completion for vscode 0.1
This commit is contained in:
parent
6518abb20b
commit
5f113d10a1
43276
package-lock.json
generated
43276
package-lock.json
generated
File diff suppressed because it is too large
Load Diff
23
package.json
23
package.json
@ -2,7 +2,7 @@
|
||||
"name": "devchat",
|
||||
"displayName": "DevChat",
|
||||
"description": "Write prompts, not code",
|
||||
"version": "0.1.65",
|
||||
"version": "0.1.74",
|
||||
"icon": "assets/devchat.png",
|
||||
"publisher": "merico",
|
||||
"engines": {
|
||||
@ -725,11 +725,11 @@
|
||||
"command": "devchat.fix_chinese",
|
||||
"title": "Devchat:修复此"
|
||||
},
|
||||
{
|
||||
"command": "DevChat.codecomplete",
|
||||
"title": "Codecomplete",
|
||||
"category": "DevChat"
|
||||
},
|
||||
{
|
||||
"command": "DevChat.codecomplete",
|
||||
"title": "Codecomplete",
|
||||
"category": "DevChat"
|
||||
},
|
||||
{
|
||||
"command": "DevChat.codecomplete_callback",
|
||||
"title": "Codecomplete Callback",
|
||||
@ -743,10 +743,10 @@
|
||||
"mac": "cmd+shift+/"
|
||||
},
|
||||
{
|
||||
"command": "DevChat.codecomplete",
|
||||
"key": "ctrl+shift+'",
|
||||
"mac": "cmd+shift+'"
|
||||
}
|
||||
"command": "DevChat.codecomplete",
|
||||
"key": "ctrl+shift+'",
|
||||
"mac": "cmd+shift+'"
|
||||
}
|
||||
],
|
||||
"menus": {
|
||||
"editor/title": [
|
||||
@ -924,6 +924,7 @@
|
||||
"dayjs": "^1.11.10",
|
||||
"dotenv": "^16.0.3",
|
||||
"js-yaml": "^4.1.0",
|
||||
"llama-tokenizer-js": "^1.2.1",
|
||||
"mdast": "^3.0.0",
|
||||
"mobx": "^6.12.0",
|
||||
"ncp": "^2.0.0",
|
||||
@ -934,9 +935,11 @@
|
||||
"shell-escape": "^0.2.0",
|
||||
"string-argv": "^0.3.2",
|
||||
"tree-kill": "^1.2.2",
|
||||
"tree-sitter-wasms": "^0.1.7",
|
||||
"unified": "^11.0.3",
|
||||
"unist-util-visit": "^5.0.0",
|
||||
"uuid": "^9.0.0",
|
||||
"web-tree-sitter": "^0.22.2",
|
||||
"yaml": "^2.3.2"
|
||||
}
|
||||
}
|
||||
|
137
src/contributes/codecomplete/ast/ast.ts
Normal file
137
src/contributes/codecomplete/ast/ast.ts
Normal file
@ -0,0 +1,137 @@
|
||||
import * as path from "path";
|
||||
import Parser from "web-tree-sitter";
|
||||
import { getParserForFile } from "./treeSitter";
|
||||
import MemoryCacheManager from "../cache";
|
||||
import * as crypto from 'crypto';
|
||||
import { UiUtilWrapper } from "../../../util/uiUtil";
|
||||
|
||||
|
||||
|
||||
/**
 * A span inside a file, bundled with text.
 *
 * `getScopeAroundRange` uses `line` as an index into the contents split on
 * "\n" and fills results from tree-sitter rows/columns, so positions are
 * treated as zero-based there.
 */
export interface RangeInFileWithContents {
    /** Path of the file the span belongs to. */
    filepath: string;
    /** Start and end positions of the span. */
    range: {
        start: { line: number; character: number };
        end: { line: number; character: number };
    };
    /** Text associated with the span. */
    contents: string;
}
|
||||
|
||||
// Cache of parsed syntax trees used by getAst (constructed with capacity 30).
const astCache: MemoryCacheManager = new MemoryCacheManager(30);
|
||||
|
||||
/**
 * Parse `fileContents` with the tree-sitter parser chosen for `filepath`.
 *
 * Parsed trees are memoised in `astCache`. The cache key is a SHA-256 digest
 * of the file extension plus the contents: the parser is selected from the
 * extension, so identical text in files of different languages must not share
 * a cached tree.
 *
 * @param filepath path used to select the parser
 * @param fileContents source text to parse
 * @param cacheEnable when false the cache is neither consulted nor updated
 * @returns the parsed tree, or undefined when no parser is available or
 *          parsing throws
 */
export async function getAst(
    filepath: string,
    fileContents: string,
    cacheEnable: boolean = true
): Promise<Parser.Tree | undefined> {
    const extension = filepath.split('.').pop() || '';
    const cacheKey = crypto
        .createHash('sha256')
        .update(extension)
        .update('\0') // separator so extension and contents cannot run together
        .update(fileContents)
        .digest('hex');

    if (cacheEnable) {
        const cachedAst = astCache.get(cacheKey);
        if (cachedAst) {
            return cachedAst;
        }
    }

    const parser = await getParserForFile(filepath);
    if (!parser) {
        return undefined;
    }

    try {
        const ast = parser.parse(fileContents);
        if (cacheEnable) {
            astCache.set(cacheKey, ast);
        }
        return ast;
    } catch (e) {
        // Best effort: a parse failure means "no AST", not an error for the caller.
        return undefined;
    }
}
|
||||
|
||||
/**
 * Walk from the root of `ast` towards `cursorIndex`.
 *
 * At each level the first child whose [startIndex, endIndex] range (both ends
 * inclusive) contains the index is followed. The walk stops at a node without
 * children or when no child contains the index.
 *
 * @param ast parsed tree to descend
 * @param cursorIndex index compared against each node's startIndex/endIndex
 * @returns the visited nodes, root first
 */
export async function getTreePathAtCursor(
    ast: Parser.Tree,
    cursorIndex: number,
): Promise<Parser.SyntaxNode[] | undefined> {
    const chain = [ast.rootNode];
    let current = ast.rootNode;

    while (current.childCount > 0) {
        const next = current.children.find(
            (candidate) => candidate.startIndex <= cursorIndex && candidate.endIndex >= cursorIndex,
        );
        if (next === undefined) {
            break;
        }
        chain.push(next);
        current = next;
    }

    return chain;
}
|
||||
|
||||
/**
 * Find the first direct child of the root whose row span contains `line`.
 *
 * Only the root's immediate children are inspected; `character` is accepted
 * but not used.
 *
 * @returns the matching child, or undefined when the root has no children or
 *          none spans `line`
 */
export async function getAstNodeByRange( ast: Parser.Tree, line: number, character: number): Promise<Parser.SyntaxNode | undefined> {
    const root = ast.rootNode;
    if (!(root.childCount > 0)) {
        return undefined;
    }

    return root.children.find(
        (candidate) => candidate.startPosition.row <= line && candidate.endPosition.row >= line,
    );
}
|
||||
|
||||
/**
 * Find the innermost syntax node that strictly encloses `range`.
 *
 * `range.contents` is parsed with `getAst`, the start and end positions are
 * converted to offsets into that text, and the tree is descended through the
 * first child at each level whose span strictly contains both offsets.
 *
 * @param range file path, text to parse and the span of interest
 * @returns the enclosing node's text and position, or undefined when no AST
 *          could be produced
 */
export async function getScopeAroundRange(
    range: RangeInFileWithContents,
): Promise<RangeInFileWithContents | undefined> {
    const ast = await getAst(range.filepath, range.contents);
    if (!ast) {
        return undefined;
    }

    const lines = range.contents.split("\n");

    // Offset of (line, character) within range.contents: every preceding line
    // contributes its length plus one for its "\n", then the characters before
    // the position on the target line are added.
    const offsetOf = (line: number, character: number): number => {
        let offset = 0;
        const precedingLines = Math.min(line, lines.length);
        for (let i = 0; i < precedingLines; i++) {
            offset += lines[i].length + 1;
        }
        offset += lines[line]?.slice(0, character).length ?? 0;
        // A position past the last line must not point beyond the text.
        return Math.min(offset, range.contents.length);
    };

    const { start: s, end: e } = range.range;
    const startIndex = offsetOf(s.line, s.character);
    const endIndex = offsetOf(e.line, e.character);

    let node = ast.rootNode;
    while (node.childCount > 0) {
        const enclosing = node.children.find(
            (child) => child.startIndex < startIndex && child.endIndex > endIndex,
        );
        if (!enclosing) {
            break;
        }
        node = enclosing;
    }

    return {
        contents: node.text,
        filepath: range.filepath,
        range: {
            start: {
                line: node.startPosition.row,
                character: node.startPosition.column,
            },
            end: {
                line: node.endPosition.row,
                character: node.endPosition.column,
            },
        },
    };
}
|
68
src/contributes/codecomplete/ast/collapseBlock.ts
Normal file
68
src/contributes/codecomplete/ast/collapseBlock.ts
Normal file
@ -0,0 +1,68 @@
|
||||
/*
|
||||
针对代码补全功能,构建prompt
|
||||
|
||||
prompt的好坏,取决于提供的上下文信息。
|
||||
通过AST获取相对完整的信息,可能会增加提示的准确度,但也会增加代码提示的复杂度。
|
||||
*/
|
||||
|
||||
import { logger } from "../../../util/logger";
|
||||
import { getAst, getTreePathAtCursor, RangeInFileWithContents } from "./ast";
|
||||
import Parser from "web-tree-sitter";
|
||||
import { getCommentPrefix, getLangageFunctionConfig, LanguageFunctionsConfig } from "./language";
|
||||
import { findFunctionRanges, FunctionRange } from "./findFunctions";
|
||||
|
||||
|
||||
/**
 * Produce a copy of `contents` in which long function bodies are replaced by
 * a one-line placeholder comment.
 *
 * @param filepath path used to pick the parser and comment prefix
 * @param contents source text of the file
 * @returns the collapsed text, or "" when the file cannot be parsed
 */
export async function collapseFile(
    filepath: string,
    contents: string,
) : Promise< string > {
    const tree = await getAst(filepath, contents);
    if (tree) {
        const ranges = await findFunctionRanges(filepath, tree.rootNode);
        return await collapseAllCodeBlock(ranges, filepath, contents);
    }

    return "";
}
|
||||
|
||||
/**
 * Replace the body of every sufficiently long function with a single
 * `<indent><commentPrefix>...` placeholder line.
 *
 * Functions are processed from last to first so that earlier rows stay valid
 * while later lines are removed. The caller's `functions` array is left
 * untouched.
 *
 * @param functions definition/body ranges, assumed to be in document order
 * @param filepath path used to look up the comment prefix
 * @param contents source text the ranges refer to
 * @returns the text with collapsed bodies
 */
export async function collapseAllCodeBlock(functions: FunctionRange[], filepath: string, contents: string) {
    const commentPrefix = await getCommentPrefix(filepath);
    const lines = contents.split("\n");

    // Collapses already applied: the row where the placeholder was written and
    // the net number of lines the file lost there. Needed because an enclosing
    // function is processed after the functions nested in it, when its
    // original end row no longer matches `lines`.
    const applied: { row: number; dropped: number }[] = [];

    // Iterate over a reversed copy; Array.prototype.reverse mutates in place.
    for (const func of [...functions].reverse()) {
        const funcDefine = func.define;
        const funcBody = func.body;

        // Empty body: compare positions by value, not by object identity.
        if (funcBody.start.row === funcBody.end.row && funcBody.start.column === funcBody.end.column) {
            continue;
        }

        const bodyFirstRowText = lines[funcBody.start.row];
        if (bodyFirstRowText === undefined) {
            // The range points outside the text; skip it rather than throw.
            continue;
        }

        let bodyStartLine = funcBody.start.row;
        let bodyEndLine = funcBody.end.row;
        // When the body opens on a line that also holds other code (the
        // definition itself or anything before the body), keep the first and
        // last body lines and collapse only what lies between them.
        const textBeforeBody = bodyFirstRowText.slice(0, funcBody.start.column);
        if (funcDefine.start.row === funcBody.start.row || textBeforeBody.trim() !== "") {
            bodyStartLine = funcBody.start.row + 1;
            bodyEndLine = funcBody.end.row - 1;
        }

        // Short bodies are left as they are.
        if (bodyEndLine - bodyStartLine <= 3) {
            continue;
        }

        // Shrink the end row by what nested collapses already removed.
        let alreadyDropped = 0;
        for (const collapse of applied) {
            if (collapse.row >= bodyStartLine && collapse.row <= bodyEndLine) {
                alreadyDropped += collapse.dropped;
            }
        }
        const removeCount = bodyEndLine - alreadyDropped - bodyStartLine + 1;
        const firstBodyLine = lines[bodyStartLine];
        if (removeCount <= 0 || firstBodyLine === undefined) {
            continue;
        }

        // Reuse the indentation width of the first collapsed line for the
        // placeholder; a whitespace-only line counts with its full length.
        let indent = firstBodyLine.search(/\S/);
        if (indent === -1) {
            indent = firstBodyLine.length;
        }
        const indentStr = " ".repeat(indent);

        lines.splice(bodyStartLine, removeCount, `${indentStr}${commentPrefix}...`);
        applied.push({ row: bodyStartLine, dropped: removeCount - 1 });
    }

    return lines.join("\n");
}
|
74
src/contributes/codecomplete/ast/findFunctions.ts
Normal file
74
src/contributes/codecomplete/ast/findFunctions.ts
Normal file
@ -0,0 +1,74 @@
|
||||
/*
|
||||
针对代码补全功能,构建prompt
|
||||
|
||||
prompt的好坏,取决于提供的上下文信息。
|
||||
通过AST获取相对完整的信息,可能会增加提示的准确度,但也会增加代码提示的复杂度。
|
||||
*/
|
||||
|
||||
import { logger } from "../../../util/logger";
|
||||
import { getAst, getTreePathAtCursor, RangeInFileWithContents } from "./ast";
|
||||
import Parser from "web-tree-sitter";
|
||||
import { getCommentPrefix, getLangageFunctionConfig, LanguageFunctionsConfig } from "./language";
|
||||
import { getLanguageForFile, getQueryFunctionsSource } from "./treeSitter";
|
||||
|
||||
|
||||
/**
 * Location of one function found by `findFunctionRanges`, as tree-sitter
 * row/column positions.
 */
export interface FunctionRange {
    /** Span of the node captured as "function". */
    define: {
        start: { row: number; column: number };
        end: { row: number; column: number };
    };
    /** Span of the node captured as "function.body". */
    body: {
        start: { row: number; column: number };
        end: { row: number; column: number };
    };
}
|
||||
|
||||
/**
 * Run the language's function query against `node` and return the position
 * of every function together with its body.
 *
 * A match contributes a result only when it has both a "function" and a
 * "function.body" capture.
 *
 * @param filepath path used to select the grammar and the query source
 * @param node syntax node to search
 * @returns one entry per matched function; [] when the language or its query
 *          source is unavailable
 */
export async function findFunctionRanges(filepath: string, node: Parser.SyntaxNode): Promise<FunctionRange[]> {
    const lang = await getLanguageForFile(filepath);
    if (!lang) {
        return [];
    }

    const querySource = await getQueryFunctionsSource(filepath);
    if (!querySource) {
        return [];
    }

    const query = lang.query(querySource);
    try {
        const ranges: FunctionRange[] = [];
        for (const match of query.matches(node)) {
            // Locate the nodes through their capture names.
            const functionNode = match.captures.find((capture) => capture.name === "function")?.node;
            const bodyNode = match.captures.find((capture) => capture.name === "function.body")?.node;
            if (!functionNode || !bodyNode) {
                continue;
            }

            // Copy the positions into plain objects.
            ranges.push({
                define: {
                    start: {
                        row: functionNode.startPosition.row,
                        column: functionNode.startPosition.column,
                    },
                    end: {
                        row: functionNode.endPosition.row,
                        column: functionNode.endPosition.column,
                    },
                },
                body: {
                    start: {
                        row: bodyNode.startPosition.row,
                        column: bodyNode.startPosition.column,
                    },
                    end: {
                        row: bodyNode.endPosition.row,
                        column: bodyNode.endPosition.column,
                    },
                },
            });
        }
        return ranges;
    } finally {
        // A new query is compiled on every call; release it when done.
        query.delete();
    }
}
|
160
src/contributes/codecomplete/ast/language.ts
Normal file
160
src/contributes/codecomplete/ast/language.ts
Normal file
@ -0,0 +1,160 @@
|
||||
/*
|
||||
不同源码语言有不同语法信息,例如注释标记。
|
||||
*/
|
||||
|
||||
/**
 * Maps a file extension (without the dot) to a tree-sitter grammar name.
 * The grammar name is used to build the file name
 * `tree-sitter-<name>.wasm` when a language is loaded.
 *
 * Written as [grammar, extensions] groups and flattened into a plain
 * extension -> grammar object. There is no entry for tree-sitter-objc.wasm.
 */
export const supportedLanguages: { [key: string]: string } = Object.fromEntries(
    ([
        ["bash", ["bash", "sh"]],
        ["c", ["c", "h"]],
        ["c_sharp", ["cs"]],
        ["cpp", ["cpp", "hpp", "cc", "cxx", "hxx", "cp", "hh", "inc"]],
        ["css", ["css"]],
        ["elm", ["elm"]],
        ["elisp", ["el", "emacs"]],
        ["elixir", ["ex", "exs"]],
        ["embedded_template", ["eex", "heex", "leex"]],
        ["go", ["go"]],
        ["html", ["html", "htm"]],
        ["java", ["java"]],
        ["typescript", ["ts", "mts", "cts"]],
        ["javascript", ["js", "jsx", "mjs", "cjs"]],
        ["json", ["json"]],
        ["kotlin", ["kt"]],
        ["lua", ["lua"]],
        ["ocaml", ["ocaml", "ml", "mli"]],
        ["php", ["php", "phtml", "php3", "php4", "php5", "php7", "phps", "php-s"]],
        ["python", ["py", "pyw", "pyi"]],
        ["ql", ["ql"]],
        ["rescript", ["res", "resi"]],
        ["ruby", ["rb", "erb"]],
        ["rust", ["rs"]],
        ["scala", ["scala"]],
        ["swift", ["swift"]],
        ["systemrdl", ["rdl"]],
        ["toml", ["toml"]],
        ["tsx", ["tsx"]],
        ["vue", ["vue"]],
    ] as [string, string[]][]).flatMap(([grammar, extensions]) =>
        extensions.map((extension): [string, string] => [extension, grammar]),
    ),
);
|
||||
|
||||
|
||||
// tree-sitter tag to find functions
|
||||
// Per-grammar settings: the line-comment prefix and the trailing characters
// returned by getEndOfLine. Keys are grammar names (values of supportedLanguages).
const LANG_CONFIG = {
    cpp: {
        commentPrefix: "//",
        endOfLine: [";", ",", ")", "}", "]"],
    },
    python: {
        commentPrefix: "#",
        endOfLine: [",", ")", "}", "]"],
    },
    javascript: {
        commentPrefix: "//",
        endOfLine: [";", ",", ")", "}", "]"],
    },
    typescript: {
        commentPrefix: "//",
        endOfLine: [";", ",", ")", "}", "]"],
    },
    java: {
        commentPrefix: "//",
        endOfLine: [";", ",", ")", "}", "]"],
    },
    c_sharp: {
        commentPrefix: "//",
        endOfLine: [";", ",", ")", "}", "]"],
    },
    go: {
        commentPrefix: "//",
        endOfLine: [",", ")", "}", "]"],
    },
    rust: {
        commentPrefix: "//",
        endOfLine: [";", ",", ")", "}", "]"],
    },
    dart: {
        commentPrefix: "//",
        endOfLine: [";", ",", ")", "}", "]"],
    },
};
|
||||
|
||||
|
||||
/**
 * Element type returned by `getLangageFunctionConfig`.
 * NOTE(review): presumably `parent` and `body` name the tree-sitter node
 * types of a function and of its body; confirm against the intended config.
 */
export interface LanguageFunctionsConfig {
    parent: string;
    body: string;
}
|
||||
|
||||
/**
 * Look up the grammar name for a file from the text after its last ".".
 *
 * @param filepath path whose final "."-separated segment is used as the key
 * @returns the entry from `supportedLanguages`, or undefined when there is none
 */
export async function getLanguageFullName(filepath: string): Promise<string | undefined> {
    const segments = filepath.split('.');
    const extension = segments[segments.length - 1] || '';
    return supportedLanguages[extension];
}
|
||||
|
||||
/**
 * Return the "functions" configuration of the language a file belongs to.
 *
 * None of the current `LANG_CONFIG` entries defines a "functions" key, so
 * this returns an empty array for them instead of `undefined`, keeping the
 * declared return type true.
 *
 * @param filepath path whose extension selects the language
 * @returns the configured function descriptors, or [] when the extension is
 *          unknown, the language has no config, or the config has no
 *          "functions" array
 */
export async function getLangageFunctionConfig(filepath: string): Promise<LanguageFunctionsConfig[]> {
    const extension = filepath.split('.').pop() || '';
    const extensionLang = supportedLanguages[extension];
    if (!extensionLang) {
        return [];
    }

    const configs: Record<string, unknown> = LANG_CONFIG;
    const langConfig = configs[extensionLang];
    if (typeof langConfig !== "object" || langConfig === null) {
        return [];
    }

    const functions = (langConfig as { functions?: unknown }).functions;
    return Array.isArray(functions) ? functions : [];
}
|
||||
|
||||
/**
 * Return the line-comment prefix configured for a file's language.
 *
 * @param filepath path whose extension selects the language
 * @returns the configured prefix; "//" when the extension is unknown or the
 *          language has no `LANG_CONFIG` entry
 */
export async function getCommentPrefix(filepath: string): Promise<string> {
    const extension = filepath.split('.').pop() || '';
    const extensionLang = supportedLanguages[extension];

    const hasConfig = Boolean(extensionLang) && Boolean(LANG_CONFIG[extensionLang]);
    return hasConfig ? LANG_CONFIG[extensionLang]["commentPrefix"] : "//";
}
|
||||
|
||||
/**
 * Return the "endOfLine" character list configured for a file's language.
 *
 * @param filepath path whose extension selects the language
 * @returns the configured list; [] when the extension is unknown or the
 *          language has no `LANG_CONFIG` entry
 */
export async function getEndOfLine(filepath: string): Promise<string[]> {
    const extension = filepath.split('.').pop() || '';
    const extensionLang = supportedLanguages[extension];

    const hasConfig = Boolean(extensionLang) && Boolean(LANG_CONFIG[extensionLang]);
    return hasConfig ? LANG_CONFIG[extensionLang]["endOfLine"] : [];
}
|
123
src/contributes/codecomplete/ast/treeSitter.ts
Normal file
123
src/contributes/codecomplete/ast/treeSitter.ts
Normal file
@ -0,0 +1,123 @@
|
||||
/*
|
||||
This file is copied from the Continue repo.
|
||||
*/
|
||||
|
||||
import { logger } from "../../../util/logger";
|
||||
import { UiUtilWrapper } from "../../../util/uiUtil";
|
||||
import * as path from "path";
|
||||
import * as fs from "fs";
|
||||
import { Language } from "web-tree-sitter";
|
||||
import Parser = require("web-tree-sitter");
|
||||
import { getLanguageFullName, supportedLanguages } from "./language";
|
||||
import MemoryCacheManager from "../cache";
|
||||
|
||||
|
||||
// Parsers and loaded grammars are cached per file extension (capacity 4 each).
const parserCache: MemoryCacheManager = new MemoryCacheManager(4);
const langCache: MemoryCacheManager = new MemoryCacheManager(4);
|
||||
|
||||
/**
 * Return a tree-sitter parser configured for the language of `filepath`.
 *
 * Parsers are cached per file extension. When no grammar can be loaded for
 * the file, nothing is created or cached and undefined is returned, so a
 * parser without a language never reaches callers.
 *
 * @param filepath path whose extension selects the grammar
 * @returns the parser, or undefined when IS_BINARY is set, the language is
 *          unsupported, or initialisation fails
 */
export async function getParserForFile(filepath: string) {
    if (process.env.IS_BINARY) {
        return undefined;
    }

    try {
        const extension = filepath.split('.').pop() || '';
        const cachedParser = parserCache.get(extension);
        if (cachedParser) {
            return cachedParser;
        }

        await Parser.init({
            locateFile(filename: string) {
                if (filename === 'tree-sitter.wasm') {
                    // The runtime wasm ships inside the extension's tools directory.
                    const wasmPath = path.join(
                        UiUtilWrapper.extensionPath(),
                        "tools",
                        "tree-sitter-wasms",
                        `tree-sitter.wasm`,
                    );
                    return wasmPath;
                }
                return filename;
            }
        });

        // Resolve the grammar first: without one a parser would be useless.
        const language = await getLanguageForFile(filepath);
        if (!language) {
            return undefined;
        }

        const parser = new Parser();
        parser.setLanguage(language);

        parserCache.set(extension, parser);
        return parser;
    } catch (e) {
        logger.channel()?.error("Unable to load language for file", filepath, e);
        return undefined;
    }
}
|
||||
|
||||
/**
 * Load (or fetch from cache) the tree-sitter grammar for a file.
 *
 * The grammar is read from
 * `<extensionPath>/tools/tree-sitter-wasms/tree-sitter-<name>.wasm` and
 * cached per file extension.
 *
 * @param filepath path whose extension selects the grammar
 * @returns the grammar, or undefined when the extension is unsupported or
 *          loading fails (the failure is logged)
 */
export async function getLanguageForFile(
    filepath: string,
): Promise<Language | undefined> {
    try {
        await Parser.init();

        const extension = filepath.split('.').pop() || '';
        const cachedLang = langCache.get(extension);
        if (cachedLang) {
            return cachedLang;
        }

        const grammarName = supportedLanguages[extension];
        if (!grammarName) {
            return undefined;
        }

        const wasmPath = path.join(
            UiUtilWrapper.extensionPath(),
            "tools",
            "tree-sitter-wasms",
            `tree-sitter-${grammarName}.wasm`,
        );
        const language = await Parser.Language.load(wasmPath);
        langCache.set(extension, language);
        return language;
    } catch (e) {
        logger.channel()?.error("Unable to load language for file:", filepath, e);
        return undefined;
    }
}
|
||||
|
||||
/**
 * Read the `variables.scm` tree-sitter query for a file's language from
 * `<extensionPath>/tools/tree-sitter-queries/<language>/`.
 *
 * @param filepath path whose extension selects the language
 * @returns the query text, or "" when the language is unknown or the query
 *          file does not exist
 */
export async function getQueryVariablesSource(filepath: string) {
    const fullLangName = await getLanguageFullName(filepath);
    if (fullLangName) {
        const sourcePath = path.join(
            UiUtilWrapper.extensionPath(),
            "tools",
            "tree-sitter-queries",
            fullLangName,
            "variables.scm",
        );
        if (fs.existsSync(sourcePath)) {
            return fs.readFileSync(sourcePath).toString();
        }
    }

    return "";
}
|
||||
|
||||
/**
 * Read the `functions.scm` tree-sitter query for a file's language from
 * `<extensionPath>/tools/tree-sitter-queries/<language>/`.
 *
 * @param filepath path whose extension selects the language
 * @returns the query text, or "" when the language is unknown or the query
 *          file does not exist
 */
export async function getQueryFunctionsSource(filepath: string) {
    const fullLangName = await getLanguageFullName(filepath);
    if (fullLangName) {
        const sourcePath = path.join(
            UiUtilWrapper.extensionPath(),
            "tools",
            "tree-sitter-queries",
            fullLangName,
            "functions.scm",
        );
        if (fs.existsSync(sourcePath)) {
            return fs.readFileSync(sourcePath).toString();
        }
    }

    return "";
}
|
36
src/contributes/codecomplete/astTest.ts
Normal file
36
src/contributes/codecomplete/astTest.ts
Normal file
@ -0,0 +1,36 @@
|
||||
import * as path from 'path';
|
||||
import * as fs from 'fs';
|
||||
import { getAst, getTreePathAtCursor, RangeInFileWithContents } from "./ast/ast";
|
||||
import Parser from "web-tree-sitter";
|
||||
import { logger } from "../../util/logger";
|
||||
import { getCommentPrefix, getLangageFunctionConfig, getLanguageFullName, LanguageFunctionsConfig } from "./ast/language";
|
||||
import { getLanguageForFile, getQueryVariablesSource } from './ast/treeSitter';
|
||||
|
||||
|
||||
/**
 * Render `node` and its named descendants as text, one node per line.
 * Each level of depth is indented by two more spaces.
 *
 * @param node node to render
 * @param indent number of spaces placed before this node's line
 * @returns the rendered lines, each terminated by "\n"
 */
function printTree(node: Parser.SyntaxNode, indent: number = 0) {
    const pad = ' '.repeat(indent);
    const from = node.startPosition;
    const to = node.endPosition;
    const pieces = [
        `${pad}Node type: ${node.type}, Position: ${from.row}:${from.column} - ${to.row}:${to.column}\n`,
    ];

    // Named children only, each one level deeper.
    for (let i = 0; i < node.namedChildCount; i++) {
        pieces.push(printTree(node.namedChild(i)!, indent + 2));
    }

    return pieces.join('');
}
|
||||
|
||||
/**
 * Debug helper: log a textual dump of a file's syntax tree.
 *
 * The dump is only built when the COMPLETE_DEBUG environment variable is
 * set, so normal runs do not pay for rendering the whole tree.
 *
 * @param filepath path used to select the parser
 * @param contents source text to parse
 * @param cursorIndex currently unused
 * @returns [] when the file cannot be parsed, otherwise nothing
 */
export async function outputAst(
    filepath: string,
    contents: string,
    cursorIndex: number
) {
    const ast = await getAst(filepath, contents);
    if (!ast) {
        return [];
    }

    if (!process.env.COMPLETE_DEBUG) {
        return;
    }

    // Output the AST.
    const treeText = "\n" + printTree(ast.rootNode, 0);
    logger.channel()?.info(treeText);
}
|
@ -8,10 +8,11 @@ type CacheItem = {
|
||||
};
|
||||
|
||||
class MemoryCacheManager {
|
||||
private static maxCapacity: number = 5;
|
||||
private maxCapacity: number = 5;
|
||||
private cache: Map<string, CacheItem>;
|
||||
|
||||
constructor() {
|
||||
constructor(maxCapacity: number = 5) {
|
||||
this.maxCapacity = maxCapacity;
|
||||
this.cache = new Map<string, CacheItem>();
|
||||
}
|
||||
|
||||
@ -24,7 +25,7 @@ class MemoryCacheManager {
|
||||
this.cache.set(key, { value, timestamp: Date.now() });
|
||||
} else {
|
||||
// 先确保缓存没有超出最大容量
|
||||
if (this.cache.size >= MemoryCacheManager.maxCapacity) {
|
||||
if (this.cache.size >= this.maxCapacity) {
|
||||
this.evict();
|
||||
}
|
||||
this.cache.set(key, { value, timestamp: Date.now() });
|
||||
|
@ -18,9 +18,15 @@ export interface CodeCompleteResult {
|
||||
export class LLMStreamComplete {
|
||||
private token: vscode.CancellationToken;
|
||||
private curlineIndent: number = 0;
|
||||
constructor(token: vscode.CancellationToken, curlineIndent: number) {
|
||||
private nextLine: string = "";
|
||||
private curLine: string = "";
|
||||
private curColumn: number = 0;
|
||||
constructor(token: vscode.CancellationToken, curlineIndent: number, nextLine: string, curLine: string, curColumn: number) {
|
||||
this.token = token;
|
||||
this.curlineIndent = curlineIndent;
|
||||
this.nextLine = nextLine;
|
||||
this.curLine = curLine;
|
||||
this.curColumn = curColumn;
|
||||
}
|
||||
|
||||
async * chunkStopCanceled(chunks: AsyncIterable<CodeCompletionChunk>) {
|
||||
@ -84,6 +90,61 @@ export class LLMStreamComplete {
|
||||
// }
|
||||
// }
|
||||
|
||||
/**
 * Pass chunks through, but end the stream if the first chunk with
 * non-whitespace text is exactly "}", "]" or ")" (after trimming).
 * Whitespace-only chunks before it are forwarded unchanged.
 */
async * stopAtFirstBrace(chunks: AsyncIterable<CodeCompletionChunk>) {
    let awaitingContent = true;
    for await (const chunk of chunks) {
        if (awaitingContent) {
            const trimmed = chunk.text.trim();
            if (["}", "]", ")"].includes(trimmed)) {
                break;
            }
            // Keep checking until a chunk with visible text has been seen.
            awaitingContent = trimmed.length === 0;
        }

        yield chunk;
    }
}
|
||||
|
||||
/**
 * Pass chunks through until one repeats the line stored in `nextLine`.
 *
 * A chunk matches when its trimmed text equals the trimmed `nextLine`; the
 * first chunk is additionally checked with `curLine` placed in front of it.
 * Matching is only active when the trimmed `nextLine` is longer than five
 * characters.
 */
async * stopWhenSameWithNext(chunks: AsyncIterable<CodeCompletionChunk>) {
    const repeatsNextLine = (text: string): boolean => {
        const target = this.nextLine.trim();
        return target.length > 5 && text.trim() === target;
    };

    let seen = 0;
    for await (const chunk of chunks) {
        if (seen === 0 && repeatsNextLine(this.curLine + chunk.text)) {
            break;
        }
        if (repeatsNextLine(chunk.text)) {
            break;
        }

        seen++;
        yield chunk;
    }
}
|
||||
|
||||
/**
 * Forward only the first chunk when there is non-whitespace text in
 * `curLine` before `curColumn`; otherwise forward every chunk.
 */
async * stopFirstLineWhenInMiddleLine(chunks: AsyncIterable<CodeCompletionChunk>) {
    const textBeforeCursor = this.curLine.slice(0, this.curColumn).trim();
    const inMiddleLine = textBeforeCursor.length > 0;

    for await (const chunk of chunks) {
        yield chunk;

        // Mid-line completion: stop once the first chunk has been delivered.
        if (inMiddleLine) {
            break;
        }
    }
}
|
||||
|
||||
async * stopAtSameBlock(chunks: AsyncIterable<CodeCompletionChunk>) {
|
||||
let index = 0;
|
||||
let preIndent = -1;
|
||||
@ -91,6 +152,9 @@ export class LLMStreamComplete {
|
||||
let sameIndentTimes = 0;
|
||||
for await (const chunk of chunks) {
|
||||
let lineIndent = chunk.text.search(/\S/);
|
||||
if (lineIndent === -1) {
|
||||
lineIndent = this.curlineIndent;
|
||||
}
|
||||
if (index === 0) {
|
||||
lineIndent = this.curlineIndent;
|
||||
}
|
||||
@ -124,6 +188,44 @@ export class LLMStreamComplete {
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Trim the tail of `lines` in place: drop trailing whitespace-only entries,
 * then remove one trailing "\n" from the last remaining entry.
 *
 * @param lines array to trim; it is modified and returned
 * @returns the same array instance
 */
async removeEmptyEndlines(lines: string[]): Promise< string[] > {
    // Find how many leading entries survive once the blank tail is cut.
    let keep = lines.length;
    while (keep > 0 && lines[keep - 1].trim() === "") {
        keep--;
    }
    lines.splice(keep);

    if (keep > 0) {
        const last = lines[keep - 1];
        if (last.endsWith("\n")) {
            lines[keep - 1] = last.slice(0, -1);
        }
    }

    return lines;
}
|
||||
|
||||
// async removeRepeatEndBrace(lines: string[]): Promise< string[] > {
|
||||
// let allIsBrace = true;
|
||||
// for (let i=1; i<lines.length; i++) {
|
||||
// if (lines[i].trim() !== lines[0].trim()) {
|
||||
// allIsBrace = false;
|
||||
// break;
|
||||
// }
|
||||
// if (lines[i].trim().length > 1){
|
||||
// allIsBrace = false;
|
||||
// break;
|
||||
// }
|
||||
// }
|
||||
// if (allIsBrace) {
|
||||
// lines = [lines[0]];
|
||||
// }
|
||||
|
||||
// if (lines.length === 1) {
|
||||
// const trim1 = lines[0].trim();
|
||||
// const trim2 = this.nextLine.trim();
|
||||
// if (trim1 === trim2 && trim1.length === 1) {
|
||||
// return [];
|
||||
// }
|
||||
// }
|
||||
// return lines;
|
||||
// }
|
||||
|
||||
async llmStreamComplete(prompt: string) : Promise<CodeCompleteResult | undefined> {
|
||||
// TODO
|
||||
// 对LLM的异常进行捕获,避免中断代码补全
|
||||
@ -131,17 +233,24 @@ export class LLMStreamComplete {
|
||||
const chunks = streamComplete(prompt);
|
||||
const chunks2 = this.chunkStopCanceled(chunks);
|
||||
const chunks3 = this.toLines(chunks2);
|
||||
const chunks4 = this.stopAtSameBlock(chunks3);
|
||||
const chunks4 = this.stopAtFirstBrace(chunks3);
|
||||
const chunks5 = this.stopFirstLineWhenInMiddleLine(chunks4);
|
||||
const chunks6 = this.stopWhenSameWithNext(chunks5);
|
||||
const chunks7 = this.stopAtSameBlock(chunks6);
|
||||
|
||||
let id = "";
|
||||
let completionCode = "";
|
||||
for await (const chunk of chunks4) {
|
||||
completionCode += chunk.text;
|
||||
let lines: string[] = [];
|
||||
for await (const chunk of chunks7) {
|
||||
lines.push(chunk.text);
|
||||
if (chunk.id) {
|
||||
id = chunk.id;
|
||||
}
|
||||
}
|
||||
|
||||
const line2 = await this.removeEmptyEndlines(lines);
|
||||
|
||||
const completionCode = line2.join("");
|
||||
|
||||
logger.channel()?.info("code:", completionCode);
|
||||
return { prompt, code: completionCode, id };
|
||||
}
|
||||
|
@ -5,8 +5,10 @@ import Debouncer from './debouncer';
|
||||
import MemoryCacheManager from './cache';
|
||||
import { createPrompt } from './promptCreator';
|
||||
import { CodeCompleteResult, LLMStreamComplete } from './chunkFilter';
|
||||
import { nvidiaStarcoderComplete } from './llm';
|
||||
import { DevChatConfig } from '../../util/config';
|
||||
import { outputAst } from './astTest';
|
||||
import { getEndOfLine } from './ast/language';
|
||||
import { RecentEditsManager } from './recentEdits';
|
||||
|
||||
|
||||
export function registerCodeCompleteCallbackCommand(context: vscode.ExtensionContext) {
|
||||
@ -31,12 +33,17 @@ export class InlineCompletionProvider implements vscode.InlineCompletionItemProv
|
||||
private debouncer: Debouncer;
|
||||
private cache: MemoryCacheManager;
|
||||
private devchatConfig: DevChatConfig;
|
||||
private lastComplete: string;
|
||||
private recentEditors: RecentEditsManager;
|
||||
|
||||
constructor() {
|
||||
// TODO
|
||||
// Read delay time from config
|
||||
this.debouncer = new Debouncer(500);
|
||||
this.cache = new MemoryCacheManager();
|
||||
this.devchatConfig = new DevChatConfig();
|
||||
this.lastComplete = "";
|
||||
this.recentEditors = new RecentEditsManager();
|
||||
}
|
||||
|
||||
async logEventToServer(event: LogEventRequest) {
|
||||
@ -55,7 +62,7 @@ export class InlineCompletionProvider implements vscode.InlineCompletionItemProv
|
||||
try {
|
||||
const response = await fetch(apiUrl, requestOptions);
|
||||
if (!response.ok) {
|
||||
if (this.devchatConfig.get("complete_debug")) {
|
||||
if (process.env.COMPLETE_DEBUG) {
|
||||
logger.channel()?.info("log event to server failed:", response.status);
|
||||
}
|
||||
}
|
||||
@ -64,19 +71,60 @@ export class InlineCompletionProvider implements vscode.InlineCompletionItemProv
|
||||
}
|
||||
}
|
||||
|
||||
// check whether need to send code complete event
|
||||
// async shouldSendCodeCompleteEvent(document: vscode.TextDocument, position: vscode.Position): Promise< boolean > {
|
||||
// // if complete_enable is false, then don't send code complete
|
||||
// if (!this.devchatConfig.get("complete_enable")) {
|
||||
// return false;
|
||||
// }
|
||||
|
||||
// // if A|B, then don't send code complete
|
||||
// const preChar = document.getText(new vscode.Range(position.line, position.character - 1, position.line, position.character));
|
||||
// const postChar = document.getText(new vscode.Range(position.line, position.character, position.line, position.character + 1));
|
||||
// if (preChar !== ' ' && postChar !== ' ') {
|
||||
// return false;
|
||||
// }
|
||||
|
||||
// const fsPath = document.uri.fsPath;
|
||||
// const fileContent = document.getText();
|
||||
// const lines = fileContent.split('\n');
|
||||
|
||||
// // don't complete while stmt is end
|
||||
// const langEndofLine: string[] = await getEndOfLine(fsPath);
|
||||
// for (const endOfLine of langEndofLine) {
|
||||
// if (lines[position.line].endsWith(endOfLine) && position.character >= lines[position.line].length) {
|
||||
// return false;
|
||||
// }
|
||||
// }
|
||||
|
||||
// return true;
|
||||
// }
|
||||
|
||||
async codeComplete(document: vscode.TextDocument, position: vscode.Position, context: vscode.InlineCompletionContext, token: vscode.CancellationToken): Promise<CodeCompleteResult | undefined> {
|
||||
// TODO
|
||||
// create prompt
|
||||
const fsPath = document.uri.fsPath;
|
||||
const fileContent = document.getText();
|
||||
const prompt = await createPrompt(fsPath, fileContent, position.line, position.character);
|
||||
if (this.devchatConfig.get("complete_prompt_debug")) {
|
||||
const posOffset = document.offsetAt(position);
|
||||
|
||||
if (process.env.COMPLETE_DEBUG) {
|
||||
logger.channel()?.info(`cur position: ${position.line}: ${position.character}`);
|
||||
}
|
||||
|
||||
const prompt = await createPrompt(fsPath, fileContent, position.line, position.character, posOffset, this.recentEditors.getEdits());
|
||||
if (!prompt) {
|
||||
return undefined;
|
||||
}
|
||||
if (process.env.COMPLETE_DEBUG) {
|
||||
logger.channel()?.info("prompt:", prompt);
|
||||
}
|
||||
|
||||
// check cache
|
||||
const result = await this.cache.get(prompt);
|
||||
if (result) {
|
||||
if (process.env.COMPLETE_DEBUG) {
|
||||
logger.channel()?.info(`cache hited:\n${result.code}`);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
@ -85,9 +133,32 @@ export class InlineCompletionProvider implements vscode.InlineCompletionItemProv
|
||||
const lines = fileContent.split('\n');
|
||||
let curlineIndent = lines[position.line].search(/\S/);
|
||||
if (curlineIndent === -1) {
|
||||
curlineIndent = 0;
|
||||
curlineIndent = lines[position.line].length;
|
||||
}
|
||||
const completor = new LLMStreamComplete(token, curlineIndent);
|
||||
|
||||
const langEndofLine: string[] = await getEndOfLine(fsPath);
|
||||
for (const endOfLine of langEndofLine) {
|
||||
if (lines[position.line].endsWith(endOfLine) && position.character >= lines[position.line].length) {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
if (this.lastComplete.endsWith(lines[position.line]) && this.lastComplete !== "" && lines[position.line].trim() !== "") {
|
||||
return undefined;
|
||||
}
|
||||
|
||||
let nextLine = lines[position.line].slice(position.character);
|
||||
if (nextLine.trim().length === 0) {
|
||||
for (let i = position.line + 1; i < lines.length; i++) {
|
||||
if (lines[i].trim().length > 0) {
|
||||
nextLine = lines[i];
|
||||
break;
|
||||
}
|
||||
}
|
||||
};
|
||||
|
||||
const curLine = lines[position.line];
|
||||
const curColumn = position.character;
|
||||
const completor = new LLMStreamComplete(token, curlineIndent, nextLine, curLine, curColumn);
|
||||
const response = await completor.llmStreamComplete(prompt);
|
||||
if (!response || response.code.length === 0) {
|
||||
return undefined;
|
||||
@ -107,6 +178,19 @@ export class InlineCompletionProvider implements vscode.InlineCompletionItemProv
|
||||
if (!result) {
|
||||
return [];
|
||||
}
|
||||
if (context.selectedCompletionInfo) {
|
||||
return [];
|
||||
}
|
||||
if (this.devchatConfig.get("complete_enable") !== true) {
|
||||
return [];
|
||||
}
|
||||
|
||||
// const filepath = document.uri.fsPath;
|
||||
// const fileContent = document.getText();
|
||||
// const posOffset = document.offsetAt(position);
|
||||
// await outputAst(filepath, fileContent, posOffset);
|
||||
// await testTreesitterQuery(filepath, fileContent);
|
||||
// return [];
|
||||
|
||||
const response: CodeCompleteResult | undefined = await this.codeComplete(document, position, context, token);
|
||||
if (!response) {
|
||||
@ -119,7 +203,7 @@ export class InlineCompletionProvider implements vscode.InlineCompletionItemProv
|
||||
|
||||
// TODO
|
||||
// 代码补全建议是否已经被用户看到,这个需要更加准确的方式来识别。
|
||||
if (this.devchatConfig.get("complete_debug")) {
|
||||
if (process.env.COMPLETE_DEBUG) {
|
||||
logger.channel()?.info("code complete show.");
|
||||
}
|
||||
this.logEventToServer(
|
||||
@ -132,14 +216,14 @@ export class InlineCompletionProvider implements vscode.InlineCompletionItemProv
|
||||
// log to server
|
||||
|
||||
const logRejectionTimeout: NodeJS.Timeout = setTimeout(() => {
|
||||
if (this.devchatConfig.get("complete_debug")) {
|
||||
if (process.env.COMPLETE_DEBUG) {
|
||||
logger.channel()?.info("code complete not accept.");
|
||||
}
|
||||
}, 10_000);
|
||||
|
||||
// 代码补全回调处理
|
||||
const callback = () => {
|
||||
if (this.devchatConfig.get("complete_debug")) {
|
||||
if (process.env.COMPLETE_DEBUG) {
|
||||
logger.channel()?.info("accept:", response.id);
|
||||
}
|
||||
// delete cache
|
||||
@ -156,12 +240,13 @@ export class InlineCompletionProvider implements vscode.InlineCompletionItemProv
|
||||
});
|
||||
};
|
||||
|
||||
this.lastComplete = response.code;
|
||||
return [
|
||||
new vscode.InlineCompletionItem(
|
||||
response.code,
|
||||
new vscode.Range(
|
||||
position,
|
||||
position
|
||||
position.translate(0, response.code.length)
|
||||
),
|
||||
{
|
||||
title: "code complete accept",
|
||||
|
@ -41,7 +41,7 @@ export async function * nvidiaStarcoderComplete(prompt: string) : AsyncGenerator
|
||||
"max_tokens": 1024,
|
||||
"seed": 42,
|
||||
"bad": null,
|
||||
"stop": ["<file_sep>"],
|
||||
"stop": ["<file_sep>", "```", "\n\n"],
|
||||
"stream": true
|
||||
};
|
||||
|
||||
@ -89,3 +89,57 @@ export async function * nvidiaStarcoderComplete(prompt: string) : AsyncGenerator
|
||||
logger.channel()?.error("Error making request:", error.message);
|
||||
}
|
||||
}
|
||||
|
||||
/**
 * Streams a completion for `prompt` from an Ollama `/api/generate` endpoint.
 *
 * The response body is read as newline-delimited JSON. Network chunks are
 * buffered and split on line boundaries before parsing, so a chunk that holds
 * several JSON objects, or only part of one, is handled correctly.
 *
 * @param prompt The prompt text sent to the model.
 * @returns An async generator yielding one chunk per JSON line received. Every
 *          chunk carries the `created_at` value of the first parsed line as its id.
 *          Request and parse failures are logged and never thrown to the caller.
 */
export async function * ollamaStarcoderComplete(prompt: string) : AsyncGenerator<CodeCompletionChunk> {
    // NOTE(review): the endpoint is hard-coded to a LAN address; presumably it should come from configuration.
    const url = 'http://192.168.1.138:11434/api/generate';
    const headers = {
        'Content-Type': 'application/json',
    };
    const payload = {
        model: 'starcoder:7b',
        prompt: prompt,
        stream: true,
        options: {
            stop: ["<|endoftext|>", "<file_sep>", "```", "\n\n"],
            temperature: 0.2
        }
    };

    let idResponse: string | undefined = undefined;

    // Parses a single response line; logs and returns undefined when it is not valid JSON.
    const parseLine = (line: string): CodeCompletionChunk | undefined => {
        const text = line.trim();
        if (text.length === 0) {
            return undefined;
        }
        try {
            // Expected shape, e.g.:
            // {"model":"starcoder:7b","created_at":"2024-04-04T08:33:50.624505431Z","response":"sort","done":false}
            const data = JSON.parse(text);
            if (!idResponse) {
                idResponse = data.created_at;
            }
            return {
                text: data.response,
                id: idResponse!
            };
        } catch (e: any) {
            logger.channel()?.info("receve:", text);
            logger.channel()?.error("JSON Parsing Error:", e.message);
            return undefined;
        }
    };

    try {
        const response = await fetch(url, {
            method: 'POST',
            headers,
            body: JSON.stringify(payload),
        });

        if (!response.ok || !response.body) {
            logger.channel()?.error("Error making request:", response.statusText);
            return;
        }

        const stream = response.body as any;
        const decoder = new TextDecoder("utf-8");
        // Text received so far that does not yet end with a newline.
        let pending = "";

        for await (const chunk of stream) {
            // `stream: true` keeps multi-byte characters intact across chunk boundaries.
            pending += decoder.decode(chunk, { stream: true });
            const lines = pending.split("\n");
            // The last element is an incomplete line (or ""); keep it for the next chunk.
            pending = lines.pop() ?? "";

            for (const line of lines) {
                const parsed = parseLine(line);
                if (parsed) {
                    yield parsed;
                }
            }
        }

        // Flush the decoder and handle a final line without a trailing newline.
        pending += decoder.decode();
        const last = parseLine(pending);
        if (last) {
            yield last;
        }
    } catch (error: any) {
        logger.channel()?.error("Error making request:", error.message);
    }
}
|
||||
|
38
src/contributes/codecomplete/llm/constants.ts
Normal file
38
src/contributes/codecomplete/llm/constants.ts
Normal file
@ -0,0 +1,38 @@
|
||||
/** Default value for the `maxTokens` completion argument. */
export const DEFAULT_MAX_TOKENS = 1024;

/** Context length to assume when none is otherwise specified. */
export const DEFAULT_CONTEXT_LENGTH = 4096;

// Not exported; only used to build DEFAULT_ARGS below.
const DEFAULT_TEMPERATURE = 0.5;

/** Default completion arguments. */
export const DEFAULT_ARGS = {
  maxTokens: DEFAULT_MAX_TOKENS,
  temperature: DEFAULT_TEMPERATURE,
};

/** Context length, in tokens, keyed by model name. */
export const CONTEXT_LENGTH_FOR_MODEL: Record<string, number> = {
  "gpt-3.5-turbo": 4096,
  "gpt-3.5-turbo-0613": 4096,
  "gpt-3.5-turbo-16k": 16_384,
  "gpt-4": 8192,
  "gpt-35-turbo-16k": 16_384,
  "gpt-35-turbo-0613": 4096,
  "gpt-35-turbo": 4096,
  "gpt-4-32k": 32_768,
  "gpt-4-turbo-preview": 128_000,
  "gpt-4-vision": 128_000,
  "gpt-4-0125-preview": 128_000,
  "gpt-4-1106-preview": 128_000,
};

/** Number of tokens held back as a safety margin when computing prompt budgets. */
export const TOKEN_BUFFER_FOR_SAFETY = 350;

export const PROXY_URL = "http://localhost:65433";

/** 512 minus a buffer for safety (in case of differing tokenizers). */
export const MAX_CHUNK_SIZE = 500;
|
361
src/contributes/codecomplete/llm/countTokens.ts
Normal file
361
src/contributes/codecomplete/llm/countTokens.ts
Normal file
@ -0,0 +1,361 @@
|
||||
// @ts-ignore
|
||||
import llamaTokenizer from "llama-tokenizer-js";
|
||||
//import { ChatMessage, MessageContent, MessagePart } from "..";
|
||||
import { TOKEN_BUFFER_FOR_SAFETY } from "./constants";
|
||||
|
||||
|
||||
/** Who authored a chat message. */
export type ChatMessageRole = "user" | "assistant" | "system";

/** One piece of a multi-part message: either text or an image reference. */
export interface MessagePart {
  type: "text" | "imageUrl";
  /** Text payload; optional on the type, read by the token counter for non-image parts. */
  text?: string;
  /** Image reference; optional on the type. */
  imageUrl?: { url: string };
}

/** Message body: a plain string or a list of parts. */
export type MessageContent = string | MessagePart[];

/** A single chat message. */
export interface ChatMessage {
  role: ChatMessageRole;
  content: MessageContent;
}

/** Minimal tokenizer surface used by this module (left untyped, as in the tokenizer import). */
interface Encoding {
  encode: any;
  decode: any;
}
|
||||
|
||||
/**
 * Returns the tokenizer to use for a model.
 *
 * @param modelName Currently ignored: the llama tokenizer is returned for every model.
 */
function encodingForModel(modelName: string): Encoding {
  const tokenizer: Encoding = llamaTokenizer;
  return tokenizer;
}
|
||||
|
||||
/**
 * Returns the flat token cost charged for an image part.
 *
 * @param content A message part; must have type "imageUrl".
 * @returns 85 for every image part.
 * @throws Error when the part is not an image.
 */
function countImageTokens(content: MessagePart): number {
  if (content.type !== "imageUrl") {
    throw new Error("Non-image content type");
  }
  return 85;
}
|
||||
|
||||
/**
 * Counts the tokens in a message body.
 *
 * For multi-part content the result is the sum over all parts: image parts
 * use the flat cost from `countImageTokens`, text parts are tokenized (a
 * missing `text` counts as the empty string).
 *
 * @param content A string or a list of message parts.
 * @param modelName Model whose tokenizer should be used.
 * @returns Total number of tokens.
 */
function countTokens(
  content: MessageContent,
  // defaults to llama2 because the tokenizer tends to produce more tokens
  modelName: string = "llama2",
): number {
  const encoding = encodingForModel(modelName);

  if (!Array.isArray(content)) {
    return encoding.encode(content, "all", []).length;
  }

  return content.reduce((acc, part) => {
    // The conditional must be evaluated on its own before being added to the
    // accumulator: `acc + part.type === "imageUrl" ? a : b` compares the
    // concatenation `acc + part.type` with "imageUrl" and drops the running sum.
    const partTokens: number =
      part.type === "imageUrl"
        ? countImageTokens(part)
        : encoding.encode(part.text ?? "", "all", []).length;
    return acc + partTokens;
  }, 0);
}
|
||||
|
||||
/**
 * Merges runs of consecutive messages that share a role into one message,
 * joining their contents with a blank line.
 *
 * The input messages are not modified: each run starts from a shallow copy
 * of its first message, and later messages are appended to that copy.
 *
 * @param msgs Messages in conversation order.
 * @returns A new array in which no two adjacent messages have the same role.
 */
function flattenMessages(msgs: ChatMessage[]): ChatMessage[] {
  const flattened: ChatMessage[] = [];

  for (const msg of msgs) {
    const last = flattened[flattened.length - 1];
    if (last !== undefined && last.role === msg.role) {
      last.content += "\n\n" + (msg.content || "");
    } else {
      // Copy so that the append above never writes into the caller's object.
      flattened.push({ ...msg });
    }
  }

  return flattened;
}
|
||||
|
||||
/**
 * Reduces a message body to plain text.
 *
 * @param content A string, returned unchanged, or a list of parts.
 * @returns For a list, the `text` of every part of type "text", joined with newlines.
 */
export function stripImages(content: MessageContent): string {
  if (!Array.isArray(content)) {
    return content;
  }

  const texts: (string | undefined)[] = [];
  for (const part of content) {
    if (part.type === "text") {
      texts.push(part.text);
    }
  }
  return texts.join("\n");
}
|
||||
|
||||
/**
 * Counts the tokens of one chat message, including a fixed per-message overhead.
 *
 * A simpler, safer version of the approach described at
 * https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb
 * where every message follows <|im_start|>{role/name}\n{content}<|end|>\n
 *
 * @param modelName Model whose tokenizer should be used.
 * @param chatMessage The message to measure.
 * @returns Content tokens plus 4.
 */
function countChatMessageTokens(
  modelName: string,
  chatMessage: ChatMessage,
): number {
  const TOKENS_PER_MESSAGE: number = 4;
  const contentTokens = countTokens(chatMessage.content, modelName);
  return contentTokens + TOKENS_PER_MESSAGE;
}
|
||||
|
||||
/**
 * Drops whole lines from the start of `prompt` until the running token
 * estimate no longer exceeds `maxTokens`.
 *
 * The estimate starts at the token count of the full prompt and is reduced
 * by the token count of each dropped line.
 *
 * @param prompt Text to prune.
 * @param maxTokens Token budget.
 * @param modelName Model whose tokenizer should be used.
 * @returns The remaining lines joined with "\n".
 */
function pruneLinesFromTop(
  prompt: string,
  maxTokens: number,
  modelName: string,
): string {
  const lines = prompt.split("\n");
  let remainingTokens = countTokens(prompt, modelName);

  // Index of the first line that is kept.
  let firstKept = 0;
  while (remainingTokens > maxTokens && firstKept < lines.length) {
    remainingTokens -= countTokens(lines[firstKept], modelName);
    firstKept += 1;
  }

  return lines.slice(firstKept).join("\n");
}
|
||||
|
||||
/**
 * Drops whole lines from the end of `prompt` until the running token
 * estimate no longer exceeds `maxTokens`.
 *
 * The estimate starts at the token count of the full prompt and is reduced
 * by the token count of each dropped line.
 *
 * @param prompt Text to prune.
 * @param maxTokens Token budget.
 * @param modelName Model whose tokenizer should be used.
 * @returns The remaining lines joined with "\n".
 */
function pruneLinesFromBottom(
  prompt: string,
  maxTokens: number,
  modelName: string,
): string {
  const lines = prompt.split("\n");
  let remainingTokens = countTokens(prompt, modelName);

  // Number of leading lines that are kept.
  let keptCount = lines.length;
  while (remainingTokens > maxTokens && keptCount > 0) {
    keptCount -= 1;
    remainingTokens -= countTokens(lines[keptCount], modelName);
  }

  return lines.slice(0, keptCount).join("\n");
}
|
||||
|
||||
/**
 * Truncates `prompt` to at most `maxTokens` tokens, keeping the beginning.
 *
 * @param modelName Model whose tokenizer should be used.
 * @param maxTokens Token budget. Negative values are treated as 0; without
 *                  that clamp `slice(0, negative)` would keep almost the
 *                  whole prompt instead of nothing.
 * @param prompt Text to truncate.
 * @returns `prompt` unchanged when it fits, otherwise the decoded first
 *          `maxTokens` tokens.
 */
function pruneStringFromBottom(
  modelName: string,
  maxTokens: number,
  prompt: string,
): string {
  const encoding = encodingForModel(modelName);
  const budget = Math.max(0, Math.floor(maxTokens));

  const tokens = encoding.encode(prompt, "all", []);
  if (tokens.length <= budget) {
    return prompt;
  }

  return encoding.decode(tokens.slice(0, budget));
}
|
||||
|
||||
/**
 * Truncates `prompt` to at most `maxTokens` tokens, keeping the end.
 *
 * @param modelName Model whose tokenizer should be used.
 * @param maxTokens Token budget.
 * @param prompt Text to truncate.
 * @returns `prompt` unchanged when it fits, otherwise the decoded last
 *          `maxTokens` tokens.
 */
function pruneStringFromTop(
  modelName: string,
  maxTokens: number,
  prompt: string,
): string {
  const encoding = encodingForModel(modelName);
  const tokens = encoding.encode(prompt, "all", []);

  const fitsBudget = tokens.length <= maxTokens;
  if (fitsBudget) {
    return prompt;
  }

  const firstKept = tokens.length - maxTokens;
  const tail = tokens.slice(firstKept);
  return encoding.decode(tail);
}
|
||||
|
||||
/**
 * Truncates a raw prompt from the top so that it fits in the context window
 * after reserving room for the completion and the safety buffer.
 *
 * @param modelName Model whose tokenizer should be used.
 * @param contextLength Total context window size in tokens.
 * @param prompt Text to truncate.
 * @param tokensForCompletion Tokens reserved for the model's answer.
 * @returns The end of `prompt` that fits the remaining budget.
 */
function pruneRawPromptFromTop(
  modelName: string,
  contextLength: number,
  prompt: string,
  tokensForCompletion: number,
): string {
  const promptBudget =
    contextLength - tokensForCompletion - TOKEN_BUFFER_FOR_SAFETY;
  const pruned = pruneStringFromTop(modelName, promptBudget, prompt);
  return pruned;
}
|
||||
|
||||
/**
 * Truncates a raw prompt from the bottom so that it fits in the context
 * window after reserving room for the completion and the safety buffer.
 *
 * @param modelName Model whose tokenizer should be used.
 * @param contextLength Total context window size in tokens.
 * @param prompt Text to truncate.
 * @param tokensForCompletion Tokens reserved for the model's answer.
 * @returns The beginning of `prompt` that fits the remaining budget.
 */
function pruneRawPromptFromBottom(
  modelName: string,
  contextLength: number,
  prompt: string,
  tokensForCompletion: number,
): string {
  const promptBudget =
    contextLength - tokensForCompletion - TOKEN_BUFFER_FOR_SAFETY;
  const pruned = pruneStringFromBottom(modelName, promptBudget, prompt);
  return pruned;
}
|
||||
|
||||
/**
 * Produces a short stand-in for a message: its first 100 characters followed
 * by "...". Multi-part content is reduced to text first.
 *
 * @param message Message body to shorten.
 * @returns The shortened text; the "..." suffix is always appended.
 */
function summarize(message: MessageContent): string {
  const text = Array.isArray(message) ? stripImages(message) : message;
  return text.substring(0, 100) + "...";
}
|
||||
|
||||
/**
 * Shrinks a chat history until its estimated token usage fits the context
 * window, applying progressively more aggressive steps.
 *
 * The array and the message objects in it are modified in place; callers
 * that need the original must pass copies.
 *
 * @param modelName Model whose tokenizer should be used.
 * @param chatHistory Messages in conversation order (mutated).
 * @param contextLength Total context window size in tokens.
 * @param tokensForCompletion Tokens reserved outside the history (added to the running total).
 * @returns The same `chatHistory` array after pruning.
 */
function pruneChatHistory(
  modelName: string,
  chatHistory: ChatMessage[],
  contextLength: number,
  tokensForCompletion: number,
): ChatMessage[] {
  let totalTokens =
    tokensForCompletion +
    chatHistory.reduce((acc, message) => {
      return acc + countChatMessageTokens(modelName, message);
    }, 0);

  // 0. Prune any messages that take up more than 1/3 of the context length
  const longestMessages = [...chatHistory];
  longestMessages.sort((a, b) => b.content.length - a.content.length);

  const longerThanOneThird = longestMessages.filter(
    (message: ChatMessage) =>
      countTokens(message.content, modelName) > contextLength / 3,
  );
  const distanceFromThird = longerThanOneThird.map(
    (message: ChatMessage) =>
      countTokens(message.content, modelName) - contextLength / 3,
  );

  for (let i = 0; i < longerThanOneThird.length; i++) {
    const deltaNeeded = totalTokens - contextLength;
    if (deltaNeeded <= 0) {
      // Already within budget: pruning with a non-positive delta would only
      // inflate the running total and strip images for no benefit.
      break;
    }

    // Cut tokens from the top of the message, but never below one third of
    // the context length and never more than is still needed.
    const message = longerThanOneThird[i];
    const content = stripImages(message.content);
    const delta = Math.min(deltaNeeded, distanceFromThird[i]);
    message.content = pruneStringFromTop(
      modelName,
      countTokens(message.content, modelName) - delta,
      content,
    );
    totalTokens -= delta;
  }

  // 1. Replace beyond last 5 messages with summary
  let i = 0;
  while (totalTokens > contextLength && i < chatHistory.length - 5) {
    // Walk forward through the older messages; each one is summarized once.
    const message = chatHistory[i];
    const summary = summarize(message.content);
    totalTokens -= countTokens(message.content, modelName);
    totalTokens += countTokens(summary, modelName);
    message.content = summary;
    i++;
  }

  // 2. Remove entire messages until the last 5
  while (chatHistory.length > 5 && totalTokens > contextLength) {
    const message = chatHistory.shift()!;
    totalTokens -= countTokens(message.content, modelName);
  }

  // 3. Truncate message in the last 5, except last 1
  i = 0;
  while (
    totalTokens > contextLength &&
    chatHistory.length > 0 &&
    i < chatHistory.length - 1
  ) {
    const message = chatHistory[i];
    const summary = summarize(message.content);
    totalTokens -= countTokens(message.content, modelName);
    totalTokens += countTokens(summary, modelName);
    message.content = summary;
    i++;
  }

  // 4. Remove entire messages in the last 5, except last 1
  while (totalTokens > contextLength && chatHistory.length > 1) {
    const message = chatHistory.shift()!;
    totalTokens -= countTokens(message.content, modelName);
  }

  // 5. Truncate last message
  if (totalTokens > contextLength && chatHistory.length > 0) {
    // After step 4 at most one message is left when still over budget.
    const message = chatHistory[0];
    message.content = pruneRawPromptFromTop(
      modelName,
      contextLength,
      stripImages(message.content),
      tokensForCompletion,
    );
    totalTokens = contextLength;
  }

  return chatHistory;
}
|
||||
|
||||
/**
 * Builds the final message list for a chat request.
 *
 * Steps: copy the incoming messages (dropping those with empty-string
 * content), append `prompt` as a user message, insert the system message,
 * verify that the reserved tokens leave room in the context window, strip
 * images when unsupported, prune the history to fit, move the system
 * message to the front, and merge adjacent messages with the same role.
 *
 * @param modelName Model whose tokenizer should be used.
 * @param msgs Existing conversation; shallow-copied, never modified.
 * @param contextLength Total context window size in tokens.
 * @param maxTokens Tokens reserved for the model's answer.
 * @param supportsImages When false, multi-part contents are reduced to text.
 * @param prompt Optional text appended as a final user message.
 * @param functions Optional function definitions; their JSON size is counted against the budget.
 * @param systemMessage Optional system message; ignored when blank.
 * @returns The compiled messages.
 * @throws Error when the reserved tokens are greater than or equal to `contextLength`.
 */
function compileChatMessages(
  modelName: string,
  msgs: ChatMessage[] | undefined = undefined,
  contextLength: number,
  maxTokens: number,
  supportsImages: boolean,
  prompt: string | undefined = undefined,
  functions: any[] | undefined = undefined,
  systemMessage: string | undefined = undefined,
): ChatMessage[] {
  const msgsCopy: ChatMessage[] = (msgs ?? [])
    .map((msg) => ({ ...msg }))
    .filter((msg) => msg.content !== "");

  if (prompt) {
    msgsCopy.push({ role: "user", content: prompt });
  }

  const hasSystemMessage = !!systemMessage && systemMessage.trim() !== "";
  if (hasSystemMessage) {
    // Insert as second to last
    // Later moved to top, but want second-priority to last user message
    msgsCopy.splice(-1, 0, { role: "system", content: systemMessage! });
  }

  const functionTokens = (functions ?? []).reduce(
    (sum: number, func) => sum + countTokens(JSON.stringify(func), modelName),
    0,
  );

  if (maxTokens + functionTokens + TOKEN_BUFFER_FOR_SAFETY >= contextLength) {
    throw new Error(
      `maxTokens (${maxTokens}) is too close to contextLength (${contextLength}), which doesn't leave room for response. Try increasing the contextLength parameter of the model in your config.json.`,
    );
  }

  // If images not supported, convert MessagePart[] to string
  if (!supportsImages) {
    msgsCopy.forEach((msg) => {
      if ("content" in msg && Array.isArray(msg.content)) {
        msg.content = stripImages(msg.content);
      }
    });
  }

  const history = pruneChatHistory(
    modelName,
    msgsCopy,
    contextLength,
    functionTokens + maxTokens + TOKEN_BUFFER_FOR_SAFETY,
  );

  // The system message was parked second to last; move it to the front if it survived pruning.
  const secondToLast = history.length >= 2 ? history[history.length - 2] : undefined;
  if (systemMessage && secondToLast !== undefined && secondToLast.role === "system") {
    const [movedSystemMessage] = history.splice(-2, 1);
    history.unshift(movedSystemMessage);
  }

  return flattenMessages(history);
}
|
||||
|
||||
export {
|
||||
compileChatMessages,
|
||||
countTokens,
|
||||
pruneLinesFromBottom,
|
||||
pruneLinesFromTop,
|
||||
pruneRawPromptFromTop,
|
||||
pruneStringFromBottom,
|
||||
pruneStringFromTop,
|
||||
};
|
152
src/contributes/codecomplete/lsp.ts
Normal file
152
src/contributes/codecomplete/lsp.ts
Normal file
@ -0,0 +1,152 @@
|
||||
// import { IDE, RangeInFile } from "core";
|
||||
import { getAst, getTreePathAtCursor, RangeInFileWithContents } from "./ast/ast";
|
||||
import { AutocompleteSnippet } from "./ranking";
|
||||
import * as vscode from "vscode";
|
||||
import Parser from "web-tree-sitter";
|
||||
import { RangeInFile, readRangeInFile } from "./utils";
|
||||
|
||||
/** Names of the built-in VS Code "go to" commands that executeGotoProvider accepts. */
type GotoProviderName =
  | "vscode.executeDefinitionProvider"
  | "vscode.executeTypeDefinitionProvider"
  | "vscode.executeDeclarationProvider"
  | "vscode.executeImplementationProvider"
  | "vscode.executeReferenceProvider";
|
||||
/**
 * Runs one of VS Code's built-in "go to" commands at a position and
 * normalizes the result into `RangeInFile` entries.
 *
 * @param uri Document identifier, parsed with `vscode.Uri.parse`.
 *            NOTE(review): callers in this file pass a filesystem path here — confirm
 *            that `Uri.parse` (rather than `Uri.file`) resolves it as intended.
 * @param line Zero-based line of the position.
 * @param character Zero-based character offset of the position.
 * @param name The command to execute.
 * @returns One entry per returned location; empty when the command yields nothing.
 */
async function executeGotoProvider(
  uri: string,
  line: number,
  character: number,
  name: GotoProviderName,
): Promise<RangeInFile[]> {
  const definitions = (await vscode.commands.executeCommand(
    name,
    vscode.Uri.parse(uri),
    new vscode.Position(line, character),
  )) as any;

  // The command may resolve to undefined/null; treat that as "no results"
  // instead of letting the loop below throw.
  const locations: any[] = definitions ?? [];

  // definitions have two possible types: Location[] or LocationLink[]
  const definitionResult: RangeInFile[] = [];
  for (const definition of locations) {
    if (definition.range) {
      // Location
      definitionResult.push({
        filepath: definition.uri.fsPath,
        range: definition.range,
      });
    } else if (definition.targetRange) {
      // LocationLink
      definitionResult.push({
        filepath: definition.targetUri.fsPath,
        range: definition.targetRange,
      });
    }
  }

  return definitionResult;
}
|
||||
|
||||
/**
 * Looks up the definition locations of the symbol at a position.
 *
 * @param line Zero-based line of the position.
 * @param character Zero-based character offset of the position.
 * @param uri Document identifier, forwarded to executeGotoProvider.
 * @returns The locations reported by "vscode.executeDefinitionProvider".
 */
export async function getDefinitions(line: number, character: number, uri: string): Promise<RangeInFile[]> {
  const locations = await executeGotoProvider(uri, line, character, "vscode.executeDefinitionProvider");
  return locations;
}
|
||||
|
||||
/**
 * Looks up the type-definition locations of the symbol at a position.
 *
 * @param line Zero-based line of the position.
 * @param character Zero-based character offset of the position.
 * @param uri Document identifier, forwarded to executeGotoProvider.
 * @returns The locations reported by "vscode.executeTypeDefinitionProvider".
 */
export async function getTypeDefinitions(line: number, character: number, uri: string): Promise<RangeInFile[]> {
  const locations = await executeGotoProvider(uri, line, character, "vscode.executeTypeDefinitionProvider");
  return locations;
}
|
||||
|
||||
/**
 * Resolves the definitions relevant to one syntax node.
 *
 * Only "call_expression" nodes are handled: the definition provider is
 * queried at the end of the child that immediately precedes the argument
 * list, or at the node's start when no such child exists. Every other node
 * type yields an empty list.
 *
 * Node types noted in the original as not handled yet:
 * - "variable_declarator": variable assignment -> variable definition/type,
 *   usages of the var that appear after the declaration
 * - "impl_item": impl of trait -> trait definition
 * - "": function definition -> implementations?
 *
 * @param uri Document identifier, forwarded to executeGotoProvider.
 * @param node The syntax node to inspect.
 * @returns Definition locations for the node.
 */
async function getDefinitionsForNode(
  uri: string,
  node: Parser.SyntaxNode,
): Promise<RangeInFile[]> {
  const ranges: RangeInFile[] = [];

  if (node.type !== "call_expression") {
    return ranges;
  }

  // function call -> function definition
  const children = node.children;

  // Visit children in reverse order to find the last one that looks like "( ... )".
  let argsIndex = -1;
  for (let idx = children.length - 1; idx >= 0; idx--) {
    const childText = children[idx].text;
    if (childText.startsWith("(") && childText.endsWith(")")) {
      argsIndex = idx;
      break;
    }
  }

  // Default to the start of the call; prefer the last character of the
  // child just before the argument list when there is one.
  let row: number = node.startPosition.row;
  let col: number = node.startPosition.column;
  if (argsIndex > 0) {
    const callee = children[argsIndex - 1];
    row = callee.endPosition.row;
    col = callee.endPosition.column - 1;
  }

  const defs = await executeGotoProvider(
    uri,
    row,
    col,
    "vscode.executeDefinitionProvider",
  );
  ranges.push(...defs);

  return ranges;
}
|
||||
|
||||
/**
|
||||
* and other stuff not directly on the path:
|
||||
* - variables defined on line above
|
||||
* ...etc...
|
||||
*/
|
||||
|
||||
/**
 * Collects definition snippets for the syntax nodes around the cursor.
 *
 * The file is parsed, the tree path at `cursorIndex` is reversed and walked,
 * and for every node the resolved definition ranges are read from disk.
 * Each resulting snippet gets a fixed score of 0.8.
 *
 * @param filepath Path of the file being edited.
 * @param contents Current text of that file.
 * @param cursorIndex Cursor offset within `contents`.
 * @returns The snippets, or an empty list when the file cannot be parsed or
 *          no tree path is found.
 */
export async function getDefinitionsFromLsp(
  filepath: string,
  contents: string,
  cursorIndex: number
): Promise<AutocompleteSnippet[]> {
  const ast = await getAst(filepath, contents);
  if (!ast) {
    return [];
  }

  const treePath = await getTreePathAtCursor(ast, cursorIndex);
  if (!treePath) {
    return [];
  }

  // Attaches the text covered by a definition range to the range itself.
  const withContents = async (def: RangeInFile): Promise<RangeInFileWithContents> => {
    const start = new vscode.Position(
      def.range.start.line,
      def.range.start.character,
    );
    const end = new vscode.Position(def.range.end.line, def.range.end.character);
    const text = await readRangeInFile(def.filepath, new vscode.Range(start, end));
    return { ...def, contents: text };
  };

  const results: RangeInFileWithContents[] = [];
  // Nodes are processed one after another; the ranges of a single node are read concurrently.
  for (const node of treePath.reverse()) {
    const definitions = await getDefinitionsForNode(filepath, node);
    const loaded = await Promise.all(definitions.map(withContents));
    results.push(...loaded);
  }

  return results.map((result) => ({
    ...result,
    score: 0.8,
  }));
}
|
@ -5,12 +5,153 @@
|
||||
通过AST获取相对完整的信息,可能会增加提示的准确度,但也会增加代码提示的复杂度。
|
||||
*/
|
||||
|
||||
import * as path from "path";
|
||||
import { logger } from "../../util/logger";
|
||||
import { log } from "console";
|
||||
import { getAst, getAstNodeByRange, getTreePathAtCursor, RangeInFileWithContents } from "./ast/ast";
|
||||
import Parser from "web-tree-sitter";
|
||||
import { getCommentPrefix, getLangageFunctionConfig, LanguageFunctionsConfig } from "./ast/language";
|
||||
import { findFunctionRanges, FunctionRange } from "./ast/findFunctions";
|
||||
import { RecentEdit } from "./recentEdits";
|
||||
import { getLanguageForFile, getQueryVariablesSource } from "./ast/treeSitter";
|
||||
import { getDefinitions, getDefinitionsFromLsp, getTypeDefinitions } from "./lsp";
|
||||
import { RangeInFile, readFileByVSCode, readRangeInFile, readRangesInFile, Range as AstRange, readRangesInFileContents } from "./utils";
|
||||
import { collapseFile } from "./ast/collapseBlock";
|
||||
import { UiUtilWrapper } from "../../util/uiUtil";
|
||||
import { countTokens } from "./llm/countTokens";
|
||||
|
||||
|
||||
// Size limits used when assembling the completion context.
// NOTE(review): PREFIX_MAX_SIZE / SUFFIX_MAX_SIZE look like character budgets — confirm.
const PREFIX_MAX_SIZE = 600 as number;
const SUFFIX_MAX_SIZE = 400 as number;
// Compared against token counts of the file contents.
const CONTEXT_LIMITED_SIZE = 6000 as number;
|
||||
|
||||
|
||||
/**
 * Builds the prefix/suffix context of the current file around the cursor.
 *
 * When the file's token count is below half of CONTEXT_LIMITED_SIZE, or the
 * file cannot be parsed, the plain `curfilePrompt` window is used. Otherwise
 * function bodies are collapsed via `collapseCodeBlock`.
 *
 * @param filepath Path of the file being edited.
 * @param contents Current text of that file.
 * @param curRow Zero-based cursor line.
 * @param curColumn Zero-based cursor column.
 * @returns The text before (`prefix`) and after (`suffix`) the cursor.
 */
export async function currentFileContext(
    filepath: string,
    contents: string,
    curRow: number,
    curColumn: number
) : Promise< { prefix: string, suffix: string } > {
    const isSmallFile = countTokens(contents) < CONTEXT_LIMITED_SIZE*0.5;

    // Parsing is only attempted for files too large to send as-is.
    const ast = isSmallFile ? undefined : await getAst(filepath, contents);
    if (!ast) {
        return curfilePrompt(filepath, contents, curRow, curColumn);
    }

    const functionRanges = await findFunctionRanges(filepath, ast.rootNode);
    const collapsed = await collapseCodeBlock(functionRanges, filepath, contents, curRow, curColumn);
    return collapsed;
}
|
||||
|
||||
|
||||
/**
 * Splits a file into the text before and after the cursor, replacing the
 * bodies of functions away from the cursor with a one-line "..." comment.
 *
 * Functions that are never collapsed:
 * - the function containing the cursor, plus one neighbour when that
 *   function spans fewer than 5 lines;
 * - when the cursor is outside every function, the nearest preceding
 *   function, or else the nearest following one;
 * - any function whose collapsible body spans too few lines.
 *
 * @param functions Function ranges of the file, expected in ascending position order. Not modified.
 * @param filepath Path of the file, used to look up the comment prefix.
 * @param contents Current text of the file.
 * @param curRow Zero-based cursor line in `contents`.
 * @param curColumn Zero-based cursor column.
 * @returns `prefix` (text before the cursor) and `suffix` (text after it), computed on the collapsed text.
 */
export async function collapseCodeBlock(functions: FunctionRange[], filepath: string, contents: string, curRow: number, curColumn: number): Promise<{ prefix: string, suffix: string }> {
    const commentPrefix = await getCommentPrefix(filepath);
    const lines = contents.split("\n");

    let newCurRow = curRow;
    const newCurColumn = curColumn;

    // find function before and after cursor
    let preFunc: FunctionRange | undefined = undefined;
    let nextFunc: FunctionRange | undefined = undefined;
    let curFunc: FunctionRange | undefined = undefined;
    for (const func of functions) {
        if (func.define.end.row < curRow) {
            preFunc = func;
        }
        if (!nextFunc && func.define.start.row > curRow) {
            nextFunc = func;
            break;
        }

        if (func.define.start.row <= curRow && curRow <= func.define.end.row) {
            curFunc = func;
        }
    }

    // disable collapse ranges
    const disableCollapseRanges: FunctionRange[] = [];
    const neighbour = preFunc ?? nextFunc;
    if (!curFunc) {
        if (neighbour) {
            disableCollapseRanges.push(neighbour);
        }
    } else {
        disableCollapseRanges.push(curFunc);
        const funcLines = curFunc.define.end.row - curFunc.define.start.row + 1;
        // A very short current function gives little context, so keep a neighbour expanded too.
        if (funcLines < 5 && neighbour) {
            disableCollapseRanges.push(neighbour);
        }
    }

    // Visit functions in reverse order so that splicing lines never shifts
    // the rows of functions still to be processed. Iterate over a copy:
    // Array.prototype.reverse would otherwise reverse the caller's array.
    for (const func of [...functions].reverse()) {
        const funcDefine = func.define;
        const funcBody = func.body;

        if (funcBody.start === funcBody.end) {
            continue;
        }

        let bodyStartLine = funcBody.start.row;
        let bodyEndLine = funcBody.end.row;

        // When the body starts on the definition line, or other text precedes
        // the body on its first line, keep the first and last body lines.
        const lineBeforeBodyStart = lines[funcBody.start.row].slice(0, funcBody.start.column);
        if (funcDefine.start.row === funcBody.start.row || lineBeforeBodyStart.trim() !== "") {
            bodyStartLine = funcBody.start.row + 1;
            bodyEndLine = funcBody.end.row - 1;
        }

        if (bodyEndLine - bodyStartLine <= 3) {
            continue;
        }

        if (disableCollapseRanges.some((range) => range.define === funcDefine)) {
            continue;
        }

        // Replace lines bodyStartLine..bodyEndLine with a single "..." comment
        // indented as far as the first replaced line (spaces are used for the indent).
        let indent = lines[bodyStartLine].search(/\S/);
        if (indent === -1) {
            indent = lines[bodyStartLine].length;
        }
        const indentStr = " ".repeat(indent);
        lines.splice(bodyStartLine, bodyEndLine - bodyStartLine + 1, `${indentStr}${commentPrefix}...`);

        // Update the cursor row: the replaced range shrank to one line.
        if (curRow > bodyEndLine) {
            newCurRow -= bodyEndLine - bodyStartLine;
        }
    }

    // create prefix and suffix, prefix is the code before the cursor, suffix is the code after the cursor
    const prefix = lines.slice(0, newCurRow).join("\n") + "\n" + lines[newCurRow].slice(0, newCurColumn);
    const suffix = lines[newCurRow].slice(newCurColumn) + "\n" + lines.slice(newCurRow+1).join("\n");

    return {prefix, suffix};
}
|
||||
|
||||
// 尽量获取一个完整的代码片段作为代码补全的上下文
|
||||
// 解析AST是一个好的方法,但还是会有点偏重计算。先尝试通过缩进来定位合适的块。
|
||||
@ -23,8 +164,8 @@ async function curfilePrompt(filePath: string, fileContent: string, line: number
|
||||
// 初始化prefix和suffix内容及长度
|
||||
let prefix = '';
|
||||
let suffix = '';
|
||||
let prefixSize = 0;
|
||||
let suffixSize = 0;
|
||||
let prefixTokenCount = 0;
|
||||
let suffixTokenCount = 0;
|
||||
|
||||
// 从光标所在行开始,向上构建前缀
|
||||
for (let i = line; i >= 0; i--) {
|
||||
@ -33,11 +174,13 @@ async function curfilePrompt(filePath: string, fileContent: string, line: number
|
||||
lineText = lines[i].substring(0, column);
|
||||
}
|
||||
|
||||
prefix = lineText + prefix;
|
||||
prefixSize += lineText.length;
|
||||
if (prefixSize > PREFIX_MAX_SIZE) {
|
||||
const lineTokenCount = countTokens(lineText);
|
||||
if (prefixTokenCount + lineTokenCount > CONTEXT_LIMITED_SIZE*0.7*0.5) {
|
||||
break;
|
||||
}
|
||||
|
||||
prefix = lineText + prefix;
|
||||
prefixTokenCount += lineTokenCount;
|
||||
}
|
||||
|
||||
// 从光标所在行下一行开始,向下构建后缀
|
||||
@ -47,11 +190,13 @@ async function curfilePrompt(filePath: string, fileContent: string, line: number
|
||||
lineText = lines[i].substring(column, lines[i].length) + '\n';
|
||||
}
|
||||
|
||||
suffix += lineText;
|
||||
suffixSize += lineText.length;
|
||||
if (suffixSize > PREFIX_MAX_SIZE) {
|
||||
const lineTokenCount = countTokens(lineText);
|
||||
if (suffixTokenCount + lineTokenCount > CONTEXT_LIMITED_SIZE*0.3*0.5) {
|
||||
break;
|
||||
}
|
||||
|
||||
suffix += lineText;
|
||||
suffixTokenCount += lineTokenCount;
|
||||
}
|
||||
|
||||
// 返回前缀和后缀
|
||||
@ -61,9 +206,278 @@ async function curfilePrompt(filePath: string, fileContent: string, line: number
|
||||
};
|
||||
}
|
||||
|
||||
export async function createPrompt(filePath: string, fileContent: string, line: number, column: number) {
|
||||
const { prefix, suffix } = await curfilePrompt(filePath, fileContent, line, column);
|
||||
const prompt = "<fim_prefix>" + prefix + "<fim_suffix>" + suffix + "<fim_middle>";
|
||||
/**
 * Builds a context string from the most recent edits made in other files.
 *
 * Edits are scanned from newest to oldest and up to three are kept. An edit
 * is skipped when it belongs to `curFile`, has empty collapsed content, or
 * has more than 50 non-empty lines. Each kept edit contributes its file name
 * (behind that file's comment prefix) followed by its collapsed content.
 *
 * @param recentEdits Recorded edits, oldest first.
 * @param curFile File currently being completed; its edits are excluded.
 * @returns The concatenated context, or "" when no edit qualifies.
 */
async function createRecentEditContext(recentEdits: RecentEdit[], curFile: string) {
    const selected: RecentEdit[] = [];

    let index = recentEdits.length - 1;
    while (index >= 0 && selected.length < 3) {
        const candidate = recentEdits[index];
        index -= 1;

        const usable = candidate.fileName !== curFile && candidate.collapseContent !== "";
        if (!usable) {
            continue;
        }

        // Skip edits whose collapsed content has more than 50 non-empty lines.
        const nonEmptyLineCount = candidate.collapseContent
            .split("\n")
            .filter(line => line.trim() !== "")
            .length;
        if (nonEmptyLineCount <= 50) {
            selected.push(candidate);
        }
    }

    const sections: string[] = [];
    for (const edit of selected) {
        const commentPrefix = await getCommentPrefix(edit.fileName);
        sections.push(`${commentPrefix}${edit.fileName}\n\n` + `${edit.collapseContent}\n\n\n\n`);
    }

    return sections.join("");
}
|
||||
|
||||
// find all related symbol defines
|
||||
/**
 * Find the type definitions of variables referenced near the cursor and return
 * them as collapsed code blocks.
 *
 * Steps:
 *  1. Parse the current file and run the language's "variables" query on it.
 *  2. Drop `variable` captures that lie inside any function other than the one
 *     containing `line`.
 *  3. Ask for the type definition of every remaining variable.
 *  4. Keep only definitions inside the workspace and outside `node_modules`,
 *     de-duplicated by file and start position, and not in the current file.
 *  5. For each definition, take the text of the AST node at its start position,
 *     skip single-line blocks, and collapse the rest.
 *
 * @param filePath    path of the file being completed
 * @param fileContent current text of that file
 * @param line        cursor row, compared against tree-sitter `row` values
 * @param column      cursor column (currently unused)
 * @returns collapsed definitions with the file they come from; `[]` when there
 *          is no workspace, the file cannot be parsed or queried, or any step throws
 */
export async function symbolDefinesContext(filePath: string, fileContent: string, line: number, column: number) : Promise < { filepath: string, codeblock: string }[] > {
    const workspacePath = UiUtilWrapper.workspaceFoldersFirstPath();
    if (!workspacePath) {
        return [];
    }

    // parse filepath
    const ast = await getAst(filePath, fileContent);
    if (!ast) {
        return [];
    }

    const lang = await getLanguageForFile(filePath);
    if (!lang) {
        return [];
    }

    const querySource = await getQueryVariablesSource(filePath);
    if (!querySource) {
        return [];
    }

    try {
        const query = lang.query(querySource);
        const matches = query?.matches(ast.rootNode);
        if (!matches) {
            return [];
        }

        // Functions that do NOT contain the cursor line; variables inside them are ignored.
        const functions = await findFunctionRanges(filePath, ast.rootNode);
        const otherFunctions = functions.filter(f => {
            return !(f.define.start.row <= line && f.define.end.row >= line);
        });

        // Collect `variable` captures. `import.type` captures are not collected:
        // the lookup that consumed them is disabled.
        const variableNodes: Parser.SyntaxNode[] = [];
        for (const match of matches) {
            for (const capture of match.captures) {
                if (capture.name === 'variable') {
                    variableNodes.push(capture.node);
                }
            }
        }

        const relevantVariableNodes = variableNodes.filter(n => {
            return !otherFunctions.some(f => {
                return f.define.start.row <= n.startPosition.row && f.define.end.row >= n.endPosition.row;
            });
        });

        const codeblockRanges: RangeInFile[] = [];
        for (const node of relevantVariableNodes) {
            codeblockRanges.push( ...await getTypeDefinitions(node.startPosition.row, node.startPosition.column, filePath));
        }

        // Keep workspace-local, non-node_modules ranges; de-duplicate by file and
        // start position (first occurrence wins); group the survivors by file.
        const nodeModulesPath = path.join(workspacePath, 'node_modules');
        const seenRanges = new Set<string>();
        const rangesByFile = new Map<string, RangeInFile[]>();
        for (const r of codeblockRanges) {
            if (!r.filepath.startsWith(workspacePath) || r.filepath.startsWith(nodeModulesPath)) {
                continue;
            }
            const key = `${r.filepath}\n${r.range.start.line}:${r.range.start.character}`;
            if (seenRanges.has(key)) {
                continue;
            }
            seenRanges.add(key);

            const group = rangesByFile.get(r.filepath);
            if (group) {
                group.push(r);
            } else {
                rangesByFile.set(r.filepath, [r]);
            }
        }

        const codeblocks: { filepath: string, codeblock: string }[] = [];
        for (const [filepath, ranges] of rangesByFile) {
            // Definitions in the file being completed are already part of the prompt.
            if (filepath === filePath) {
                continue;
            }

            const refContents = await readFileByVSCode(filepath);
            if (!refContents) {
                continue;
            }

            const refAst = await getAst(filepath, refContents);
            if (!refAst) {
                continue;
            }

            // Distinct block texts, in first-seen order.
            const blockTexts = new Set<string>();
            for (const range of ranges) {
                const blockNode = await getAstNodeByRange(refAst, range.range.start.line, range.range.start.character);
                if (!blockNode) {
                    continue;
                }
                blockTexts.add(blockNode.text);
            }

            for (const content of blockTexts) {
                // A single-line block has nothing to collapse and is skipped.
                if (content.trim().split("\n").length === 1) {
                    continue;
                }

                const collapseContent = await collapseFile(filepath, content);
                if (collapseContent) {
                    codeblocks.push({ filepath, codeblock: collapseContent });
                }
            }
        }

        return codeblocks;
    } catch (e) {
        // Best effort: a failed lookup must not break completion, but leave a trace.
        logger.channel()?.error(`symbolDefinesContext failed for ${filePath}: ${e}`);
        return [];
    }
}
|
||||
|
||||
/**
 * Collect collapsed definitions of the symbols called around the cursor.
 *
 * Definitions come from `getDefinitionsFromLsp`; only those located inside the
 * first workspace folder and outside its `node_modules` directory are kept.
 * Each definition is collapsed using, and reported under, the path of the file
 * it was found in.
 *
 * @param filepath    path of the file being completed
 * @param fileContent current text of that file
 * @param posOffset   cursor offset passed through to `getDefinitionsFromLsp`
 * @returns one entry per definition that collapses to non-empty text; `[]`
 *          when there is no workspace folder
 */
export async function createContextCallDefine( filepath: string, fileContent: string, posOffset: number ) : Promise < { filepath: string, codeblock: string }[] > {
    // Check the workspace first so no definition lookup is made when its result would be discarded.
    const workspacePath = UiUtilWrapper.workspaceFoldersFirstPath();
    if (!workspacePath) {
        return [];
    }

    const nodeModulesPath = path.join(workspacePath, 'node_modules');
    const allDefs = await getDefinitionsFromLsp(
        filepath,
        fileContent,
        posOffset
    );
    const defs = allDefs.filter(r => {
        return r.filepath.startsWith(workspacePath) && !r.filepath.startsWith(nodeModulesPath);
    });

    const codeblocks: { filepath: string, codeblock: string }[] = [];
    for (const cdef of defs) {
        // Use the definition's own file, not the file being completed: the
        // definition may live elsewhere and the prompt labels the block with this path.
        const collapseContent = await collapseFile(cdef.filepath, cdef.contents);
        if (collapseContent) {
            codeblocks.push({ filepath: cdef.filepath, codeblock: collapseContent });
        }
    }

    return codeblocks;
}
|
||||
|
||||
/**
 * Assemble the fill-in-the-middle prompt for a completion request.
 *
 * The token budget (`CONTEXT_LIMITED_SIZE`) is spent in this order: the prefix
 * of the current file, its suffix, definitions of called symbols, type
 * definitions of nearby variables, and finally the recent-edit context. A piece
 * that does not fit is dropped (suffix, recent edits) or ends its list
 * (definitions). The emitted prompt places the recent edits first, then the
 * symbol definitions, the call definitions, a comment line with the current
 * file path, and the prefix/suffix wrapped in FIM markers.
 */
export async function createPrompt(filePath: string, fileContent: string, line: number, column: number, posoffset: number, recentEdits: RecentEdit[]) {
    const commentPrefix = await getCommentPrefix(filePath);
    let recentEditContext = await createRecentEditContext(recentEdits, filePath);
    const symbolDefines: { filepath: string, codeblock: string }[] = await symbolDefinesContext(filePath, fileContent, line, column);

    const currentFile = await currentFileContext(filePath, fileContent, line, column);
    const prefix = currentFile.prefix;
    let suffix = currentFile.suffix;

    // The prefix is always kept; the suffix only if both fit within the budget.
    let usedTokens = countTokens(prefix);
    const suffixTokens = countTokens(suffix);
    if (usedTokens + suffixTokens < CONTEXT_LIMITED_SIZE) {
        usedTokens += suffixTokens;
    } else {
        suffix = "";
    }

    // Definitions of called symbols, while the budget lasts.
    let callDefContext = "";
    if (usedTokens < CONTEXT_LIMITED_SIZE) {
        const callCodeBlocks = await createContextCallDefine(filePath, fileContent, posoffset);
        for (const block of callCodeBlocks) {
            const blockTokens = countTokens(block.codeblock);
            if (usedTokens + blockTokens > CONTEXT_LIMITED_SIZE) {
                break;
            }
            usedTokens += blockTokens;
            callDefContext += `${commentPrefix}${block.filepath}\n\n`;
            callDefContext += `${block.codeblock}\n\n\n\n`;
        }
    }

    // Type definitions of nearby variables, while the budget lasts.
    let symbolContext = "";
    for (const define of symbolDefines) {
        const defineTokens = countTokens(define.codeblock);
        if (usedTokens + defineTokens > CONTEXT_LIMITED_SIZE) {
            break;
        }
        usedTokens += defineTokens;
        symbolContext += `${commentPrefix}${define.filepath}\n\n`;
        symbolContext += `${define.codeblock}\n\n\n\n`;
    }

    // Recent edits are all-or-nothing.
    const recentTokens = countTokens(recentEditContext);
    if (usedTokens + recentTokens < CONTEXT_LIMITED_SIZE) {
        usedTokens += recentTokens;
    } else {
        recentEditContext = "";
    }
    logger.channel()?.info("Complete token:", usedTokens);

    const currentFileHeader = `${commentPrefix}${filePath}\n\n`;
    const beforeCursor = recentEditContext + symbolContext + callDefContext + currentFileHeader + prefix;
    const prompt = "<fim_prefix>" + beforeCursor + "<fim_suffix>" + suffix + "<fim_middle>";
    return prompt;
}
|
||||
|
||||
/**
 * Placeholder for resolving the definition of an imported type.
 * Not implemented: every call throws.
 */
function findImportTypeDefine(filePath: string, fileContent: string, node: Parser.SyntaxNode) {
    const notImplemented = new Error("Function not implemented.");
    throw notImplemented;
}
|
||||
|
242
src/contributes/codecomplete/ranking.ts
Normal file
242
src/contributes/codecomplete/ranking.ts
Normal file
@ -0,0 +1,242 @@
|
||||
import { RangeInFileWithContents } from "./ast/ast";
|
||||
import { countTokens } from "./llm/countTokens";
|
||||
import { Range } from "./utils";
|
||||
|
||||
/**
 * A candidate snippet for the completion prompt: a range of a file together
 * with its text, plus a relevance score. In this file the score is the Jaccard
 * similarity between the snippet text and the window around the cursor.
 */
export type AutocompleteSnippet = RangeInFileWithContents & {
  score: number;
};
|
||||
|
||||
// Characters treated as separators between symbols: whitespace and common punctuation.
const rx = /[\s.,\/#!$%\^&\*;:{}=\-_`~()\[\]]/g;

/**
 * Split a snippet on separator characters and return the distinct non-empty
 * pieces.
 */
function getSymbolsForSnippet(snippet: string): Set<string> {
  const found = new Set<string>();
  for (const piece of snippet.split(rx)) {
    const symbol = piece.trim();
    if (symbol !== "") {
      found.add(symbol);
    }
  }
  return found;
}
|
||||
|
||||
/**
 * Jaccard similarity of two snippets: the number of symbols they share divided
 * by the number of distinct symbols across both. Returns 0 when neither snippet
 * contains any symbol.
 */
export function jaccardSimilarity(a: string, b: string): number {
  const symbolsOfA = getSymbolsForSnippet(a);
  const symbolsOfB = getSymbolsForSnippet(b);

  const unionSize = new Set([...symbolsOfA, ...symbolsOfB]).size;
  if (unionSize === 0) {
    // Nothing to compare; also avoids dividing by zero.
    return 0;
  }

  const sharedCount = [...symbolsOfA].filter((symbol) => symbolsOfB.has(symbol)).length;
  return sharedCount / unionSize;
}
|
||||
|
||||
/**
 * Score each range against the text around the cursor, merge overlapping
 * ranges, and return the result ordered by ascending score.
 */
export function rankSnippets(
  ranges: RangeInFileWithContents[],
  windowAroundCursor: string,
): AutocompleteSnippet[] {
  const scored: AutocompleteSnippet[] = [];
  for (const candidate of ranges) {
    const similarity = jaccardSimilarity(candidate.contents, windowAroundCursor);
    scored.push({ score: similarity, ...candidate });
  }

  const unique = deduplicateSnippets(scored);
  unique.sort((left, right) => left.score - right.score);
  return unique;
}
|
||||
|
||||
/**
 * Remove duplication between snippets: snippets are grouped by file and, within
 * each file, overlapping ranges are merged into one snippet.
 */
export function deduplicateSnippets(
  snippets: AutocompleteSnippet[],
): AutocompleteSnippet[] {
  // Bucket the snippets by the file they belong to.
  const byFile: { [key: string]: AutocompleteSnippet[] } = {};
  snippets.forEach((snippet) => {
    const bucket = byFile[snippet.filepath];
    if (bucket) {
      bucket.push(snippet);
    } else {
      byFile[snippet.filepath] = [snippet];
    }
  });

  // Merge within each bucket and concatenate the per-file results.
  let result: AutocompleteSnippet[] = [];
  for (const file of Object.keys(byFile)) {
    result = result.concat(mergeSnippetsByRange(byFile[file]));
  }
  return result;
}
|
||||
|
||||
/**
 * Merge snippets of one file whose line ranges overlap or touch.
 *
 * Snippets are processed in order of start line. A snippet that starts on or
 * before the end line of the previously kept snippet is folded into it: the
 * score becomes the maximum of the two, the range end becomes whichever end is
 * on the later line, and the contents are combined without repeating the
 * overlapping lines.
 *
 * Neither the input array nor the snippet/range objects in it are modified;
 * the returned snippets are copies.
 *
 * @param snippets snippets that all belong to the same file
 * @returns merged snippets ordered by start line
 */
function mergeSnippetsByRange(
  snippets: AutocompleteSnippet[],
): AutocompleteSnippet[] {
  if (snippets.length === 0) {
    return snippets;
  }

  // Sort a copy: sorting or draining the caller's array would be a visible side effect.
  const sorted = [...snippets].sort(
    (a, b) => a.range.start.line - b.range.start.line,
  );
  const merged: AutocompleteSnippet[] = [];

  for (const next of sorted) {
    const last = merged.length > 0 ? merged[merged.length - 1] : undefined;
    if (last !== undefined && last.range.end.line >= next.range.start.line) {
      // Combine the text while `last` still has its own range; the overlap is
      // computed from the two original ranges.
      const contents = mergeOverlappingRangeContents(last, next);
      // Never shrink: a snippet contained in `last` must not pull the end back.
      const end =
        next.range.end.line > last.range.end.line
          ? next.range.end
          : last.range.end;
      merged[merged.length - 1] = {
        ...last,
        score: Math.max(last.score, next.score),
        range: { start: last.range.start, end },
        contents,
      };
    } else {
      merged.push({
        ...next,
        range: { start: next.range.start, end: next.range.end },
      });
    }
  }

  return merged;
}

/**
 * Text of the union of two overlapping snippets, where `second` starts at or
 * after `first`'s start line and no later than its end line.
 *
 * Assumes line `i` of `contents` corresponds to file line `range.start.line + i`
 * — TODO confirm against how the snippets are produced.
 */
function mergeOverlappingRangeContents(
  first: RangeInFileWithContents,
  second: RangeInFileWithContents,
): string {
  // `second` lies entirely within `first`: nothing new to add.
  if (second.range.end.line <= first.range.end.line) {
    return first.contents;
  }

  // Lines of `first` that come before `second` begins.
  const headLineCount = second.range.start.line - first.range.start.line;
  if (headLineCount <= 0) {
    return second.contents;
  }

  const head = first.contents.split("\n").slice(0, headLineCount).join("\n");
  return head + "\n" + second.contents;
}
|
||||
|
||||
/**
 * Pick snippets, in the given order, until the token budget is used up. A
 * snippet that does not fit is skipped and later, smaller snippets may still
 * be taken.
 */
export function fillPromptWithSnippets(
  snippets: AutocompleteSnippet[],
  maxSnippetTokens: number,
  modelName: string,
): AutocompleteSnippet[] {
  const kept: AutocompleteSnippet[] = [];
  let budget = maxSnippetTokens;

  for (const candidate of snippets) {
    const cost = countTokens(candidate.contents, modelName);
    if (budget - cost >= 0) {
      budget -= cost;
      kept.push(candidate);
    }
  }

  return kept;
}
|
||||
|
||||
/**
 * Line-level intersection of two ranges. Returns null unless the later start
 * line is strictly before the earlier end line; otherwise returns the shared
 * span with both characters set to 0.
 */
function rangeIntersectionByLines(a: Range, b: Range): Range | null {
  const startLine = Math.max(a.start.line, b.start.line);
  const endLine = Math.min(a.end.line, b.end.line);

  const isEmpty = startLine >= endLine;
  return isEmpty
    ? null
    : {
        start: { line: startLine, character: 0 },
        end: { line: endLine, character: 0 },
      };
}
|
||||
|
||||
/**
 * Subtract `remove` from `orig`, comparing by line only. Depending on how the
 * two ranges are positioned the result is empty, one range, or two disjoint
 * ranges. Returned ranges reuse the position objects of the inputs.
 */
function rangeDifferenceByLines(orig: Range, remove: Range): Range[] {
  const startsAtOrAfter = orig.start.line >= remove.start.line;
  const startsAtOrBefore = orig.start.line <= remove.start.line;
  const endsAtOrBefore = orig.end.line <= remove.end.line;
  const endsAtOrAfter = orig.end.line >= remove.end.line;

  // `orig` lies entirely inside `remove`: nothing is left.
  if (startsAtOrAfter && endsAtOrBefore) {
    return [];
  }

  // `remove` lies inside `orig`: the range is split in two.
  if (startsAtOrBefore && endsAtOrAfter) {
    const before = { start: orig.start, end: remove.start };
    const after = { start: remove.end, end: orig.end };
    return [before, after];
  }

  // `remove` covers the beginning of `orig`: keep the tail.
  if (startsAtOrAfter && endsAtOrAfter) {
    return [{ start: remove.end, end: orig.end }];
  }

  // `remove` covers the end of `orig`: keep the head.
  if (startsAtOrBefore && endsAtOrBefore) {
    return [{ start: orig.start, end: remove.start }];
  }

  return [orig];
}
|
||||
|
||||
/**
 * Cut `range` out of every snippet that belongs to `filepath`.
 *
 * Snippets from other files, and snippets that do not intersect `range` by
 * line, are passed through unchanged. An intersecting snippet is replaced by
 * zero, one or two copies of itself whose `range` is the part left after the
 * intersection is removed; the other fields of the copies, including
 * `contents`, are carried over as they are.
 */
export function removeRangeFromSnippets(
  snippets: AutocompleteSnippet[],
  filepath: string,
  range: Range,
): AutocompleteSnippet[] {
  return snippets.reduce<AutocompleteSnippet[]>((kept, snippet) => {
    const overlap =
      snippet.filepath === filepath
        ? rangeIntersectionByLines(range, snippet.range)
        : null;

    if (!overlap) {
      kept.push(snippet);
      return kept;
    }

    for (const remaining of rangeDifferenceByLines(snippet.range, overlap)) {
      kept.push({ ...snippet, range: remaining });
    }
    return kept;
  }, []);
}
|
@ -1,4 +1,76 @@
|
||||
/*
 Records recently modified content, used for code completion.
 */
|
||||
import { logger } from '../../util/logger';
|
||||
import * as vscode from 'vscode';
|
||||
import { collapseFile } from './ast/collapseBlock';
|
||||
import { getCommentPrefix } from './ast/language';
|
||||
|
||||
|
||||
/**
 * Snapshot of one recently edited file.
 *
 * `content` is the latest full text; `collapseContent` is the collapsed form of
 * that text, or "" while no collapsed form matching the current text exists.
 */
export class RecentEdit {
    fileName: string;
    content: string;
    collapseContent: string;

    constructor(fileName: string, content: string) {
        this.fileName = fileName;
        this.content = content;
        this.collapseContent = "";
    }

    /**
     * Compute the collapsed form of the current content.
     *
     * The result is stored only if the content did not change while the
     * collapse was running; otherwise it would describe outdated text, so
     * `collapseContent` is left as `update()` set it.
     */
    async close() {
        const snapshot = this.content;
        const collapsed = await collapseFile(this.fileName, snapshot);
        if (this.content === snapshot) {
            this.collapseContent = collapsed;
        }
    }

    /**
     * Replace the stored text and invalidate the collapsed form.
     */
    async update(content: string) {
        this.content = content;
        this.collapseContent = "";
    }
}
|
||||
|
||||
/**
 * Tracks the most recently edited or viewed files for use as completion context.
 *
 * The list is ordered from least to most recent and holds at most `maxCount`
 * entries; the oldest entries are dropped first.
 */
export class RecentEditsManager {
    private edits: RecentEdit[];
    private maxCount: number = 10;

    constructor() {
        this.edits = [];

        // Keep the stored text of a file in sync with every change made to it.
        vscode.workspace.onDidChangeTextDocument(e => {
            if (e.document.uri.scheme !== "file") {
                return;
            }

            const edit = this.edits.find(editFile => editFile.fileName === e.document.fileName);
            if (edit) {
                void edit.update(e.document.getText());
            } else {
                this.remember(new RecentEdit(e.document.fileName, e.document.getText()));
            }
        });

        // When another editor becomes active, collapse the tracked files and
        // mark the newly active file as the most recent one.
        vscode.window.onDidChangeActiveTextEditor(e => {
            if (!e) {
                return;
            }

            for (const edit of this.edits) {
                // Collapsing runs in the background; report failures instead of
                // leaving a rejected promise unhandled.
                edit.close().catch((err: unknown) => {
                    logger.channel()?.error(`Error collapsing ${edit.fileName}: ${err}`);
                });
            }

            const index = this.edits.findIndex(editFile => editFile.fileName === e.document.fileName);
            if (index >= 0) {
                const [edit] = this.edits.splice(index, 1);
                this.edits.push(edit);
            } else {
                this.remember(new RecentEdit(e.document.fileName, e.document.getText()));
            }
        });
    }

    /**
     * The tracked edits, least recent first. The returned array is the live
     * list, not a copy.
     */
    public getEdits(): RecentEdit[] {
        return this.edits;
    }

    /**
     * Append an edit as the most recent one and drop the oldest entries beyond
     * `maxCount`. The array is trimmed in place so references handed out by
     * `getEdits()` stay valid.
     */
    private remember(edit: RecentEdit): void {
        this.edits.push(edit);
        const overflow = this.edits.length - this.maxCount;
        if (overflow > 0) {
            this.edits.splice(0, overflow);
        }
    }
}
|
||||
|
89
src/contributes/codecomplete/utils.ts
Normal file
89
src/contributes/codecomplete/utils.ts
Normal file
@ -0,0 +1,89 @@
|
||||
import * as vscode from 'vscode';
|
||||
|
||||
|
||||
/**
 * A span of text between two positions. The readers in this file take whole
 * lines from `start.line` up to `end.line` and then the part of the end line
 * before `end.character`.
 */
export interface Range {
  start: Position;
  end: Position;
}
|
||||
/**
 * A location in a text file. In this file `line` is used directly as an index
 * into the array of lines obtained by splitting on "\n", and `character` as an
 * offset into that line.
 */
export interface Position {
  line: number;
  character: number;
}
|
||||
|
||||
/**
 * A range together with the path of the file it refers to.
 */
export interface RangeInFile {
  filepath: string;
  range: Range;
}
|
||||
|
||||
/**
 * Read a file through the VS Code file system API and return the text covered
 * by `range`.
 *
 * The result consists of the whole lines from `range.start.line` up to (not
 * including) `range.end.line`, followed by the end line cut at
 * `range.end.character`. An end line past the end of the file is clamped to the
 * last line. A range that does not span more than one line yields only the cut
 * end line, without a leading newline, matching `readRangesInFileContents`.
 *
 * NOTE: `range.start.character` is not taken into account.
 *
 * @param filepath path of the file to read
 * @param range    range to extract
 * @returns the extracted text
 */
export async function readRangeInFile(
  filepath: string,
  range: vscode.Range
): Promise<string> {
  const contents = new TextDecoder().decode(
    await vscode.workspace.fs.readFile(vscode.Uri.file(filepath))
  );
  const lines = contents.split("\n");

  const endLineIndex = Math.min(range.end.line, lines.length - 1);
  const tail = lines[endLineIndex].slice(0, range.end.character);

  if (range.start.line >= range.end.line) {
    // Single-line range: there are no whole lines to put before the tail.
    return tail;
  }
  return lines.slice(range.start.line, range.end.line).join("\n") + "\n" + tail;
}
|
||||
|
||||
/**
 * Read a file through the VS Code file system API and decode its bytes to a
 * string with a default `TextDecoder`.
 */
export async function readFileByVSCode(filepath: string): Promise<string> {
  const uri = vscode.Uri.file(filepath);
  const bytes = await vscode.workspace.fs.readFile(uri);
  return new TextDecoder().decode(bytes);
}
|
||||
|
||||
/**
 * Extract the text covered by `range` from file contents that are already in
 * memory.
 *
 * `lines` may carry the pre-split contents; when it is missing, `contents` is
 * split on "\n". The end line is clamped to the last available line. For a
 * multi-line range the whole lines before the end line are followed by the end
 * line cut at `range.end.character`; otherwise only the cut end line is
 * returned.
 *
 * TODO: handle start column.
 */
export async function readRangesInFileContents( contents: string, lines: string[], range: Range ) {
  const allLines = lines ? lines : contents.split("\n");

  const lastIndex = allLines.length - 1;
  const endLineIndex = range.end.line < lastIndex ? range.end.line : lastIndex;
  const tail = allLines[endLineIndex].slice(0, range.end.character);

  const spansSeveralLines = range.start.line < range.end.line;
  if (!spansSeveralLines) {
    return tail;
  }

  const head = allLines.slice(range.start.line, range.end.line).join("\n");
  return head + "\n" + tail;
}
|
||||
|
||||
/**
 * Read a file once through the VS Code file system API and return the text
 * covered by each of `ranges`, in the same order.
 *
 * For every range the result consists of the whole lines from
 * `range.start.line` up to (not including) `range.end.line`, followed by the
 * end line cut at `range.end.character`. An end line past the end of the file
 * is clamped to the last line. A range that does not span more than one line
 * yields only the cut end line, without a leading newline, matching
 * `readRangesInFileContents`.
 *
 * NOTE: `range.start.character` is not taken into account.
 *
 * @param filepath path of the file to read
 * @param ranges   ranges to extract
 * @returns one string per range
 */
export async function readRangesInFile(
  filepath: string,
  ranges: Range[]
): Promise<string[]> {
  const contents = new TextDecoder().decode(
    await vscode.workspace.fs.readFile(vscode.Uri.file(filepath))
  );
  const lines = contents.split("\n");

  return ranges.map((range) => {
    const endLineIndex = Math.min(range.end.line, lines.length - 1);
    const tail = lines[endLineIndex].slice(0, range.end.character);
    if (range.start.line >= range.end.line) {
      // Single-line range: there are no whole lines to put before the tail.
      return tail;
    }
    return lines.slice(range.start.line, range.end.line).join("\n") + "\n" + tail;
  });
}
|
@ -29,5 +29,3 @@ export async function getContextDetail(message: any, panel: vscode.WebviewPanel
|
||||
logger.channel()?.error(`Error reading file ${message.file}: ${error}`);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -7,10 +7,13 @@ import { logger } from './logger';
|
||||
export class DevChatConfig {
|
||||
private configFilePath: string;
|
||||
private data: any;
|
||||
// last modify timestamp of the config file
|
||||
private lastModifyTime: number;
|
||||
|
||||
constructor() {
    // The config lives at ~/.chat/config.yml; the home directory comes from
    // HOME, falling back to USERPROFILE, and finally to an empty string.
    const homeDir = process.env.HOME || process.env.USERPROFILE || '';
    this.lastModifyTime = 0;
    this.configFilePath = path.join(homeDir, '.chat', 'config.yml');
    this.readConfigFile();
}
|
||||
|
||||
@ -18,6 +21,7 @@ export class DevChatConfig {
|
||||
try {
|
||||
const fileContents = fs.readFileSync(this.configFilePath, 'utf8');
|
||||
this.data = yaml.parse(fileContents);
|
||||
this.lastModifyTime = fs.statSync(this.configFilePath).mtimeMs;
|
||||
} catch (error) {
|
||||
logger.channel()?.error(`Error reading the config file: ${error}`);
|
||||
logger.channel()?.show();
|
||||
@ -36,6 +40,12 @@ export class DevChatConfig {
|
||||
}
|
||||
|
||||
public get(key: string | string[]): any {
|
||||
// check if the config file has been modified
|
||||
const currentModifyTime = fs.statSync(this.configFilePath).mtimeMs;
|
||||
if (currentModifyTime > this.lastModifyTime) {
|
||||
this.readConfigFile();
|
||||
}
|
||||
|
||||
let keys: string[] = [];
|
||||
|
||||
if (typeof key === 'string') {
|
||||
@ -64,6 +74,12 @@ export class DevChatConfig {
|
||||
}
|
||||
|
||||
/**
 * Return the whole parsed configuration, re-reading the config file first when
 * its modification time is newer than the one recorded at the last read.
 *
 * If the file cannot be inspected (for example because it does not exist), the
 * configuration already held in memory is returned instead of throwing.
 */
public getAll(): any {
    try {
        // check if the config file has been modified
        const currentModifyTime = fs.statSync(this.configFilePath).mtimeMs;
        if (currentModifyTime > this.lastModifyTime) {
            this.readConfigFile();
        }
    } catch (error) {
        // statSync throws when the file is missing or unreadable; a getter
        // should not crash its caller, so fall back to the data in memory.
    }

    return this.data;
}
|
||||
|
||||
|
15
yarn.lock
15
yarn.lock
@ -5030,6 +5030,11 @@ listenercount@~1.0.1:
|
||||
resolved "https://registry.npmjs.org/listenercount/-/listenercount-1.0.1.tgz"
|
||||
integrity sha512-3mk/Zag0+IJxeDrxSgaDPy4zZ3w05PRZeJNnlWhzFz5OkX49J4krc+A8X2d2M69vGMBEX0uyl8M+W+8gH+kBqQ==
|
||||
|
||||
llama-tokenizer-js@^1.2.1:
|
||||
version "1.2.1"
|
||||
resolved "https://registry.npmjs.org/llama-tokenizer-js/-/llama-tokenizer-js-1.2.1.tgz"
|
||||
integrity sha512-SEVVc++cXR0D0Wv30AzMVWzPCAKM701vZYU31h5lCTIn4k5cfZpJ070YDcb2nPq2Ts3xgu44L19wIrq1z/XjXQ==
|
||||
|
||||
loader-runner@^4.2.0:
|
||||
version "4.3.0"
|
||||
resolved "https://registry.npmjs.org/loader-runner/-/loader-runner-4.3.0.tgz"
|
||||
@ -6624,6 +6629,11 @@ tree-kill@^1.2.2:
|
||||
resolved "https://registry.npmjs.org/tree-kill/-/tree-kill-1.2.2.tgz"
|
||||
integrity sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A==
|
||||
|
||||
tree-sitter-wasms@^0.1.7:
|
||||
version "0.1.7"
|
||||
resolved "https://registry.npmjs.org/tree-sitter-wasms/-/tree-sitter-wasms-0.1.7.tgz"
|
||||
integrity sha512-/EnmSDDqEnSnIuCqVhSOc14T8r792LmiztqlZMFSRg+4ACrgUWNLvEr2AUP1K76XCJrvuH8liSWfkdQpK41hIA==
|
||||
|
||||
trough@^2.0.0:
|
||||
version "2.1.0"
|
||||
resolved "https://registry.npmjs.org/trough/-/trough-2.1.0.tgz"
|
||||
@ -7022,6 +7032,11 @@ web-streams-polyfill@^3.0.3:
|
||||
resolved "https://registry.npmjs.org/web-streams-polyfill/-/web-streams-polyfill-3.2.1.tgz"
|
||||
integrity sha512-e0MO3wdXWKrLbL0DgGnUV7WHVuw9OUvL4hjgnPkIeEvESk74gAITi5G606JtZPp39cd8HA9VQzCIvA49LpPN5Q==
|
||||
|
||||
web-tree-sitter@^0.22.2:
|
||||
version "0.22.2"
|
||||
resolved "https://registry.npmjs.org/web-tree-sitter/-/web-tree-sitter-0.22.2.tgz"
|
||||
integrity sha512-gY/aw1wNQbTU0m7U5PojDOftsKMbBIpusJ9Ia/y7sfIN4jk9xXLHwQs+eQ3QP772IdjxVqje0hRTCh5CzrWeGg==
|
||||
|
||||
webpack-cli@^5.0.1, webpack-cli@5.x.x:
|
||||
version "5.0.1"
|
||||
resolved "https://registry.npmjs.org/webpack-cli/-/webpack-cli-5.0.1.tgz"
|
||||
|
Loading…
x
Reference in New Issue
Block a user