Refactor countTokens to approximate token count by string length instead of running the tokenizer, simplifying the code and improving performance

This commit is contained in:
bobo.yang 2024-06-06 18:35:43 +08:00
parent 89f8320253
commit 144866dcba

View File

@ -42,16 +42,17 @@ function countTokens(
// defaults to llama2 because the tokenizer tends to produce more tokens
modelName: string = "llama2",
): number {
const encoding = encodingForModel(modelName);
if (Array.isArray(content)) {
return content.reduce((acc, part) => {
return acc + part.type === "imageUrl"
? countImageTokens(part)
: encoding.encode(part.text ?? "", "all", []).length;
}, 0);
} else {
return encoding.encode(content, "all", []).length;
}
return content.length;
// const encoding = encodingForModel(modelName);
// if (Array.isArray(content)) {
// return content.reduce((acc, part) => {
// return acc + part.type === "imageUrl"
// ? countImageTokens(part)
// : encoding.encode(part.text ?? "", "all", []).length;
// }, 0);
// } else {
// return encoding.encode(content, "all", []).length;
// }
}
function flattenMessages(msgs: ChatMessage[]): ChatMessage[] {