diff --git a/src/utils/messageUtils.ts b/src/utils/messageUtils.ts index 21f0ec4..b971e5b 100644 --- a/src/utils/messageUtils.ts +++ b/src/utils/messageUtils.ts @@ -1,6 +1,7 @@ import { MessageInterface, ModelOptions } from '@type/chat'; -import { encoding_for_model } from '@dqbd/tiktoken'; +import { Tiktoken } from '@dqbd/tiktoken/lite'; +const cl100k_base = await import('@dqbd/tiktoken/encoders/cl100k_base.json'); // https://github.com/dqbd/tiktoken/issues/23#issuecomment-1483317174 export const getChatGPTEncoding = ( @@ -9,11 +10,16 @@ export const getChatGPTEncoding = ( ) => { const isGpt3 = model === 'gpt-3.5-turbo'; - const encoder = encoding_for_model(model, { - '<|im_start|>': 100264, - '<|im_end|>': 100265, - '<|im_sep|>': 100266, - }); + const encoder = new Tiktoken( + cl100k_base.bpe_ranks, + { + ...cl100k_base.special_tokens, + '<|im_start|>': 100264, + '<|im_end|>': 100265, + '<|im_sep|>': 100266, + }, + cl100k_base.pat_str + ); const msgSep = isGpt3 ? '\n' : ''; const roleSep = isGpt3 ? '\n' : '<|im_sep|>';