From 7a4afd265b58439bc9b374e747d4716f69af3dcd Mon Sep 17 00:00:00 2001 From: Jing Hua Date: Sat, 25 Mar 2023 23:06:27 +0800 Subject: [PATCH] fix: tiktoken performance --- src/utils/messageUtils.ts | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/utils/messageUtils.ts b/src/utils/messageUtils.ts index b971e5b..afccf4b 100644 --- a/src/utils/messageUtils.ts +++ b/src/utils/messageUtils.ts @@ -3,6 +3,17 @@ import { MessageInterface, ModelOptions } from '@type/chat'; import { Tiktoken } from '@dqbd/tiktoken/lite'; const cl100k_base = await import('@dqbd/tiktoken/encoders/cl100k_base.json'); +const encoder = new Tiktoken( + cl100k_base.bpe_ranks, + { + ...cl100k_base.special_tokens, + '<|im_start|>': 100264, + '<|im_end|>': 100265, + '<|im_sep|>': 100266, + }, + cl100k_base.pat_str +); + // https://github.com/dqbd/tiktoken/issues/23#issuecomment-1483317174 export const getChatGPTEncoding = ( messages: MessageInterface[], @@ -10,17 +21,6 @@ export const getChatGPTEncoding = ( ) => { const isGpt3 = model === 'gpt-3.5-turbo'; - const encoder = new Tiktoken( - cl100k_base.bpe_ranks, - { - ...cl100k_base.special_tokens, - '<|im_start|>': 100264, - '<|im_end|>': 100265, - '<|im_sep|>': 100266, - }, - cl100k_base.pat_str - ); - const msgSep = isGpt3 ? '\n' : ''; const roleSep = isGpt3 ? '\n' : '<|im_sep|>';