fix: tiktoken performance

This commit is contained in:
Jing Hua 2023-03-25 23:06:27 +08:00
parent 6a6d2fe819
commit 7a4afd265b

View file

@ -3,6 +3,17 @@ import { MessageInterface, ModelOptions } from '@type/chat';
import { Tiktoken } from '@dqbd/tiktoken/lite'; import { Tiktoken } from '@dqbd/tiktoken/lite';
const cl100k_base = await import('@dqbd/tiktoken/encoders/cl100k_base.json'); const cl100k_base = await import('@dqbd/tiktoken/encoders/cl100k_base.json');
// Shared tokenizer instance, built once at module scope so that
// getChatGPTEncoding does not construct a new Tiktoken on every call.
// It is configured from the cl100k_base encoder data imported above
// (BPE ranks, special tokens, and the split pattern string).
const encoder = new Tiktoken(
cl100k_base.bpe_ranks,
{
...cl100k_base.special_tokens,
// Additional special tokens layered on top of the base set. '<|im_sep|>' is
// used by getChatGPTEncoding as the role separator for models other than
// 'gpt-3.5-turbo'. NOTE(review): the ids are presumably taken from the
// tiktoken issue linked below this block; confirm before changing them.
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266,
},
cl100k_base.pat_str
);
// https://github.com/dqbd/tiktoken/issues/23#issuecomment-1483317174 // https://github.com/dqbd/tiktoken/issues/23#issuecomment-1483317174
export const getChatGPTEncoding = ( export const getChatGPTEncoding = (
messages: MessageInterface[], messages: MessageInterface[],
@ -10,17 +21,6 @@ export const getChatGPTEncoding = (
) => { ) => {
const isGpt3 = model === 'gpt-3.5-turbo'; const isGpt3 = model === 'gpt-3.5-turbo';
const encoder = new Tiktoken(
cl100k_base.bpe_ranks,
{
...cl100k_base.special_tokens,
'<|im_start|>': 100264,
'<|im_end|>': 100265,
'<|im_sep|>': 100266,
},
cl100k_base.pat_str
);
const msgSep = isGpt3 ? '\n' : ''; const msgSep = isGpt3 ? '\n' : '';
const roleSep = isGpt3 ? '\n' : '<|im_sep|>'; const roleSep = isGpt3 ? '\n' : '<|im_sep|>';