fix: tiktoken performance (construct the encoder once at module scope instead of on every getChatGPTEncoding call)

This commit is contained in:
Jing Hua 2023-03-25 23:06:27 +08:00
parent 6a6d2fe819
commit 7a4afd265b

View file

@@ -3,14 +3,7 @@ import { MessageInterface, ModelOptions } from '@type/chat';
import { Tiktoken } from '@dqbd/tiktoken/lite'; import { Tiktoken } from '@dqbd/tiktoken/lite';
const cl100k_base = await import('@dqbd/tiktoken/encoders/cl100k_base.json'); const cl100k_base = await import('@dqbd/tiktoken/encoders/cl100k_base.json');
// https://github.com/dqbd/tiktoken/issues/23#issuecomment-1483317174 const encoder = new Tiktoken(
export const getChatGPTEncoding = (
messages: MessageInterface[],
model: ModelOptions
) => {
const isGpt3 = model === 'gpt-3.5-turbo';
const encoder = new Tiktoken(
cl100k_base.bpe_ranks, cl100k_base.bpe_ranks,
{ {
...cl100k_base.special_tokens, ...cl100k_base.special_tokens,
@@ -19,7 +12,14 @@ export const getChatGPTEncoding = (
'<|im_sep|>': 100266, '<|im_sep|>': 100266,
}, },
cl100k_base.pat_str cl100k_base.pat_str
); );
// https://github.com/dqbd/tiktoken/issues/23#issuecomment-1483317174
export const getChatGPTEncoding = (
messages: MessageInterface[],
model: ModelOptions
) => {
const isGpt3 = model === 'gpt-3.5-turbo';
const msgSep = isGpt3 ? '\n' : ''; const msgSep = isGpt3 ? '\n' : '';
const roleSep = isGpt3 ? '\n' : '<|im_sep|>'; const roleSep = isGpt3 ? '\n' : '<|im_sep|>';