mirror of
https://github.com/NovaOSS/nova-betterchat.git
synced 2024-11-29 10:14:00 +01:00
parent
8ff5079266
commit
6a6d2fe819
|
@ -1,6 +1,7 @@
|
||||||
import { MessageInterface, ModelOptions } from '@type/chat';
|
import { MessageInterface, ModelOptions } from '@type/chat';
|
||||||
|
|
||||||
import { encoding_for_model } from '@dqbd/tiktoken';
|
import { Tiktoken } from '@dqbd/tiktoken/lite';
|
||||||
|
const cl100k_base = await import('@dqbd/tiktoken/encoders/cl100k_base.json');
|
||||||
|
|
||||||
// https://github.com/dqbd/tiktoken/issues/23#issuecomment-1483317174
|
// https://github.com/dqbd/tiktoken/issues/23#issuecomment-1483317174
|
||||||
export const getChatGPTEncoding = (
|
export const getChatGPTEncoding = (
|
||||||
|
@ -9,11 +10,16 @@ export const getChatGPTEncoding = (
|
||||||
) => {
|
) => {
|
||||||
const isGpt3 = model === 'gpt-3.5-turbo';
|
const isGpt3 = model === 'gpt-3.5-turbo';
|
||||||
|
|
||||||
const encoder = encoding_for_model(model, {
|
const encoder = new Tiktoken(
|
||||||
|
cl100k_base.bpe_ranks,
|
||||||
|
{
|
||||||
|
...cl100k_base.special_tokens,
|
||||||
'<|im_start|>': 100264,
|
'<|im_start|>': 100264,
|
||||||
'<|im_end|>': 100265,
|
'<|im_end|>': 100265,
|
||||||
'<|im_sep|>': 100266,
|
'<|im_sep|>': 100266,
|
||||||
});
|
},
|
||||||
|
cl100k_base.pat_str
|
||||||
|
);
|
||||||
|
|
||||||
const msgSep = isGpt3 ? '\n' : '';
|
const msgSep = isGpt3 ? '\n' : '';
|
||||||
const roleSep = isGpt3 ? '\n' : '<|im_sep|>';
|
const roleSep = isGpt3 ? '\n' : '<|im_sep|>';
|
||||||
|
|
Loading…
Reference in a new issue