change tokeniser to tiktoken

This commit is contained in:
Jing Hua 2023-03-25 21:12:40 +08:00
parent 01b6fc7479
commit 9cff66f783
9 changed files with 163 additions and 50490 deletions

View file

@ -9,6 +9,7 @@
"preview": "vite preview"
},
"dependencies": {
"@dqbd/tiktoken": "^1.0.2",
"html2canvas": "^1.4.1",
"i18next": "^22.4.11",
"i18next-browser-languagedetector": "^7.0.1",
@ -29,7 +30,6 @@
},
"devDependencies": {
"@tailwindcss/typography": "^0.5.9",
"@types/dompurify": "^2.4.0",
"@types/react": "^18.0.27",
"@types/react-dom": "^18.0.10",
"@types/react-scroll-to-bottom": "^4.2.0",
@ -39,6 +39,8 @@
"postcss": "^8.4.21",
"tailwindcss": "^3.2.7",
"typescript": "^4.9.3",
"vite": "^4.1.0"
"vite": "^4.1.0",
"vite-plugin-top-level-await": "^1.3.0",
"vite-plugin-wasm": "^3.2.2"
}
}

View file

@ -2,7 +2,7 @@ import React, { useEffect, useState } from 'react';
import useStore from '@store/store';
import { shallow } from 'zustand/shallow';
import { countMessagesToken } from '@utils/messageUtils';
import countTokens from '@utils/messageUtils';
const TokenCount = React.memo(() => {
const [tokenCount, setTokenCount] = useState<number>(0);
@ -13,8 +13,14 @@ const TokenCount = React.memo(() => {
shallow
);
const model = useStore((state) =>
state.chats
? state.chats[state.currentChatIndex].config.model
: 'gpt-3.5-turbo'
);
useEffect(() => {
if (!generating) setTokenCount(countMessagesToken(messages));
if (!generating) setTokenCount(countTokens(messages, model));
}, [messages, generating]);
return (

View file

@ -16,10 +16,10 @@ Current date: ${dateString}`;
// Model identifiers that are currently enabled. The commented-out entries
// are kept for reference only and are not part of the array.
export const modelOptions: ModelOptions[] = [
'gpt-3.5-turbo',
// 'gpt-3.5-turbo-0301',
'gpt-4',
// 'gpt-4-0314',
'gpt-4-32k',
// 'gpt-3.5-turbo-0301',
// 'gpt-4-0314',
// 'gpt-4-32k-0314',
];

View file

@ -55,9 +55,13 @@ const useSubmit = () => {
try {
let stream;
if (chats[currentChatIndex].messages.length === 0)
throw new Error('No messages submitted!');
const messages = limitMessageTokens(
chats[currentChatIndex].messages,
chats[currentChatIndex].config.max_tokens
chats[currentChatIndex].config.max_tokens,
chats[currentChatIndex].config.model
);
if (messages.length === 0) throw new Error('Message exceed max token!');

View file

@ -25,13 +25,10 @@ export interface ConfigInterface {
frequency_penalty: number;
}
export type ModelOptions =
| 'gpt-4'
| 'gpt-4-0314'
| 'gpt-4-32k'
| 'gpt-4-32k-0314'
| 'gpt-3.5-turbo'
| 'gpt-3.5-turbo-0301';
export type ModelOptions = 'gpt-4' | 'gpt-4-32k' | 'gpt-3.5-turbo';
// | 'gpt-3.5-turbo-0301';
// | 'gpt-4-0314'
// | 'gpt-4-32k-0314'
export interface LocalStorageInterfaceV0ToV1 {
chats: ChatInterface[];

File diff suppressed because one or more lines are too long

View file

@ -1,15 +1,50 @@
import { MessageInterface } from '@type/chat';
import countTokens from './countTokens';
import { MessageInterface, ModelOptions } from '@type/chat';
import { encoding_for_model } from '@dqbd/tiktoken';
/**
 * Serialises a conversation into chat markup and tokenises it for `model`.
 *
 * Each message becomes `<|im_start|>{role}{roleSep}{content}<|im_end|>`, and
 * an opening `<|im_start|>assistant{roleSep}` header is appended after the
 * last message. For `'gpt-3.5-turbo'` both the role separator and the
 * separator between messages are `'\n'`; for every other model the role
 * separator is `<|im_sep|>` and messages are concatenated directly.
 *
 * Approach taken from
 * https://github.com/dqbd/tiktoken/issues/23#issuecomment-1483317174
 *
 * @param messages - Conversation to serialise, in order.
 * @param model - Model whose encoding is requested from tiktoken.
 * @returns The token ids produced by the encoder for the serialised text.
 */
export const getChatGPTEncoding = (
  messages: MessageInterface[],
  model: ModelOptions
) => {
  const isGpt3 = model === 'gpt-3.5-turbo';
  const msgSep = isGpt3 ? '\n' : '';
  const roleSep = isGpt3 ? '\n' : '<|im_sep|>';

  // Build the text before creating the encoder so the encoder lives only for
  // the duration of the encode call.
  const serialized = [
    messages
      .map(
        ({ role, content }) =>
          `<|im_start|>${role}${roleSep}${content}<|im_end|>`
      )
      .join(msgSep),
    `<|im_start|>assistant${roleSep}`,
  ].join(msgSep);

  // Register the chat-markup tokens with the ids used by the referenced issue.
  const encoder = encoding_for_model(model, {
    '<|im_start|>': 100264,
    '<|im_end|>': 100265,
    '<|im_sep|>': 100266,
  });

  try {
    // 'all' is passed as the allowed-special argument so the markup above is
    // accepted as special tokens (per the tiktoken API).
    return encoder.encode(serialized, 'all');
  } finally {
    // The encoder is WASM-backed and must be released explicitly; without
    // this every call (one per message in limitMessageTokens) leaks it.
    encoder.free();
  }
};
/**
 * Counts the tokens a conversation occupies once serialised by
 * `getChatGPTEncoding` for the given model.
 *
 * @param messages - Conversation to measure.
 * @param model - Model passed through to `getChatGPTEncoding`.
 * @returns 0 for an empty conversation (the encoder is not invoked),
 *   otherwise the length of the encoded token sequence.
 */
const countTokens = (messages: MessageInterface[], model: ModelOptions) => {
  const hasMessages = messages.length !== 0;
  return hasMessages ? getChatGPTEncoding(messages, model).length : 0;
};
export const limitMessageTokens = (
messages: MessageInterface[],
limit: number = 4096
limit: number = 4096,
model: ModelOptions
): MessageInterface[] => {
const limitedMessages: MessageInterface[] = [];
let tokenCount = 0;
for (let i = messages.length - 1; i >= 0; i--) {
const count = countTokens(messages[i].content);
const count = countTokens([messages[i]], model);
if (count + tokenCount > limit) break;
tokenCount += count;
limitedMessages.unshift({ ...messages[i] });
@ -18,9 +53,4 @@ export const limitMessageTokens = (
return limitedMessages;
};
/**
 * Adds up the token counts of every message's `content`, as reported by
 * `countTokens`.
 *
 * @param messages - Messages whose contents are measured.
 * @returns The summed count; 0 when `messages` is empty.
 */
export const countMessagesToken = (messages: MessageInterface[]) => {
  let total = 0;
  messages.forEach(({ content }) => {
    total += countTokens(content);
  });
  return total;
};
export default countTokens;

View file

@ -1,9 +1,11 @@
import { defineConfig } from 'vite';
import react from '@vitejs/plugin-react-swc';
import wasm from 'vite-plugin-wasm';
import topLevelAwait from 'vite-plugin-top-level-await';
// https://vitejs.dev/config/
export default defineConfig({
plugins: [react()],
plugins: [react(), wasm(), topLevelAwait()],
resolve: {
alias: {
'@icon/': new URL('./src/assets/icons/', import.meta.url).pathname,
@ -17,5 +19,5 @@ export default defineConfig({
'@src/': new URL('./src/', import.meta.url).pathname,
},
},
base: "./"
base: './',
});

109
yarn.lock
View file

@ -59,6 +59,11 @@
"@babel/helper-validator-identifier" "^7.19.1"
to-fast-properties "^2.0.0"
"@dqbd/tiktoken@^1.0.2":
version "1.0.2"
resolved "https://registry.yarnpkg.com/@dqbd/tiktoken/-/tiktoken-1.0.2.tgz#f180afc887426343bab2adc41106d04dbd3627c0"
integrity sha512-AjGTBRWsMoVmVeN55NLyupyM8TNamOUBl6tj5t/leLDVup3CFGO9tVagNL1jf3GyZLkWZSTmYVbPQ/M2LEcNzw==
"@emotion/babel-plugin@^11.0.0":
version "11.10.6"
resolved "https://registry.yarnpkg.com/@emotion/babel-plugin/-/babel-plugin-11.10.6.tgz#a68ee4b019d661d6f37dec4b8903255766925ead"
@ -270,56 +275,127 @@
"@nodelib/fs.scandir" "2.1.5"
fastq "^1.6.0"
"@rollup/plugin-virtual@^3.0.1":
version "3.0.1"
resolved "https://registry.yarnpkg.com/@rollup/plugin-virtual/-/plugin-virtual-3.0.1.tgz#cea7e489481cc0ca91516c047f8c53c1cfb1adf6"
integrity sha512-fK8O0IL5+q+GrsMLuACVNk2x21g3yaw+sG2qn16SnUd3IlBsQyvWxLMGHmCmXRMecPjGRSZ/1LmZB4rjQm68og==
"@swc/core-darwin-arm64@1.3.37":
version "1.3.37"
resolved "https://registry.yarnpkg.com/@swc/core-darwin-arm64/-/core-darwin-arm64-1.3.37.tgz#a92e075ae35f18a64aaf3823ea175f03564f8da1"
integrity sha512-iIyVqqioUpVeT/hbBVfkrsjfCyL4idNH+LVKGmoTAWaTTSB0+UNhNuA7Wh2CqIHWh1Mv7IlumitWPcqsVDdoEw==
"@swc/core-darwin-arm64@1.3.42":
version "1.3.42"
resolved "https://registry.yarnpkg.com/@swc/core-darwin-arm64/-/core-darwin-arm64-1.3.42.tgz#fabb645b288199b730d846e3eda370b77f5ebe9f"
integrity sha512-hM6RrZFyoCM9mX3cj/zM5oXwhAqjUdOCLXJx7KTQps7NIkv/Qjvobgvyf2gAb89j3ARNo9NdIoLjTjJ6oALtiA==
"@swc/core-darwin-x64@1.3.37":
version "1.3.37"
resolved "https://registry.yarnpkg.com/@swc/core-darwin-x64/-/core-darwin-x64-1.3.37.tgz#a3cc06c87140a2ca0b8e7ef1f3d5cc34dd080429"
integrity sha512-dao5nXPWKxtaxqak4ZkRyBoApNIelW/glantQhPhj0FjMjuIQc+v03ldJ8XDByWOG+6xuVUTheANCtEccxoQBw==
"@swc/core-darwin-x64@1.3.42":
version "1.3.42"
resolved "https://registry.yarnpkg.com/@swc/core-darwin-x64/-/core-darwin-x64-1.3.42.tgz#dcd434ec8dda6f2178a10da0def036a071a6e008"
integrity sha512-bjsWtHMb6wJK1+RGlBs2USvgZ0txlMk11y0qBLKo32gLKTqzUwRw0Fmfzuf6Ue2a/w//7eqMlPFEre4LvJajGw==
"@swc/core-linux-arm-gnueabihf@1.3.37":
version "1.3.37"
resolved "https://registry.yarnpkg.com/@swc/core-linux-arm-gnueabihf/-/core-linux-arm-gnueabihf-1.3.37.tgz#f7d8f8523830c6be653f608839d4bd5598457f1f"
integrity sha512-/mVrc8H/f062CUkqKGmBiil2VIYu4mKawHxERfeP1y38X5K/OwjG5s9MgO9TVxy+Ly6vejwj70kRhSa3hVp1Bw==
"@swc/core-linux-arm-gnueabihf@1.3.42":
version "1.3.42"
resolved "https://registry.yarnpkg.com/@swc/core-linux-arm-gnueabihf/-/core-linux-arm-gnueabihf-1.3.42.tgz#59c57b15113d316e8a4a6d690a6c09429483d201"
integrity sha512-Oe0ggMz3MyqXNfeVmY+bBTL0hFSNY3bx8dhcqsh4vXk/ZVGse94QoC4dd92LuPHmKT0x6nsUzB86x2jU9QHW5g==
"@swc/core-linux-arm64-gnu@1.3.37":
version "1.3.37"
resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-gnu/-/core-linux-arm64-gnu-1.3.37.tgz#b162febd9de14fb08000c722b063be2bb5aefa6b"
integrity sha512-eRQ3KaZI0j5LidTfOIi/kUVOOMuVmw1HCdt/Z1TAUKoHMLVxY8xcJ3pEE3/+ednI60EmHpwpJRs6LelXyL6uzQ==
"@swc/core-linux-arm64-gnu@1.3.42":
version "1.3.42"
resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-gnu/-/core-linux-arm64-gnu-1.3.42.tgz#50d026b9f4d7a5f25deacc8c8dd45fc12be70a95"
integrity sha512-ZJsa8NIW1RLmmHGTJCbM7OPSbBZ9rOMrLqDtUOGrT0uoJXZnnQqolflamB5wviW0X6h3Z3/PSTNGNDCJ3u3Lqg==
"@swc/core-linux-arm64-musl@1.3.37":
version "1.3.37"
resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-musl/-/core-linux-arm64-musl-1.3.37.tgz#3b1a628e880fbb1a5e2a7a46d42e8aa878c6bfdd"
integrity sha512-w2BRLODyxNQY2rfHZMZ5ir6QrrnGBPlnIslTrgKmVbn1OjZoxUCtuqhrYnCmybaAc4DOkeH02TqynEFXrm+EMw==
"@swc/core-linux-arm64-musl@1.3.42":
version "1.3.42"
resolved "https://registry.yarnpkg.com/@swc/core-linux-arm64-musl/-/core-linux-arm64-musl-1.3.42.tgz#3c0e51b0709dcf06289949803c9a36a46a97827c"
integrity sha512-YpZwlFAfOp5vkm/uVUJX1O7N3yJDO1fDQRWqsOPPNyIJkI2ydlRQtgN6ZylC159Qv+TimfXnGTlNr7o3iBAqjg==
"@swc/core-linux-x64-gnu@1.3.37":
version "1.3.37"
resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-gnu/-/core-linux-x64-gnu-1.3.37.tgz#ed443ad77dc90e415267d02a38e4113047b2d3d8"
integrity sha512-CfoH8EsZJZ9kunjMUjBNYD5fFuO86zw+K/o4wEw72Yg6ZEiqPmeIlCKU8tpTv4sK+CbhUXrmVzMB5tqsb2jALQ==
"@swc/core-linux-x64-gnu@1.3.42":
version "1.3.42"
resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-gnu/-/core-linux-x64-gnu-1.3.42.tgz#059ac0acddebd0360851871929a14dbacf74f865"
integrity sha512-0ccpKnsZbyHBzaQFdP8U9i29nvOfKitm6oJfdJzlqsY/jCqwvD8kv2CAKSK8WhJz//ExI2LqNrDI0yazx5j7+A==
"@swc/core-linux-x64-musl@1.3.37":
version "1.3.37"
resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-musl/-/core-linux-x64-musl-1.3.37.tgz#de607a4985458bd6e8b0e40f0d62d0e26bd8df1e"
integrity sha512-9YPrHYNdoG7PK11gV51GfL45biI2dic+YTqHUDKyykemsD7Ot1zUFX7Ty//pdvpKcKSff6SrHbfFACD5ziNirA==
"@swc/core-linux-x64-musl@1.3.42":
version "1.3.42"
resolved "https://registry.yarnpkg.com/@swc/core-linux-x64-musl/-/core-linux-x64-musl-1.3.42.tgz#7a61093d93a3abc2f893b7d31fd6c22c4cab2212"
integrity sha512-7eckRRuTZ6+3K21uyfXXgc2ZCg0mSWRRNwNT3wap2bYkKPeqTgb8pm8xYSZNEiMuDonHEat6XCCV36lFY6kOdQ==
"@swc/core-win32-arm64-msvc@1.3.37":
version "1.3.37"
resolved "https://registry.yarnpkg.com/@swc/core-win32-arm64-msvc/-/core-win32-arm64-msvc-1.3.37.tgz#d5851a47d7df183929b9746d56f76c282f940e6a"
integrity sha512-h17Ek8/wCDje6BrXOvCXBM80oBRmTSMMdLyt87whTl5xqYlWYYs9oQIzZndNRTlNpTgjGO8Ns2eo4kwVxIkBIA==
"@swc/core-win32-arm64-msvc@1.3.42":
version "1.3.42"
resolved "https://registry.yarnpkg.com/@swc/core-win32-arm64-msvc/-/core-win32-arm64-msvc-1.3.42.tgz#12f92c960ea801aa26ffa5b91d369ac24c2a3cca"
integrity sha512-t27dJkdw0GWANdN4TV0lY/V5vTYSx5SRjyzzZolep358ueCGuN1XFf1R0JcCbd1ojosnkQg2L7A7991UjXingg==
"@swc/core-win32-ia32-msvc@1.3.37":
version "1.3.37"
resolved "https://registry.yarnpkg.com/@swc/core-win32-ia32-msvc/-/core-win32-ia32-msvc-1.3.37.tgz#06ad7016f61b56aec4abf60eab3a91b786f9e294"
integrity sha512-1BR175E1olGy/zdt94cgdb6ps/lBNissAOaxyBk8taFpcjy3zpdP30yAoH0GIsC6isnZ5JfArbOJNRXXO5tE0Q==
"@swc/core-win32-ia32-msvc@1.3.42":
version "1.3.42"
resolved "https://registry.yarnpkg.com/@swc/core-win32-ia32-msvc/-/core-win32-ia32-msvc-1.3.42.tgz#be022aff03838515fa5506be300f0ea15f3fb476"
integrity sha512-xfpc/Zt/aMILX4IX0e3loZaFyrae37u3MJCv1gJxgqrpeLi7efIQr3AmERkTK3mxTO6R5urSliWw2W3FyZ7D3Q==
"@swc/core-win32-x64-msvc@1.3.37":
version "1.3.37"
resolved "https://registry.yarnpkg.com/@swc/core-win32-x64-msvc/-/core-win32-x64-msvc-1.3.37.tgz#60139a7089003a7447a4efef9704ae8fde21995e"
integrity sha512-1siDQ7dccQ1pesJmgAL3BUBbRPtfbNInOWnZOkiie/DfFqGQ117QKnCVyjUvwFKfTQx1+3UUTDmMSlRd00SlXg==
"@swc/core-win32-x64-msvc@1.3.42":
version "1.3.42"
resolved "https://registry.yarnpkg.com/@swc/core-win32-x64-msvc/-/core-win32-x64-msvc-1.3.42.tgz#fccac26974f03234e502276389f4330e2696887f"
integrity sha512-ra2K4Tu++EJLPhzZ6L8hWUsk94TdK/2UKhL9dzCBhtzKUixsGCEqhtqH1zISXNvW8qaVLFIMUP37ULe80/IJaA==
"@swc/core@^1.3.10":
version "1.3.42"
resolved "https://registry.yarnpkg.com/@swc/core/-/core-1.3.42.tgz#7067c4fd9a02536f9ca7b54ed8ebc45e2df810cf"
integrity sha512-nVFUd5+7tGniM2cT3LXaqnu3735Cu4az8A9gAKK+8sdpASI52SWuqfDBmjFCK9xG90MiVDVp2PTZr0BWqCIzpw==
optionalDependencies:
"@swc/core-darwin-arm64" "1.3.42"
"@swc/core-darwin-x64" "1.3.42"
"@swc/core-linux-arm-gnueabihf" "1.3.42"
"@swc/core-linux-arm64-gnu" "1.3.42"
"@swc/core-linux-arm64-musl" "1.3.42"
"@swc/core-linux-x64-gnu" "1.3.42"
"@swc/core-linux-x64-musl" "1.3.42"
"@swc/core-win32-arm64-msvc" "1.3.42"
"@swc/core-win32-ia32-msvc" "1.3.42"
"@swc/core-win32-x64-msvc" "1.3.42"
"@swc/core@^1.3.35":
version "1.3.37"
resolved "https://registry.yarnpkg.com/@swc/core/-/core-1.3.37.tgz#644653fa7deb20c7c342e7fd019c7abc44ecf1bf"
@ -353,13 +429,6 @@
dependencies:
"@types/ms" "*"
"@types/dompurify@^2.4.0":
version "2.4.0"
resolved "https://registry.yarnpkg.com/@types/dompurify/-/dompurify-2.4.0.tgz#fd9706392a88e0e0e6d367f3588482d817df0ab9"
integrity sha512-IDBwO5IZhrKvHFUl+clZxgf3hn2b/lU6H1KaBShPkQyGJUQ0xwebezIPSuiyGwfz1UzJWQl4M7BDxtHtCCPlTg==
dependencies:
"@types/trusted-types" "*"
"@types/hast@^2.0.0":
version "2.3.4"
resolved "https://registry.yarnpkg.com/@types/hast/-/hast-2.3.4.tgz#8aa5ef92c117d20d974a82bdfb6a648b08c0bafc"
@ -427,11 +496,6 @@
resolved "https://registry.yarnpkg.com/@types/scheduler/-/scheduler-0.16.2.tgz#1a62f89525723dde24ba1b01b092bf5df8ad4d39"
integrity sha512-hppQEBDmlwhFAXKJX2KnWLYu5yMfi91yazPb2l+lbJiwW+wdo1gNeRA+3RgNSO39WYX2euey41KEwnqesU2Jew==
"@types/trusted-types@*":
version "2.0.3"
resolved "https://registry.yarnpkg.com/@types/trusted-types/-/trusted-types-2.0.3.tgz#a136f83b0758698df454e328759dbd3d44555311"
integrity sha512-NfQ4gyz38SL8sDNrSixxU2Os1a5xcdFxipAFxYEuLUlvU2uDwS4NUpsImcf1//SlWItCVMMLiylsxbmNMToV/g==
"@types/unist@*", "@types/unist@^2.0.0":
version "2.0.6"
resolved "https://registry.yarnpkg.com/@types/unist/-/unist-2.0.6.tgz#250a7b16c3b91f672a24552ec64678eeb1d3a08d"
@ -661,11 +725,16 @@ core-js@3.18.3:
resolved "https://registry.yarnpkg.com/core-js/-/core-js-3.18.3.tgz#86a0bba2d8ec3df860fefcc07a8d119779f01509"
integrity sha512-tReEhtMReZaPFVw7dajMx0vlsz3oOb8ajgPoHVYGxr8ErnZ6PcYEvvmjGmXlfpnxpkYSdOQttjB+MvVbCGfvLw==
core-js@^3.6.0, core-js@^3.8.3:
core-js@^3.6.0:
version "3.29.0"
resolved "https://registry.yarnpkg.com/core-js/-/core-js-3.29.0.tgz#0273e142b67761058bcde5615c503c7406b572d6"
integrity sha512-VG23vuEisJNkGl6XQmFJd3rEG/so/CNatqeE+7uZAwTSwFeB/qaO0be8xZYUNWprJ/GIwL8aMt9cj1kvbpTZhg==
core-js@^3.8.3:
version "3.29.1"
resolved "https://registry.yarnpkg.com/core-js/-/core-js-3.29.1.tgz#40ff3b41588b091aaed19ca1aa5cb111803fa9a6"
integrity sha512-+jwgnhg6cQxKYIIjGtAHq2nwUOolo9eoFZ4sHfUH09BLXBgxnH4gA0zEd+t+BO2cNB8idaBtZFcFTRjQJRJmAw==
cosmiconfig@^7.0.0:
version "7.1.0"
resolved "https://registry.yarnpkg.com/cosmiconfig/-/cosmiconfig-7.1.0.tgz#1443b9afa596b670082ea46cbd8f6a62b84635f6"
@ -2299,6 +2368,20 @@ vfile@^5.0.0:
unist-util-stringify-position "^3.0.0"
vfile-message "^3.0.0"
vite-plugin-top-level-await@^1.3.0:
version "1.3.0"
resolved "https://registry.yarnpkg.com/vite-plugin-top-level-await/-/vite-plugin-top-level-await-1.3.0.tgz#83c73b5aed33a3819d85432da27f462218cfb3f5"
integrity sha512-owIfsgWudMlQODWJSwp0sQB3AZZu3qsMygeBjZy8CyjEk6OB9AGd8lHqmgwrcEqgvy9N58lYxSBLVk3/4ejEiA==
dependencies:
"@rollup/plugin-virtual" "^3.0.1"
"@swc/core" "^1.3.10"
uuid "^9.0.0"
vite-plugin-wasm@^3.2.2:
version "3.2.2"
resolved "https://registry.yarnpkg.com/vite-plugin-wasm/-/vite-plugin-wasm-3.2.2.tgz#7a66fef27733a0dea9b2b14f942a6389a2523f7c"
integrity sha512-cdbBUNR850AEoMd5nvLmnyeq63CSfoP1ctD/L2vLk/5+wsgAPlAVAzUK5nGKWO/jtehNlrSSHLteN+gFQw7VOA==
vite@^4.1.0:
version "4.1.4"
resolved "https://registry.yarnpkg.com/vite/-/vite-4.1.4.tgz#170d93bcff97e0ebc09764c053eebe130bfe6ca0"