From eb552d2a694efc499bab24be9e8dba2531b1e9cb Mon Sep 17 00:00:00 2001 From: sxjeru Date: Tue, 10 Sep 2024 02:42:44 +0800 Subject: [PATCH 1/4] =?UTF-8?q?=F0=9F=90=9B=20fix:=20Gemini=20cannot=20inp?= =?UTF-8?q?ut=20images=20when=20server=20database=20is=20enabled=20(#3370)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * Update index.ts * Update index.ts * Update index.ts * Update index.ts * ♻️ refactor: refactor the google implement * ✅ test: fix tests * ✅ test: fix tests --------- Co-authored-by: Arvin Xu --- src/libs/agent-runtime/google/index.test.ts | 70 ++++++++++++++------- src/libs/agent-runtime/google/index.ts | 49 +++++++++------ src/utils/imageToBase64.ts | 16 +++++ 3 files changed, 92 insertions(+), 43 deletions(-) diff --git a/src/libs/agent-runtime/google/index.test.ts b/src/libs/agent-runtime/google/index.test.ts index 9a161e6d31c7..17298e085c3f 100644 --- a/src/libs/agent-runtime/google/index.test.ts +++ b/src/libs/agent-runtime/google/index.test.ts @@ -5,6 +5,7 @@ import OpenAI from 'openai'; import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { OpenAIChatMessage } from '@/libs/agent-runtime'; +import * as imageToBase64Module from '@/utils/imageToBase64'; import * as debugStreamModule from '../utils/debugStream'; import { LobeGoogleAI } from './index'; @@ -303,36 +304,57 @@ describe('LobeGoogleAI', () => { describe('private method', () => { describe('convertContentToGooglePart', () => { - it('should throw TypeError when image URL does not contain base64 data', () => { - // 提供一个不包含base64数据的图像URL - const invalidImageUrl = 'http://example.com/image.png'; + it('should handle URL type images', async () => { + const imageUrl = 'http://example.com/image.png'; + const mockBase64 = 'mockBase64Data'; - expect(() => + // Mock the imageUrlToBase64 function + vi.spyOn(imageToBase64Module, 'imageUrlToBase64').mockResolvedValueOnce(mockBase64); + + const result = await instance['convertContentToGooglePart']({ + type: 'image_url', + image_url: { url: imageUrl }, + }); + + expect(result).toEqual({ + inlineData: { + data: mockBase64, + mimeType: 'image/png', + }, + }); + + expect(imageToBase64Module.imageUrlToBase64).toHaveBeenCalledWith(imageUrl); + }); + + it('should throw TypeError for unsupported image URL types', async () => { + const unsupportedImageUrl = 'unsupported://example.com/image.png'; + + await expect( instance['convertContentToGooglePart']({ type: 'image_url', - image_url: { url: invalidImageUrl }, + image_url: { url: unsupportedImageUrl }, }), - ).toThrow(TypeError); + ).rejects.toThrow(TypeError); }); }); describe('buildGoogleMessages', () => { - it('get default result with gemini-pro', () => { + it('get default result with gemini-pro', async () => { const messages: OpenAIChatMessage[] = [{ content: 'Hello', role: 'user' }]; - const contents = instance['buildGoogleMessages'](messages, 'gemini-pro'); + const contents = await instance['buildGoogleMessages'](messages, 'gemini-pro'); expect(contents).toHaveLength(1); expect(contents).toEqual([{ parts: [{ text: 'Hello' }], role: 'user' }]); }); - it('messages should end with user if using gemini-pro', () => { + it('messages should end with user if using gemini-pro', async () => { const messages: OpenAIChatMessage[] = [ { content: 'Hello', role: 'user' }, { content: 'Hi', role: 'assistant' }, ]; - const contents = instance['buildGoogleMessages'](messages, 'gemini-pro'); + const contents = await instance['buildGoogleMessages'](messages, 'gemini-pro'); expect(contents).toHaveLength(3); expect(contents).toEqual([ @@ -342,13 +364,13 @@ describe('LobeGoogleAI', () => { ]); }); - it('should include system role if there is a system role prompt', () => { + it('should include system role if there is a system role prompt', async () => { const messages: OpenAIChatMessage[] = [ { content: 'you are ChatGPT', role: 'system' }, { content: 'Who are you', role: 'user' }, ]; - const contents = instance['buildGoogleMessages'](messages, 'gemini-pro'); + const contents = await instance['buildGoogleMessages'](messages, 'gemini-pro'); expect(contents).toHaveLength(3); expect(contents).toEqual([ @@ -358,13 +380,13 @@ describe('LobeGoogleAI', () => { ]); }); - it('should not modify the length if model is gemini-1.5-pro', () => { + it('should not modify the length if model is gemini-1.5-pro', async () => { const messages: OpenAIChatMessage[] = [ { content: 'Hello', role: 'user' }, { content: 'Hi', role: 'assistant' }, ]; - const contents = instance['buildGoogleMessages'](messages, 'gemini-1.5-pro-latest'); + const contents = await instance['buildGoogleMessages'](messages, 'gemini-1.5-pro-latest'); expect(contents).toHaveLength(2); expect(contents).toEqual([ @@ -373,7 +395,7 @@ describe('LobeGoogleAI', () => { ]); }); - it('should use specified model when images are included in messages', () => { + it('should use specified model when images are included in messages', async () => { const messages: OpenAIChatMessage[] = [ { content: [ @@ -386,7 +408,7 @@ describe('LobeGoogleAI', () => { const model = 'gemini-1.5-flash-latest'; // 调用 buildGoogleMessages 方法 - const contents = instance['buildGoogleMessages'](messages, model); + const contents = await instance['buildGoogleMessages'](messages, model); expect(contents).toHaveLength(1); expect(contents).toEqual([ @@ -501,13 +523,13 @@ describe('LobeGoogleAI', () => { }); describe('convertOAIMessagesToGoogleMessage', () => { - it('should correctly convert assistant message', () => { + it('should correctly convert assistant message', async () => { const message: OpenAIChatMessage = { role: 'assistant', content: 'Hello', }; - const converted = instance['convertOAIMessagesToGoogleMessage'](message); + const converted = await instance['convertOAIMessagesToGoogleMessage'](message); expect(converted).toEqual({ role: 'model', @@ -515,13 +537,13 @@ describe('LobeGoogleAI', () => { }); }); - it('should correctly convert user message', () => { + it('should correctly convert user message', async () => { const message: OpenAIChatMessage = { role: 'user', content: 'Hi', }; - const converted = instance['convertOAIMessagesToGoogleMessage'](message); + const converted = await instance['convertOAIMessagesToGoogleMessage'](message); expect(converted).toEqual({ role: 'user', @@ -529,7 +551,7 @@ describe('LobeGoogleAI', () => { }); }); - it('should correctly convert message with inline base64 image parts', () => { + it('should correctly convert message with inline base64 image parts', async () => { const message: OpenAIChatMessage = { role: 'user', content: [ @@ -538,7 +560,7 @@ describe('LobeGoogleAI', () => { ], }; - const converted = instance['convertOAIMessagesToGoogleMessage'](message); + const converted = await instance['convertOAIMessagesToGoogleMessage'](message); expect(converted).toEqual({ role: 'user', @@ -548,7 +570,7 @@ describe('LobeGoogleAI', () => { ], }); }); - it.skip('should correctly convert message with image url parts', () => { + it.skip('should correctly convert message with image url parts', async () => { const message: OpenAIChatMessage = { role: 'user', content: [ @@ -557,7 +579,7 @@ describe('LobeGoogleAI', () => { ], }; - const converted = instance['convertOAIMessagesToGoogleMessage'](message); + const converted = await instance['convertOAIMessagesToGoogleMessage'](message); expect(converted).toEqual({ role: 'user', diff --git a/src/libs/agent-runtime/google/index.ts b/src/libs/agent-runtime/google/index.ts index 1222ad764d14..2f26139f97be 100644 --- a/src/libs/agent-runtime/google/index.ts +++ b/src/libs/agent-runtime/google/index.ts @@ -10,6 +10,8 @@ import { import { JSONSchema7 } from 'json-schema'; import { transform } from 'lodash-es'; +import { imageUrlToBase64 } from '@/utils/imageToBase64'; + import { LobeRuntimeAI } from '../BaseAI'; import { AgentRuntimeErrorType, ILobeAgentRuntimeErrorType } from '../error'; import { @@ -52,7 +54,7 @@ export class LobeGoogleAI implements LobeRuntimeAI { try { const model = payload.model; - const contents = this.buildGoogleMessages(payload.messages, model); + const contents = await this.buildGoogleMessages(payload.messages, model); const geminiStreamResult = await this.client .getGenerativeModel( @@ -109,7 +111,7 @@ export class LobeGoogleAI implements LobeRuntimeAI { } } - private convertContentToGooglePart = (content: UserMessageContentPart): Part => { + private convertContentToGooglePart = async (content: UserMessageContentPart): Promise => { switch (content.type) { case 'text': { return { text: content.text }; @@ -130,51 +132,60 @@ export class LobeGoogleAI implements LobeRuntimeAI { }; } - // if (type === 'url') { - // return { - // fileData: { - // fileUri: content.image_url.url, - // mimeType: mimeType || 'image/png', - // }, - // }; - // } + if (type === 'url') { + const base64Image = await imageUrlToBase64(content.image_url.url); + + return { + inlineData: { + data: base64Image, + mimeType: mimeType || 'image/png', + }, + }; + } throw new TypeError(`currently we don't support image url: ${content.image_url.url}`); } } }; - private convertOAIMessagesToGoogleMessage = (message: OpenAIChatMessage): Content => { + private convertOAIMessagesToGoogleMessage = async ( + message: OpenAIChatMessage, + ): Promise => { const content = message.content as string | UserMessageContentPart[]; return { parts: typeof content === 'string' ? [{ text: content }] - : content.map((c) => this.convertContentToGooglePart(c)), + : await Promise.all(content.map(async (c) => await this.convertContentToGooglePart(c))), role: message.role === 'assistant' ? 'model' : 'user', }; }; // convert messages from the Vercel AI SDK Format to the format // that is expected by the Google GenAI SDK - private buildGoogleMessages = (messages: OpenAIChatMessage[], model: string): Content[] => { + private buildGoogleMessages = async ( + messages: OpenAIChatMessage[], + model: string, + ): Promise => { // if the model is gemini-1.5-pro-latest, we don't need any special handling if (model === 'gemini-1.5-pro-latest') { - return messages + const pools = messages .filter((message) => message.role !== 'function') - .map((msg) => this.convertOAIMessagesToGoogleMessage(msg)); + .map(async (msg) => await this.convertOAIMessagesToGoogleMessage(msg)); + + return Promise.all(pools); } const contents: Content[] = []; let lastRole = 'model'; - messages.forEach((message) => { + for (const message of messages) { // current to filter function message if (message.role === 'function') { - return; + continue; } - const googleMessage = this.convertOAIMessagesToGoogleMessage(message); + const googleMessage = await this.convertOAIMessagesToGoogleMessage(message); // if the last message is a model message and the current message is a model message // then we need to add a user message to separate them @@ -187,7 +198,7 @@ export class LobeGoogleAI implements LobeRuntimeAI { // update the last role lastRole = googleMessage.role; - }); + } // if the last message is a user message, then we need to add a model message to separate them if (lastRole === 'model') { diff --git a/src/utils/imageToBase64.ts b/src/utils/imageToBase64.ts index 3ab277c9ed09..63a4302215d5 100644 --- a/src/utils/imageToBase64.ts +++ b/src/utils/imageToBase64.ts @@ -35,3 +35,19 @@ export const imageToBase64 = ({ return canvas.toDataURL(type); }; + +export const imageUrlToBase64 = async (imageUrl: string): Promise => { + try { + const res = await fetch(imageUrl); + const arrayBuffer = await res.arrayBuffer(); + + return typeof btoa === 'function' + ? btoa( + new Uint8Array(arrayBuffer).reduce((data, byte) => data + String.fromCharCode(byte), ''), + ) + : Buffer.from(arrayBuffer).toString('base64'); + } catch (error) { + console.error('Error converting image to base64:', error); + throw error; + } +}; From 831bc5fec0f2509b90086a4d4134ce13d9d259ab Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 9 Sep 2024 18:49:28 +0000 Subject: [PATCH 2/4] :bookmark: chore(release): v1.15.29 [skip ci] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### [Version 1.15.29](https://github.com/lobehub/lobe-chat/compare/v1.15.28...v1.15.29) Released on **2024-09-09** #### 🐛 Bug Fixes - **misc**: Gemini cannot input images when server database is enabled.
Improvements and Fixes #### What's fixed * **misc**: Gemini cannot input images when server database is enabled, closes [#3370](https://github.com/lobehub/lobe-chat/issues/3370) ([eb552d2](https://github.com/lobehub/lobe-chat/commit/eb552d2))
[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
--- CHANGELOG.md | 25 +++++++++++++++++++++++++ package.json | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cf441b8516f9..f071f944e79c 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,31 @@ # Changelog +### [Version 1.15.29](https://github.com/lobehub/lobe-chat/compare/v1.15.28...v1.15.29) + +Released on **2024-09-09** + +#### 🐛 Bug Fixes + +- **misc**: Gemini cannot input images when server database is enabled. + +
+ +
+Improvements and Fixes + +#### What's fixed + +- **misc**: Gemini cannot input images when server database is enabled, closes [#3370](https://github.com/lobehub/lobe-chat/issues/3370) ([eb552d2](https://github.com/lobehub/lobe-chat/commit/eb552d2)) + +
+ +
+ +[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top) + +
+ ### [Version 1.15.28](https://github.com/lobehub/lobe-chat/compare/v1.15.27...v1.15.28) Released on **2024-09-09** diff --git a/package.json b/package.json index 44bd8fc90532..7af7257562cd 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@lobehub/chat", - "version": "1.15.28", + "version": "1.15.29", "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.", "keywords": [ "framework", From 89c8dd485aabaec8a544f07947c16cef57751f35 Mon Sep 17 00:00:00 2001 From: Arvin Xu Date: Tue, 10 Sep 2024 03:29:33 +0800 Subject: [PATCH 3/4] =?UTF-8?q?=F0=9F=90=9B=20fix:=20fix=20claude=203.5=20?= =?UTF-8?q?image=20with=20s3=20url=20(#3870)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * ✅ test: add tests for imageToBase64 * 🐛 fix: fix anthropic image chat * ✅ test: add more tests --- src/libs/agent-runtime/anthropic/index.ts | 12 +- src/libs/agent-runtime/bedrock/index.ts | 2 +- .../utils/anthropicHelpers.test.ts | 136 +++++++++++++----- .../agent-runtime/utils/anthropicHelpers.ts | 59 +++++--- src/utils/imageToBase64.test.ts | 90 ++++++++++++ 5 files changed, 240 insertions(+), 59 deletions(-) create mode 100644 src/utils/imageToBase64.test.ts diff --git a/src/libs/agent-runtime/anthropic/index.ts b/src/libs/agent-runtime/anthropic/index.ts index b3c94bf5a65b..240fd2a5cdb9 100644 --- a/src/libs/agent-runtime/anthropic/index.ts +++ b/src/libs/agent-runtime/anthropic/index.ts @@ -29,7 +29,8 @@ export class LobeAnthropicAI implements LobeRuntimeAI { async chat(payload: ChatStreamPayload, options?: ChatCompetitionOptions) { try { - const anthropicPayload = this.buildAnthropicPayload(payload); + const anthropicPayload = await this.buildAnthropicPayload(payload); + const response = await this.client.messages.create( { ...anthropicPayload, stream: true }, { @@ -86,20 +87,17 @@ export class LobeAnthropicAI implements LobeRuntimeAI { } } - private buildAnthropicPayload(payload: ChatStreamPayload) { + private async buildAnthropicPayload(payload: ChatStreamPayload) { const { messages, model, max_tokens = 4096, temperature, top_p, tools } = payload; const system_message = messages.find((m) => m.role === 'system'); const user_messages = messages.filter((m) => m.role !== 'system'); return { max_tokens, - messages: buildAnthropicMessages(user_messages), + messages: await buildAnthropicMessages(user_messages), model, system: system_message?.content as string, - temperature: - payload.temperature !== undefined - ? temperature / 2 - : undefined, + temperature: payload.temperature !== undefined ? temperature / 2 : undefined, tools: buildAnthropicTools(tools), top_p, } satisfies Anthropic.MessageCreateParams; diff --git a/src/libs/agent-runtime/bedrock/index.ts b/src/libs/agent-runtime/bedrock/index.ts index 2d3b95770194..9b24ff71cf9d 100644 --- a/src/libs/agent-runtime/bedrock/index.ts +++ b/src/libs/agent-runtime/bedrock/index.ts @@ -62,7 +62,7 @@ export class LobeBedrockAI implements LobeRuntimeAI { body: JSON.stringify({ anthropic_version: 'bedrock-2023-05-31', max_tokens: max_tokens || 4096, - messages: buildAnthropicMessages(user_messages), + messages: await buildAnthropicMessages(user_messages), system: system_message?.content as string, temperature: temperature / 2, tools: buildAnthropicTools(tools), diff --git a/src/libs/agent-runtime/utils/anthropicHelpers.test.ts b/src/libs/agent-runtime/utils/anthropicHelpers.test.ts index 172f0faad65f..3c17cf3646ae 100644 --- a/src/libs/agent-runtime/utils/anthropicHelpers.test.ts +++ b/src/libs/agent-runtime/utils/anthropicHelpers.test.ts @@ -1,6 +1,8 @@ import { OpenAI } from 'openai'; import { describe, expect, it } from 'vitest'; +import { imageUrlToBase64 } from '@/utils/imageToBase64'; + import { OpenAIChatMessage, UserMessageContentPart } from '../types/chat'; import { buildAnthropicBlock, @@ -10,28 +12,30 @@ import { } from './anthropicHelpers'; import { parseDataUri } from './uriParser'; -describe('anthropicHelpers', () => { - // Mock the parseDataUri function since it's an implementation detail - vi.mock('./uriParser', () => ({ - parseDataUri: vi.fn().mockReturnValue({ - mimeType: 'image/jpeg', - base64: 'base64EncodedString', - }), - })); +// Mock the parseDataUri function since it's an implementation detail +vi.mock('./uriParser', () => ({ + parseDataUri: vi.fn().mockReturnValue({ + mimeType: 'image/jpeg', + base64: 'base64EncodedString', + type: 'base64', + }), +})); +vi.mock('@/utils/imageToBase64'); +describe('anthropicHelpers', () => { describe('buildAnthropicBlock', () => { - it('should return the content as is for text type', () => { + it('should return the content as is for text type', async () => { const content: UserMessageContentPart = { type: 'text', text: 'Hello!' }; - const result = buildAnthropicBlock(content); + const result = await buildAnthropicBlock(content); expect(result).toEqual(content); }); - it('should transform an image URL into an Anthropic.ImageBlockParam', () => { + it('should transform an image URL into an Anthropic.ImageBlockParam', async () => { const content: UserMessageContentPart = { type: 'image_url', image_url: { url: '' }, }; - const result = buildAnthropicBlock(content); + const result = await buildAnthropicBlock(content); expect(parseDataUri).toHaveBeenCalledWith(content.image_url.url); expect(result).toEqual({ source: { @@ -42,28 +46,96 @@ describe('anthropicHelpers', () => { type: 'image', }); }); + + it('should transform a regular image URL into an Anthropic.ImageBlockParam', async () => { + vi.mocked(parseDataUri).mockReturnValueOnce({ + mimeType: 'image/png', + base64: null, + type: 'url', + }); + vi.mocked(imageUrlToBase64).mockResolvedValue('convertedBase64String'); + + const content = { + type: 'image_url', + image_url: { url: 'https://example.com/image.png' }, + } as const; + + const result = await buildAnthropicBlock(content); + + expect(parseDataUri).toHaveBeenCalledWith(content.image_url.url); + expect(imageUrlToBase64).toHaveBeenCalledWith(content.image_url.url); + expect(result).toEqual({ + source: { + data: 'convertedBase64String', + media_type: 'image/png', + type: 'base64', + }, + type: 'image', + }); + }); + + it('should use default media_type for URL images when mimeType is not provided', async () => { + vi.mocked(parseDataUri).mockReturnValueOnce({ + mimeType: null, + base64: null, + type: 'url', + }); + vi.mocked(imageUrlToBase64).mockResolvedValue('convertedBase64String'); + + const content = { + type: 'image_url', + image_url: { url: 'https://example.com/image' }, + } as const; + + const result = await buildAnthropicBlock(content); + + expect(result).toEqual({ + source: { + data: 'convertedBase64String', + media_type: 'image/png', + type: 'base64', + }, + type: 'image', + }); + }); + + it('should throw an error for invalid image URLs', async () => { + vi.mocked(parseDataUri).mockReturnValueOnce({ + mimeType: null, + base64: null, + // @ts-ignore + type: 'invalid', + }); + + const content = { + type: 'image_url', + image_url: { url: 'invalid-url' }, + } as const; + + await expect(buildAnthropicBlock(content)).rejects.toThrow('Invalid image URL: invalid-url'); + }); }); describe('buildAnthropicMessage', () => { - it('should correctly convert system message to assistant message', () => { + it('should correctly convert system message to assistant message', async () => { const message: OpenAIChatMessage = { content: [{ type: 'text', text: 'Hello!' }], role: 'system', }; - const result = buildAnthropicMessage(message); + const result = await buildAnthropicMessage(message); expect(result).toEqual({ content: [{ type: 'text', text: 'Hello!' }], role: 'user' }); }); - it('should correctly convert user message with string content', () => { + it('should correctly convert user message with string content', async () => { const message: OpenAIChatMessage = { content: 'Hello!', role: 'user', }; - const result = buildAnthropicMessage(message); + const result = await buildAnthropicMessage(message); expect(result).toEqual({ content: 'Hello!', role: 'user' }); }); - it('should correctly convert user message with content parts', () => { + it('should correctly convert user message with content parts', async () => { const message: OpenAIChatMessage = { content: [ { type: 'text', text: 'Check out this image:' }, @@ -71,19 +143,19 @@ describe('anthropicHelpers', () => { ], role: 'user', }; - const result = buildAnthropicMessage(message); + const result = await buildAnthropicMessage(message); expect(result.role).toBe('user'); expect(result.content).toHaveLength(2); expect((result.content[1] as any).type).toBe('image'); }); - it('should correctly convert tool message', () => { + it('should correctly convert tool message', async () => { const message: OpenAIChatMessage = { content: 'Tool result content', role: 'tool', tool_call_id: 'tool123', }; - const result = buildAnthropicMessage(message); + const result = await buildAnthropicMessage(message); expect(result.role).toBe('user'); expect(result.content).toEqual([ { @@ -94,7 +166,7 @@ describe('anthropicHelpers', () => { ]); }); - it('should correctly convert assistant message with tool calls', () => { + it('should correctly convert assistant message with tool calls', async () => { const message: OpenAIChatMessage = { content: 'Here is the result:', role: 'assistant', @@ -109,7 +181,7 @@ describe('anthropicHelpers', () => { }, ], }; - const result = buildAnthropicMessage(message); + const result = await buildAnthropicMessage(message); expect(result.role).toBe('assistant'); expect(result.content).toEqual([ { text: 'Here is the result:', type: 'text' }, @@ -122,12 +194,12 @@ describe('anthropicHelpers', () => { ]); }); - it('should correctly convert function message', () => { + it('should correctly convert function message', async () => { const message: OpenAIChatMessage = { content: 'def hello(name):\n return f"Hello {name}"', role: 'function', }; - const result = buildAnthropicMessage(message); + const result = await buildAnthropicMessage(message); expect(result).toEqual({ content: 'def hello(name):\n return f"Hello {name}"', role: 'assistant', @@ -136,13 +208,13 @@ describe('anthropicHelpers', () => { }); describe('buildAnthropicMessages', () => { - it('should correctly convert OpenAI Messages to Anthropic Messages', () => { + it('should correctly convert OpenAI Messages to Anthropic Messages', async () => { const messages: OpenAIChatMessage[] = [ { content: 'Hello', role: 'user' }, { content: 'Hi', role: 'assistant' }, ]; - const result = buildAnthropicMessages(messages); + const result = await buildAnthropicMessages(messages); expect(result).toHaveLength(2); expect(result).toEqual([ { content: 'Hello', role: 'user' }, @@ -150,14 +222,14 @@ describe('anthropicHelpers', () => { ]); }); - it('messages should end with user', () => { + it('messages should end with user', async () => { const messages: OpenAIChatMessage[] = [ { content: 'Hello', role: 'user' }, { content: 'Hello', role: 'user' }, { content: 'Hi', role: 'assistant' }, ]; - const contents = buildAnthropicMessages(messages); + const contents = await buildAnthropicMessages(messages); expect(contents).toHaveLength(4); expect(contents).toEqual([ @@ -168,7 +240,7 @@ describe('anthropicHelpers', () => { ]); }); - it('messages should pair', () => { + it('messages should pair', async () => { const messages: OpenAIChatMessage[] = [ { content: 'a', role: 'assistant' }, { content: 'b', role: 'assistant' }, @@ -177,7 +249,7 @@ describe('anthropicHelpers', () => { { content: '你好', role: 'user' }, ]; - const contents = buildAnthropicMessages(messages); + const contents = await buildAnthropicMessages(messages); expect(contents).toHaveLength(9); expect(contents).toEqual([ @@ -193,7 +265,7 @@ describe('anthropicHelpers', () => { ]); }); - it('should correctly convert OpenAI tool message to Anthropic format', () => { + it('should correctly convert OpenAI tool message to Anthropic format', async () => { const messages: OpenAIChatMessage[] = [ { content: '告诉我杭州和北京的天气,先回答我好的', @@ -242,7 +314,7 @@ describe('anthropicHelpers', () => { }, ]; - const contents = buildAnthropicMessages(messages); + const contents = await buildAnthropicMessages(messages); expect(contents).toEqual([ { content: '告诉我杭州和北京的天气,先回答我好的', role: 'user' }, diff --git a/src/libs/agent-runtime/utils/anthropicHelpers.ts b/src/libs/agent-runtime/utils/anthropicHelpers.ts index d640f1b308fb..9e0686329d18 100644 --- a/src/libs/agent-runtime/utils/anthropicHelpers.ts +++ b/src/libs/agent-runtime/utils/anthropicHelpers.ts @@ -1,35 +1,52 @@ import Anthropic from '@anthropic-ai/sdk'; import OpenAI from 'openai'; +import { imageUrlToBase64 } from '@/utils/imageToBase64'; + import { OpenAIChatMessage, UserMessageContentPart } from '../types'; import { parseDataUri } from './uriParser'; -export const buildAnthropicBlock = ( +export const buildAnthropicBlock = async ( content: UserMessageContentPart, -): Anthropic.ContentBlock | Anthropic.ImageBlockParam => { +): Promise => { switch (content.type) { case 'text': { return content; } case 'image_url': { - const { mimeType, base64 } = parseDataUri(content.image_url.url); + const { mimeType, base64, type } = parseDataUri(content.image_url.url); - return { - source: { - data: base64 as string, - media_type: mimeType as Anthropic.ImageBlockParam.Source['media_type'], - type: 'base64', - }, - type: 'image', - }; + if (type === 'base64') + return { + source: { + data: base64 as string, + media_type: mimeType as Anthropic.ImageBlockParam.Source['media_type'], + type: 'base64', + }, + type: 'image', + }; + + if (type === 'url') { + const base64 = await imageUrlToBase64(content.image_url.url); + return { + source: { + data: base64 as string, + media_type: (mimeType as Anthropic.ImageBlockParam.Source['media_type']) || 'image/png', + type: 'base64', + }, + type: 'image', + }; + } + + throw new Error(`Invalid image URL: ${content.image_url.url}`); } } }; -export const buildAnthropicMessage = ( +export const buildAnthropicMessage = async ( message: OpenAIChatMessage, -): Anthropic.Messages.MessageParam => { +): Promise => { const content = message.content as string | UserMessageContentPart[]; switch (message.role) { @@ -39,7 +56,10 @@ export const buildAnthropicMessage = ( case 'user': { return { - content: typeof content === 'string' ? content : content.map((c) => buildAnthropicBlock(c)), + content: + typeof content === 'string' + ? content + : await Promise.all(content.map(async (c) => await buildAnthropicBlock(c))), role: 'user', }; } @@ -90,14 +110,15 @@ export const buildAnthropicMessage = ( } }; -export const buildAnthropicMessages = ( +export const buildAnthropicMessages = async ( oaiMessages: OpenAIChatMessage[], -): Anthropic.Messages.MessageParam[] => { +): Promise => { const messages: Anthropic.Messages.MessageParam[] = []; let lastRole = 'assistant'; let pendingToolResults: Anthropic.ToolResultBlockParam[] = []; - oaiMessages.forEach((message, index) => { + for (const message of oaiMessages) { + const index = oaiMessages.indexOf(message); // refs: https://docs.anthropic.com/claude/docs/tool-use#tool-use-and-tool-result-content-blocks if (message.role === 'tool') { pendingToolResults.push({ @@ -117,7 +138,7 @@ export const buildAnthropicMessages = ( lastRole = 'user'; } } else { - const anthropicMessage = buildAnthropicMessage(message); + const anthropicMessage = await buildAnthropicMessage(message); if (lastRole === anthropicMessage.role) { messages.push({ content: '_', role: lastRole === 'user' ? 'assistant' : 'user' }); @@ -126,7 +147,7 @@ export const buildAnthropicMessages = ( lastRole = anthropicMessage.role; messages.push(anthropicMessage); } - }); + } return messages; }; diff --git a/src/utils/imageToBase64.test.ts b/src/utils/imageToBase64.test.ts new file mode 100644 index 000000000000..53b751154386 --- /dev/null +++ b/src/utils/imageToBase64.test.ts @@ -0,0 +1,90 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; + +import { imageToBase64, imageUrlToBase64 } from './imageToBase64'; + +describe('imageToBase64', () => { + let mockImage: HTMLImageElement; + let mockCanvas: HTMLCanvasElement; + let mockContext: CanvasRenderingContext2D; + + beforeEach(() => { + mockImage = { + width: 200, + height: 100, + } as HTMLImageElement; + + mockContext = { + drawImage: vi.fn(), + } as unknown as CanvasRenderingContext2D; + + mockCanvas = { + width: 0, + height: 0, + getContext: vi.fn().mockReturnValue(mockContext), + toDataURL: vi.fn().mockReturnValue(''), + } as unknown as HTMLCanvasElement; + + vi.spyOn(document, 'createElement').mockReturnValue(mockCanvas); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('should convert image to base64 with correct size and type', () => { + const result = imageToBase64({ img: mockImage, size: 100, type: 'image/jpeg' }); + + expect(document.createElement).toHaveBeenCalledWith('canvas'); + expect(mockCanvas.width).toBe(100); + expect(mockCanvas.height).toBe(100); + expect(mockCanvas.getContext).toHaveBeenCalledWith('2d'); + expect(mockContext.drawImage).toHaveBeenCalledWith(mockImage, 50, 0, 100, 100, 0, 0, 100, 100); + expect(mockCanvas.toDataURL).toHaveBeenCalledWith('image/jpeg'); + expect(result).toBe(''); + }); + + it('should use default type when not specified', () => { + imageToBase64({ img: mockImage, size: 100 }); + expect(mockCanvas.toDataURL).toHaveBeenCalledWith('image/webp'); + }); + + it('should handle taller images correctly', () => { + mockImage.width = 100; + mockImage.height = 200; + imageToBase64({ img: mockImage, size: 100 }); + expect(mockContext.drawImage).toHaveBeenCalledWith(mockImage, 0, 50, 100, 100, 0, 0, 100, 100); + }); +}); + +describe('imageUrlToBase64', () => { + const mockFetch = vi.fn(); + const mockArrayBuffer = new ArrayBuffer(8); + + beforeEach(() => { + global.fetch = mockFetch; + global.btoa = vi.fn().mockReturnValue('mockBase64String'); + }); + + afterEach(() => { + vi.restoreAllMocks(); + }); + + it('should convert image URL to base64 string', async () => { + mockFetch.mockResolvedValue({ + arrayBuffer: () => Promise.resolve(mockArrayBuffer), + }); + + const result = await imageUrlToBase64('https://example.com/image.jpg'); + + expect(mockFetch).toHaveBeenCalledWith('https://example.com/image.jpg'); + expect(global.btoa).toHaveBeenCalled(); + expect(result).toBe('mockBase64String'); + }); + + it('should throw an error when fetch fails', async () => { + const mockError = new Error('Fetch failed'); + mockFetch.mockRejectedValue(mockError); + + await expect(imageUrlToBase64('https://example.com/image.jpg')).rejects.toThrow('Fetch failed'); + }); +}); From c7fde146bc8f747f54d75a060c817d470ed978fb Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Mon, 9 Sep 2024 19:36:21 +0000 Subject: [PATCH 4/4] :bookmark: chore(release): v1.15.30 [skip ci] MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ### [Version 1.15.30](https://github.com/lobehub/lobe-chat/compare/v1.15.29...v1.15.30) Released on **2024-09-09** #### 🐛 Bug Fixes - **misc**: Fix claude 3.5 image with s3 url.
Improvements and Fixes #### What's fixed * **misc**: Fix claude 3.5 image with s3 url, closes [#3870](https://github.com/lobehub/lobe-chat/issues/3870) ([89c8dd4](https://github.com/lobehub/lobe-chat/commit/89c8dd4))
[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top)
--- CHANGELOG.md | 25 +++++++++++++++++++++++++ package.json | 2 +- 2 files changed, 26 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index f071f944e79c..0301d6fc1f0e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,31 @@ # Changelog +### [Version 1.15.30](https://github.com/lobehub/lobe-chat/compare/v1.15.29...v1.15.30) + +Released on **2024-09-09** + +#### 🐛 Bug Fixes + +- **misc**: Fix claude 3.5 image with s3 url. + +
+ +
+Improvements and Fixes + +#### What's fixed + +- **misc**: Fix claude 3.5 image with s3 url, closes [#3870](https://github.com/lobehub/lobe-chat/issues/3870) ([89c8dd4](https://github.com/lobehub/lobe-chat/commit/89c8dd4)) + +
+ +
+ +[![](https://img.shields.io/badge/-BACK_TO_TOP-151515?style=flat-square)](#readme-top) + +
+ ### [Version 1.15.29](https://github.com/lobehub/lobe-chat/compare/v1.15.28...v1.15.29) Released on **2024-09-09** diff --git a/package.json b/package.json index 7af7257562cd..d9f558eb6e4b 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@lobehub/chat", - "version": "1.15.29", + "version": "1.15.30", "description": "Lobe Chat - an open-source, high-performance chatbot framework that supports speech synthesis, multimodal, and extensible Function Call plugin system. Supports one-click free deployment of your private ChatGPT/LLM web application.", "keywords": [ "framework",