From 7d6bb0e9c2688caab537698936b88eeab059f1ee Mon Sep 17 00:00:00 2001
From: Stefan Bratanov
Date: Wed, 24 Jul 2024 12:55:05 +0100
Subject: [PATCH] Add Uploads support

Adds UploadsClient together with the CreateUploadRequest,
CompleteUploadRequest, Upload and UploadPart models.

Byte counts are modelled as long because an Upload can hold up to 8 GB,
which does not fit into an int.

---
 README.md                                     |  23 ++++
 .../jvm/openai/CompleteUploadRequest.java     |  39 ++++++
 .../jvm/openai/CreateUploadRequest.java       |  62 +++++++++
 .../stefanbratanov/jvm/openai/Endpoint.java   |   1 +
 .../stefanbratanov/jvm/openai/OpenAI.java     |  10 ++
 .../stefanbratanov/jvm/openai/Upload.java     |  11 ++
 .../stefanbratanov/jvm/openai/UploadPart.java |   3 +
 .../jvm/openai/UploadsClient.java             | 119 ++++++++++++++++++
 .../jvm/openai/OpenAIIntegrationTest.java     |  44 +++++++
 .../OpenApiSpecificationValidationTest.java   |  43 +++++++
 .../jvm/openai/TestDataUtil.java              |  52 ++++++--
 11 files changed, 399 insertions(+), 8 deletions(-)
 create mode 100644 src/main/java/io/github/stefanbratanov/jvm/openai/CompleteUploadRequest.java
 create mode 100644 src/main/java/io/github/stefanbratanov/jvm/openai/CreateUploadRequest.java
 create mode 100644 src/main/java/io/github/stefanbratanov/jvm/openai/Upload.java
 create mode 100644 src/main/java/io/github/stefanbratanov/jvm/openai/UploadPart.java
 create mode 100644 src/main/java/io/github/stefanbratanov/jvm/openai/UploadsClient.java

diff --git a/README.md b/README.md
index f8bc177..f8fbb33 100644
--- a/README.md
+++ b/README.md
@@ -53,6 +53,7 @@ ChatCompletion chatCompletion = chatClient.createChatCompletion(createChatComple
 | [Fine-tuning](https://platform.openai.com/docs/api-reference/fine-tuning) | ✔️ |
 | [Batch](https://platform.openai.com/docs/api-reference/batch) | ✔️ |
 | [Files](https://platform.openai.com/docs/api-reference/files) | ✔️ |
+| [Uploads](https://platform.openai.com/docs/api-reference/uploads) | ✔️ |
 | [Images](https://platform.openai.com/docs/api-reference/images) | ✔️ |
 | [Models](https://platform.openai.com/docs/api-reference/models) | ✔️ |
 | [Moderations](https://platform.openai.com/docs/api-reference/moderations) | ✔️ |
@@ -203,6 +204,28 @@ Batch batch = batchClient.createBatch(request);
 Batch retrievedBatch = batchClient.retrieveBatch(batch.id());
 System.out.println(retrievedBatch.status());
 ```
+- Upload large file in multiple parts
+```java
+UploadsClient uploadsClient = openAI.uploadsClient();
+CreateUploadRequest createUploadRequest = CreateUploadRequest.newBuilder()
+    .filename("training_examples.jsonl")
+    .purpose(Purpose.FINE_TUNE)
+    .bytes(2147483648L)
+    .mimeType("text/jsonl")
+    .build();
+Upload upload = uploadsClient.createUpload(createUploadRequest);
+
+UploadPart part1 = uploadsClient.addUploadPart(upload.id(), Paths.get("/tmp/part1.jsonl"));
+UploadPart part2 = uploadsClient.addUploadPart(upload.id(), Paths.get("/tmp/part2.jsonl"));
+
+CompleteUploadRequest completeUploadRequest = CompleteUploadRequest.newBuilder()
+    .partIds(List.of(part1.id(), part2.id()))
+    .build();
+
+Upload completedUpload = uploadsClient.completeUpload(upload.id(), completeUploadRequest);
+// the created usable File object
+File file = completedUpload.file();
+```
 - Build AI Assistant
 ```java
 AssistantsClient assistantsClient = openAI.assistantsClient();
diff --git a/src/main/java/io/github/stefanbratanov/jvm/openai/CompleteUploadRequest.java b/src/main/java/io/github/stefanbratanov/jvm/openai/CompleteUploadRequest.java
new file mode 100644
index 0000000..b6d797e
--- /dev/null
+++ b/src/main/java/io/github/stefanbratanov/jvm/openai/CompleteUploadRequest.java
@@ -0,0 +1,39 @@
+package io.github.stefanbratanov.jvm.openai;
+
+import java.util.List;
+import java.util.Optional;
+
+public record CompleteUploadRequest(List<String> partIds, Optional<String> md5) {
+
+  public static Builder newBuilder() {
+    return new Builder();
+  }
+
+  public static class Builder {
+
+    private List<String> partIds;
+
+    private Optional<String> md5 = Optional.empty();
+
+    /**
+     * @param partIds The ordered list of Part IDs.
+     */
+    public Builder partIds(List<String> partIds) {
+      this.partIds = partIds;
+      return this;
+    }
+
+    /**
+     * @param md5 The optional md5 checksum for the file contents to verify if the bytes uploaded
+     *     matches what you expect.
+     */
+    public Builder md5(String md5) {
+      this.md5 = Optional.of(md5);
+      return this;
+    }
+
+    public CompleteUploadRequest build() {
+      return new CompleteUploadRequest(partIds, md5);
+    }
+  }
+}
diff --git a/src/main/java/io/github/stefanbratanov/jvm/openai/CreateUploadRequest.java b/src/main/java/io/github/stefanbratanov/jvm/openai/CreateUploadRequest.java
new file mode 100644
index 0000000..429ca0a
--- /dev/null
+++ b/src/main/java/io/github/stefanbratanov/jvm/openai/CreateUploadRequest.java
@@ -0,0 +1,62 @@
+package io.github.stefanbratanov.jvm.openai;
+
+public record CreateUploadRequest(String filename, String purpose, long bytes, String mimeType) {
+
+  public static Builder newBuilder() {
+    return new Builder();
+  }
+
+  public static class Builder {
+
+    private String filename;
+    private String purpose;
+    private long bytes;
+    private String mimeType;
+
+    /**
+     * @param filename The name of the file to upload.
+     */
+    public Builder filename(String filename) {
+      this.filename = filename;
+      return this;
+    }
+
+    /**
+     * @param purpose The intended purpose of the uploaded file.
+     */
+    public Builder purpose(String purpose) {
+      this.purpose = purpose;
+      return this;
+    }
+
+    /**
+     * @param purpose The intended purpose of the uploaded file.
+     */
+    public Builder purpose(Purpose purpose) {
+      this.purpose = purpose.getId();
+      return this;
+    }
+
+    /**
+     * @param bytes The number of bytes in the file you are uploading (a long, up to 8 GB).
+     */
+    public Builder bytes(long bytes) {
+      this.bytes = bytes;
+      return this;
+    }
+
+    /**
+     * @param mimeType The MIME type of the file.
+     *     <p>This must fall within the supported MIME types for your file purpose. See the
+     *     supported MIME types for assistants and vision.
+     */
+    public Builder mimeType(String mimeType) {
+      this.mimeType = mimeType;
+      return this;
+    }
+
+    public CreateUploadRequest build() {
+      return new CreateUploadRequest(filename, purpose, bytes, mimeType);
+    }
+  }
+}
diff --git a/src/main/java/io/github/stefanbratanov/jvm/openai/Endpoint.java b/src/main/java/io/github/stefanbratanov/jvm/openai/Endpoint.java
index 6c5188c..99ef0c9 100644
--- a/src/main/java/io/github/stefanbratanov/jvm/openai/Endpoint.java
+++ b/src/main/java/io/github/stefanbratanov/jvm/openai/Endpoint.java
@@ -14,6 +14,7 @@ enum Endpoint {
   FILES("files"),
   FINE_TUNING("fine_tuning/jobs"),
   BATCHES("batches"),
+  UPLOADS("uploads"),
   // Beta
   THREADS("threads"),
   ASSISTANTS("assistants"),
diff --git a/src/main/java/io/github/stefanbratanov/jvm/openai/OpenAI.java b/src/main/java/io/github/stefanbratanov/jvm/openai/OpenAI.java
index 2b69294..9714a60 100644
--- a/src/main/java/io/github/stefanbratanov/jvm/openai/OpenAI.java
+++ b/src/main/java/io/github/stefanbratanov/jvm/openai/OpenAI.java
@@ -20,6 +20,7 @@ public final class OpenAI {
   private final FineTuningClient fineTuningClient;
   private final BatchClient batchClient;
   private final FilesClient filesClient;
+  private final UploadsClient uploadsClient;
   private final ImagesClient imagesClient;
   private final ModelsClient modelsClient;
   private final ModerationsClient moderationsClient;
@@ -48,6 +49,7 @@ private OpenAI(
         new FineTuningClient(baseUrl, authenticationHeaders, httpClient, requestTimeout);
     batchClient = new BatchClient(baseUrl, authenticationHeaders, httpClient, requestTimeout);
     filesClient = new FilesClient(baseUrl, authenticationHeaders, httpClient, requestTimeout);
+    uploadsClient = new UploadsClient(baseUrl, authenticationHeaders, httpClient, requestTimeout);
     imagesClient = new ImagesClient(baseUrl, authenticationHeaders, httpClient, requestTimeout);
     modelsClient = new ModelsClient(baseUrl, authenticationHeaders, httpClient, requestTimeout);
     moderationsClient =
@@ -115,6 +117,14 @@ public FilesClient filesClient() {
     return filesClient;
   }
 
+  /**
+   * @return a client based on <a
+   *     href="https://platform.openai.com/docs/api-reference/uploads">Uploads</a>
+   */
+  public UploadsClient uploadsClient() {
+    return uploadsClient;
+  }
+
   /**
    * @return a client based on <a
    *     href="https://platform.openai.com/docs/api-reference/images">Images</a>
diff --git a/src/main/java/io/github/stefanbratanov/jvm/openai/Upload.java b/src/main/java/io/github/stefanbratanov/jvm/openai/Upload.java
new file mode 100644
index 0000000..4731a01
--- /dev/null
+++ b/src/main/java/io/github/stefanbratanov/jvm/openai/Upload.java
@@ -0,0 +1,11 @@
+package io.github.stefanbratanov.jvm.openai;
+
+public record Upload(
+    String id,
+    int createdAt,
+    String filename,
+    long bytes,
+    String purpose,
+    String status,
+    int expiresAt,
+    File file) {}
diff --git a/src/main/java/io/github/stefanbratanov/jvm/openai/UploadPart.java b/src/main/java/io/github/stefanbratanov/jvm/openai/UploadPart.java
new file mode 100644
index 0000000..43822c9
--- /dev/null
+++ b/src/main/java/io/github/stefanbratanov/jvm/openai/UploadPart.java
@@ -0,0 +1,3 @@
+package io.github.stefanbratanov.jvm.openai;
+
+public record UploadPart(String id, int createdAt, String uploadId) {}
diff --git a/src/main/java/io/github/stefanbratanov/jvm/openai/UploadsClient.java b/src/main/java/io/github/stefanbratanov/jvm/openai/UploadsClient.java
new file mode 100644
index 0000000..4d26a6c
--- /dev/null
+++ b/src/main/java/io/github/stefanbratanov/jvm/openai/UploadsClient.java
@@ -0,0 +1,119 @@
+package io.github.stefanbratanov.jvm.openai;
+
+import java.net.URI;
+import java.net.http.HttpClient;
+import java.net.http.HttpRequest;
+import java.net.http.HttpRequest.BodyPublishers;
+import java.net.http.HttpResponse;
+import java.nio.file.Path;
+import java.time.Duration;
+import java.util.Optional;
+
+/**
+ * Allows you to upload large files in multiple parts.
+ *
+ * <p>Based on <a href="https://platform.openai.com/docs/api-reference/uploads">Uploads</a>
+ */
+public final class UploadsClient extends OpenAIClient {
+
+  private static final String PARTS_SEGMENT = "/parts";
+  private static final String COMPLETE_SEGMENT = "/complete";
+  private static final String CANCEL_SEGMENT = "/cancel";
+
+  private final URI baseUrl;
+
+  UploadsClient(
+      URI baseUrl,
+      String[] authenticationHeaders,
+      HttpClient httpClient,
+      Optional<Duration> requestTimeout) {
+    super(authenticationHeaders, httpClient, requestTimeout);
+    this.baseUrl = baseUrl;
+  }
+
+  /**
+   * Creates an intermediate Upload object that you can add Parts to. Currently, an Upload can
+   * accept at most 8 GB in total and expires after an hour after you create it.
+   *
+   * <p>Once you complete the Upload, we will create a File object that contains all the parts you
+   * uploaded. This File is usable in the rest of our platform as a regular File object.
+   *
+   * @throws OpenAIException in case of API errors
+   */
+  public Upload createUpload(CreateUploadRequest request) {
+    HttpRequest httpRequest =
+        newHttpRequestBuilder(Constants.CONTENT_TYPE_HEADER, Constants.JSON_MEDIA_TYPE)
+            .uri(baseUrl.resolve(Endpoint.UPLOADS.getPath()))
+            .POST(createBodyPublisher(request))
+            .build();
+    HttpResponse<byte[]> httpResponse = sendHttpRequest(httpRequest);
+    return deserializeResponse(httpResponse.body(), Upload.class);
+  }
+
+  /**
+   * Adds a Part to an Upload object. A Part represents a chunk of bytes from the file you are
+   * trying to upload.
+   *
+   * <p>Each Part can be at most 64 MB, and you can add Parts until you hit the Upload maximum of 8
+   * GB.
+   *
+   * <p>It is possible to add multiple Parts in parallel. You can decide the intended order of the
+   * Parts when you complete the Upload.
+   *
+   * @param uploadId The ID of the Upload.
+   * @param data The chunk of bytes for this Part.
+   * @throws OpenAIException in case of API errors
+   */
+  public UploadPart addUploadPart(String uploadId, Path data) {
+    MultipartBodyPublisher multipartBodyPublisher =
+        MultipartBodyPublisher.newBuilder().filePart("data", data).build();
+    HttpRequest httpRequest =
+        newHttpRequestBuilder(
+                Constants.CONTENT_TYPE_HEADER, multipartBodyPublisher.getContentTypeHeader())
+            .uri(baseUrl.resolve(Endpoint.UPLOADS.getPath() + "/" + uploadId + PARTS_SEGMENT))
+            .POST(multipartBodyPublisher)
+            .build();
+    HttpResponse<byte[]> httpResponse = sendHttpRequest(httpRequest);
+    return deserializeResponse(httpResponse.body(), UploadPart.class);
+  }
+
+  /**
+   * Completes the Upload.
+   *
+   * <p>Within the returned Upload object, there is a nested File object that is ready to use in the
+   * rest of the platform.
+   *
+   * <p>You can specify the order of the Parts by passing in an ordered list of the Part IDs.
+   *
+   * <p>The number of bytes uploaded upon completion must match the number of bytes initially
+   * specified when creating the Upload object. No Parts may be added after an Upload is completed.
+   *
+   * @param uploadId The ID of the Upload.
+   * @throws OpenAIException in case of API errors
+   */
+  public Upload completeUpload(String uploadId, CompleteUploadRequest request) {
+    HttpRequest httpRequest =
+        newHttpRequestBuilder(Constants.CONTENT_TYPE_HEADER, Constants.JSON_MEDIA_TYPE)
+            .uri(baseUrl.resolve(Endpoint.UPLOADS.getPath() + "/" + uploadId + COMPLETE_SEGMENT))
+            .POST(createBodyPublisher(request))
+            .build();
+    HttpResponse<byte[]> httpResponse = sendHttpRequest(httpRequest);
+    return deserializeResponse(httpResponse.body(), Upload.class);
+  }
+
+  /**
+   * Cancels the Upload. No Parts may be added after an Upload is cancelled.
+   *
+   * @param uploadId The ID of the Upload.
+   * @throws OpenAIException in case of API errors
+   */
+  public Upload cancelUpload(String uploadId) {
+    HttpRequest httpRequest =
+        newHttpRequestBuilder()
+            .uri(baseUrl.resolve(Endpoint.UPLOADS.getPath() + "/" + uploadId + CANCEL_SEGMENT))
+            .POST(BodyPublishers.noBody())
+            .build();
+    HttpResponse<byte[]> httpResponse = sendHttpRequest(httpRequest);
+    return deserializeResponse(httpResponse.body(), Upload.class);
+  }
+}
diff --git a/src/test/java/io/github/stefanbratanov/jvm/openai/OpenAIIntegrationTest.java b/src/test/java/io/github/stefanbratanov/jvm/openai/OpenAIIntegrationTest.java
index 59cde29..82ccbdb 100644
--- a/src/test/java/io/github/stefanbratanov/jvm/openai/OpenAIIntegrationTest.java
+++ b/src/test/java/io/github/stefanbratanov/jvm/openai/OpenAIIntegrationTest.java
@@ -6,8 +6,10 @@
 
 import io.github.stefanbratanov.jvm.openai.ContentPart.TextContentPart;
 import io.github.stefanbratanov.jvm.openai.CreateChatCompletionRequest.StreamOptions;
+import java.io.IOException;
 import java.io.UncheckedIOException;
 import java.net.http.HttpTimeoutException;
+import java.nio.file.Files;
 import java.nio.file.Path;
 import java.time.Duration;
 import java.util.List;
@@ -371,6 +373,48 @@ void testFilesClient() {
     assertThat(retrievedFile).isEqualTo(uploadedFile);
   }
 
+  @Test
+  void testUploadsClient(@TempDir Path tempDir) throws IOException {
+    UploadsClient uploadsClient = openAI.uploadsClient();
+    FilesClient filesClient = openAI.filesClient();
+
+    CreateUploadRequest createUploadRequest =
+        CreateUploadRequest.newBuilder()
+            .filename("hello.txt")
+            .purpose(Purpose.BATCH)
+            .bytes(11)
+            .mimeType("text/plain")
+            .build();
+
+    Upload upload = uploadsClient.createUpload(createUploadRequest);
+
+    Path part1 = tempDir.resolve("part1.txt");
+    Path part2 = tempDir.resolve("part2.txt");
+
+    Files.writeString(part1, "Hello ");
+    Files.writeString(part2, "World");
+
+    UploadPart uploadPart = uploadsClient.addUploadPart(upload.id(), part1);
+    UploadPart uploadPart2 = uploadsClient.addUploadPart(upload.id(), part2);
+
+    CompleteUploadRequest completeUploadRequest =
+        CompleteUploadRequest.newBuilder()
+            .partIds(List.of(uploadPart.id(), uploadPart2.id()))
+            .build();
+
+    Upload completedUpload = uploadsClient.completeUpload(upload.id(), completeUploadRequest);
+
+    assertThat(completedUpload.status()).isEqualTo("completed");
+
+    File file = completedUpload.file();
+
+    assertThat(file).isNotNull();
+
+    byte[] retrievedContent = filesClient.retrieveFileContent(file.id());
+
+    assertThat(new String(retrievedContent)).isEqualTo("Hello World");
+  }
+
   @Test // using mock server because fine-tuning models are costly
   void testFineTuningClient() {
     FineTuningClient fineTuningClient = openAIWithMockServer.fineTuningClient();
diff --git a/src/test/java/io/github/stefanbratanov/jvm/openai/OpenApiSpecificationValidationTest.java b/src/test/java/io/github/stefanbratanov/jvm/openai/OpenApiSpecificationValidationTest.java
index 9c9a0cb..99a72a3 100644
--- a/src/test/java/io/github/stefanbratanov/jvm/openai/OpenApiSpecificationValidationTest.java
+++ b/src/test/java/io/github/stefanbratanov/jvm/openai/OpenApiSpecificationValidationTest.java
@@ -169,6 +169,49 @@ void validateFiles() {
         "Object has missing required properties ([\"object\",\"status\"])");
   }
 
+  @RepeatedTest(50)
+  void validateUploads() {
+    CreateUploadRequest createUploadRequest = testDataUtil.randomCreateUploadRequest();
+
+    Request request =
+        createRequestWithBody(
+            Method.POST, "/" + Endpoint.UPLOADS.getPath(), serializeObject(createUploadRequest));
+
+    Upload upload = testDataUtil.randomUpload();
+
+    Response response = createResponseWithBody(serializeObject(upload));
+
+    validate(
+        request,
+        response,
+        "Object has missing required properties ([\"object\",\"status\"])",
+        // spec issue
+        "Object has missing required properties ([\"step_number\"]");
+
+    UploadPart uploadPart = testDataUtil.randomUploadPart();
+
+    response = createResponseWithBody(serializeObject(uploadPart));
+
+    validate("/" + Endpoint.UPLOADS.getPath() + "/{upload_id}/parts", Method.POST, response);
+
+    CompleteUploadRequest completeUploadRequest = testDataUtil.randomCompleteUploadRequest();
+
+    request =
+        createRequestWithBody(
+            Method.POST,
+            "/" + Endpoint.UPLOADS.getPath() + "/{upload_id}/complete",
+            serializeObject(completeUploadRequest));
+
+    response = createResponseWithBody(serializeObject(upload));
+
+    validate(
+        request,
+        response,
+        "Object has missing required properties ([\"object\",\"status\"])",
+        // spec issue
+        "Object has missing required properties ([\"step_number\"]");
+  }
+
   @RepeatedTest(50)
   void validateImages() {
     CreateImageRequest createImageRequest = testDataUtil.randomCreateImageRequest();
diff --git a/src/test/java/io/github/stefanbratanov/jvm/openai/TestDataUtil.java b/src/test/java/io/github/stefanbratanov/jvm/openai/TestDataUtil.java
index dceb062..b74d31d 100644
--- a/src/test/java/io/github/stefanbratanov/jvm/openai/TestDataUtil.java
+++ b/src/test/java/io/github/stefanbratanov/jvm/openai/TestDataUtil.java
@@ -237,14 +237,39 @@ public File randomFile() {
         randomInt(1, 1000),
         randomLong(1, 42_000),
         randomString(7),
-        oneOf(
-            "assistants",
-            "assistants_output",
-            "batch",
-            "batch_output",
-            "fine-tune",
-            "fine-tune-results",
-            "vision"));
+        randomFilePurpose());
+  }
+
+  public Upload randomUpload() {
+    return new Upload(
+        randomString(5),
+        randomInt(100, 999),
+        randomString(5),
+        randomInt(1, 100),
+        randomFilePurpose(),
+        oneOf("pending", "completed", "cancelled", "expired"),
+        randomInt(100, 999),
+        randomFile());
+  }
+
+  public CreateUploadRequest randomCreateUploadRequest() {
+    return CreateUploadRequest.newBuilder()
+        .filename(randomString(5))
+        .purpose(oneOf("assistants", "batch", "fine-tune", "vision"))
+        .bytes(randomInt(2, 100))
+        .mimeType(oneOf("text/plain", "application/pdf", "text/javascript"))
+        .build();
+  }
+
+  public UploadPart randomUploadPart() {
+    return new UploadPart(randomString(5), randomInt(100, 999), randomString(5));
+  }
+
+  public CompleteUploadRequest randomCompleteUploadRequest() {
+    return CompleteUploadRequest.newBuilder()
+        .partIds(listOf(randomInt(1, 5), () -> randomString(3, 7)))
+        .md5(randomString(32))
+        .build();
   }
 
   public CreateImageRequest randomCreateImageRequest() {
@@ -260,6 +285,17 @@ public CreateImageRequest randomCreateImageRequest() {
         .build();
   }
 
+  public String randomFilePurpose() {
+    return oneOf(
+        "assistants",
+        "assistants_output",
+        "batch",
+        "batch_output",
+        "fine-tune",
+        "fine-tune-results",
+        "vision");
+  }
+
   public Images randomImages() {
     return new Images(randomLong(1, 10_000), listOf(randomInt(1, 5), this::randomImage));
   }