From ee83900051c5732b067b568bab4be1e6099fc8fc Mon Sep 17 00:00:00 2001 From: Andrew Chappell Date: Thu, 14 Dec 2023 10:21:36 +1100 Subject: [PATCH] Updates based on feedback Signed-off-by: Andrew Chappell --- CHANGELOG.md | 2 +- USER_GUIDE.md | 68 ++---------------------------------------- guides/ml-commons.md | 71 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 75 insertions(+), 66 deletions(-) create mode 100644 guides/ml-commons.md diff --git a/CHANGELOG.md b/CHANGELOG.md index eea508c8..1dcbf559 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,7 +10,7 @@ Inspired from [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) - Added `includePortInHostHeader` option to `ClientBuilder::fromConfig` ([#118](https://github.com/opensearch-project/opensearch-php/pull/118)) - Added the `RefreshSearchAnalyzers` endpoint ([[#152](https://github.com/opensearch-project/opensearch-php/issues/152)) - Added support for `format` parameter to specify the sql response format ([#161](https://github.com/opensearch-project/opensearch-php/pull/161)) -- Implemented the Model, Model Group and Connector apis ([#170](https://github.com/opensearch-project/opensearch-php/pull/170)) +- Added ml commons model, model group and connector APIs ([#170](https://github.com/opensearch-project/opensearch-php/pull/170)) ### Changed diff --git a/USER_GUIDE.md b/USER_GUIDE.md index 4e3fb175..b43e9a7c 100644 --- a/USER_GUIDE.md +++ b/USER_GUIDE.md @@ -24,7 +24,7 @@ class MyOpenSearchClass public function __construct() { - //simple Setup + // Simple Setup $this->client = OpenSearch\ClientBuilder::fromConfig([ 'hosts' => [ 'https://localhost:9200' @@ -501,69 +501,7 @@ $client = \OpenSearch\ClientBuilder::fromConfig($config); ... ``` -## Machine Learning Example Usage -This example assumes you are using the AWS managed OpenSearch -service. 
See [The ML Commons documentation for more examples and further information.](https://github.com/opensearch-project/ml-commons/blob/main/docs/remote_inference_blueprints/openai_connector_embedding_blueprint.md) +## Advanced Features -It walks through the process of setting up a model to generate -vector embeddings from OpenAI. -```php -ml()->registerModelGroup([ - 'body' => [ - 'name' => 'openai_model_group', - 'description' => 'Group containing models for OpenAI', - ], -]); - -# Create the connector. -$connectorResponse = $client->ml()->createConnector([ - 'body' => [ - 'name' => "Open AI Embedding Connector", - 'description' => "Creates a connector to Open AI's embedding endpoint", - 'version' => 1, - 'protocol' => 'http', - 'parameters' => ['model' => 'text-embedding-ada-002'], - 'credential' => [ - "secretArn" => '', - "roleArn" => '', - ] - 'actions' => [ - [ - 'action_type' => 'predict', - 'method' => 'POST', - 'url' => 'https://api.openai.com/v1/embeddings', - 'headers' => [ - 'Authorization': 'Bearer ${credential.secretArn.}' - ], - 'request_body' => "{ \"input\": \${parameters.input}, \"model\": \"\${parameters.model}\" }", - 'pre_process_function' => "connector.pre_process.openai.embedding", - 'post_process_function' => "connector.post_process.openai.embedding", - ], - ], - ], -]); - -# Register the model. -$registerModelResponse = $client->ml()->registerModel([ - 'body' => [ - 'name' => 'OpenAI embedding model', - 'function_name' => 'remote', - 'model_group_id' => $modelGroupResponse['model_group_id'], - 'description' => 'Model for retrieving vector embeddings from OpenAI', - 'connector_id' => $connectorResponse['connector_id'], - ], -]); - -# Monitor the state of the register model task. -$taskResponse = $client->ml()->getTask(['id' => $registerModelResponse['task_id']]); - -assert($taskResponse['state'] === 'COMPLETED'); - -# Finally deploy the model. You will now be able to generate vector -# embeddings from OpenSearch (via OpenAI). 
-$client->ml()->deployModel(['id' => $taskResponse['model_id']]);
-```
+* [ML Commons](guides/ml-commons.md)
diff --git a/guides/ml-commons.md b/guides/ml-commons.md
new file mode 100644
index 00000000..ed3cb489
--- /dev/null
+++ b/guides/ml-commons.md
@@ -0,0 +1,71 @@
+# Machine Learning Example Usage
+
+Walks through the process of setting up a model to generate
+vector embeddings from OpenAI on AWS managed OpenSearch.
+
+### Prerequisites
+
+* This example assumes you are using the AWS managed OpenSearch
+  service. See [the ML Commons documentation](https://github.com/opensearch-project/ml-commons/blob/main/docs/remote_inference_blueprints/openai_connector_embedding_blueprint.md) for more examples and further information.
+* You will need an API key from OpenAI. [Sign up](https://platform.openai.com/signup)
+* The API key must be stored in [AWS Secrets Manager](https://aws.amazon.com/secrets-manager/)
+
+```php
+<?php
+
+# Register a model group.
+$modelGroupResponse = $client->ml()->registerModelGroup([
+    'body' => [
+        'name' => 'openai_model_group',
+        'description' => 'Group containing models for OpenAI',
+    ],
+]);
+
+# Create the connector.
+$connectorResponse = $client->ml()->createConnector([
+    'body' => [
+        'name' => "Open AI Embedding Connector",
+        'description' => "Creates a connector to Open AI's embedding endpoint",
+        'version' => 1,
+        'protocol' => 'http',
+        'parameters' => ['model' => 'text-embedding-ada-002'],
+        'credential' => [
+            "secretArn" => '<your-aws-secrets-manager-secret-arn>',
+            "roleArn" => '<your-iam-role-arn>',
+        ],
+        'actions' => [
+            [
+                'action_type' => 'predict',
+                'method' => 'POST',
+                'url' => 'https://api.openai.com/v1/embeddings',
+                'headers' => [
+                    'Authorization' => 'Bearer ${credential.secretArn.<your-secret-key-name>}'
+                ],
+                'request_body' => "{ \"input\": \${parameters.input}, \"model\": \"\${parameters.model}\" }",
+                'pre_process_function' => "connector.pre_process.openai.embedding",
+                'post_process_function' => "connector.post_process.openai.embedding",
+            ],
+        ],
+    ],
+]);
+
+# Register the model.
+$registerModelResponse = $client->ml()->registerModel([ + 'body' => [ + 'name' => 'OpenAI embedding model', + 'function_name' => 'remote', + 'model_group_id' => $modelGroupResponse['model_group_id'], + 'description' => 'Model for retrieving vector embeddings from OpenAI', + 'connector_id' => $connectorResponse['connector_id'], + ], +]); + +# Monitor the state of the register model task. +$taskResponse = $client->ml()->getTask(['id' => $registerModelResponse['task_id']]); + +assert($taskResponse['state'] === 'COMPLETED'); + +# Finally deploy the model. You will now be able to generate vector +# embeddings from OpenSearch (via OpenAI). +$client->ml()->deployModel(['id' => $taskResponse['model_id']]); +```