From 9dc5c75105bda3b9e483a98abf07cef2a23ca09d Mon Sep 17 00:00:00 2001
From: Michael Goin <michael@neuralmagic.com>
Date: Thu, 11 Apr 2024 14:14:39 -0400
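Subject: [PATCH] Update server docs to use v2 infer endpoints

Replace the `/predict` and `/predict/from_files` URLs in the documented
client examples with `/v2/models/<endpoint>/infer` and
`/v2/models/<endpoint>/infer/from_files`.

Also restructure the multi-sequence text-classification request example
so that each sequence is passed as its own single-element list.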
---
 docs/use-cases/README.md                          |  2 +-
 docs/use-cases/cv/embedding-extraction.md         |  2 +-
 docs/use-cases/cv/image-segmentation-yolact.md    |  4 ++--
 docs/use-cases/cv/object-detection-yolov5.md      |  4 ++--
 docs/use-cases/general/scheduler.md               |  2 +-
 docs/use-cases/nlp/question-answering.md          |  2 +-
 docs/use-cases/nlp/sentiment-analysis.md          |  4 ++--
 docs/use-cases/nlp/text-classification.md         | 15 ++++++++-------
 docs/use-cases/nlp/token-classification.md        |  4 ++--
 .../nlp/transformers-embedding-extraction.md      |  4 ++--
 .../nlp/zero-shot-text-classification.md          |  4 ++--
 .../ultralytics-readmes/deepsparse-readme.md      |  4 ++--
 12 files changed, 26 insertions(+), 25 deletions(-)

diff --git a/docs/use-cases/README.md b/docs/use-cases/README.md
index 8d7532d398..f021f6f8e7 100644
--- a/docs/use-cases/README.md
+++ b/docs/use-cases/README.md
@@ -72,7 +72,7 @@ Making a request:
 import requests
 
 # Uvicorn is running on this port
-url = 'http://0.0.0.0:5543/predict'
+url = 'http://0.0.0.0:5543/v2/models/sentiment_analysis/infer'
 
 # send the data
 obj = {"sequences": "Sending requests to DeepSparse Server is fast and easy!"}
diff --git a/docs/use-cases/cv/embedding-extraction.md b/docs/use-cases/cv/embedding-extraction.md
index 9b1501896c..c37aa74d70 100644
--- a/docs/use-cases/cv/embedding-extraction.md
+++ b/docs/use-cases/cv/embedding-extraction.md
@@ -93,7 +93,7 @@ deepsparse.server --config_file config.yaml
 Make requests to the server: 
 ```python
 import requests, json
-url = "http://0.0.0.0:5543/predict/from_files"
+url = "http://0.0.0.0:5543/v2/models/embedding_extraction-0/infer/from_files"
 paths = ["lion.jpeg"]
 files = [("request", open(img, 'rb')) for img in paths]
 resp = requests.post(url=url, files=files)
diff --git a/docs/use-cases/cv/image-segmentation-yolact.md b/docs/use-cases/cv/image-segmentation-yolact.md
index f37cee9e0d..19f83b3105 100644
--- a/docs/use-cases/cv/image-segmentation-yolact.md
+++ b/docs/use-cases/cv/image-segmentation-yolact.md
@@ -188,7 +188,7 @@ Run inference:
 import requests
 import json
 
-url = 'http://0.0.0.0:5543/predict/from_files'
+url = 'http://0.0.0.0:5543/v2/models/yolact/infer/from_files'
 path = ['thailand.jpeg'] # list of images for inference
 files = [('request', open(img, 'rb')) for img in path]
 resp = requests.post(url=url, files=files)
@@ -217,7 +217,7 @@ Run inference:
 import requests
 import json
 
-url = 'http://0.0.0.0:5543/predict/from_files'
+url = 'http://0.0.0.0:5543/v2/models/yolact/infer/from_files'
 path = ['thailand.jpeg'] # list of images for inference
 files = [('request', open(img, 'rb')) for img in path]
 resp = requests.post(url=url, files=files)
diff --git a/docs/use-cases/cv/object-detection-yolov5.md b/docs/use-cases/cv/object-detection-yolov5.md
index 12f0a1bde7..ba8dab4dee 100644
--- a/docs/use-cases/cv/object-detection-yolov5.md
+++ b/docs/use-cases/cv/object-detection-yolov5.md
@@ -230,7 +230,7 @@ Making a request.
 import requests
 import json
 
-url = 'http://0.0.0.0:5543/predict/from_files'
+url = 'http://0.0.0.0:5543/v2/models/yolo/infer/from_files'
 path = ['basilica.jpg'] # list of images for inference
 files = [('request', open(img, 'rb')) for img in path]
 resp = requests.post(url=url, files=files)
@@ -271,7 +271,7 @@ Making a request:
 ```python
 import requests, json
 
-url = 'http://0.0.0.0:5543/predict/from_files'
+url = 'http://0.0.0.0:5543/v2/models/yolo/infer/from_files'
 path = ['basilica.jpg'] # list of images for inference
 files = [('request', open(img, 'rb')) for img in path]
 resp = requests.post(url=url, files=files)
diff --git a/docs/use-cases/general/scheduler.md b/docs/use-cases/general/scheduler.md
index 5b7cccfbfc..1e64c5ebad 100644
--- a/docs/use-cases/general/scheduler.md
+++ b/docs/use-cases/general/scheduler.md
@@ -158,7 +158,7 @@ Run inference:
 import requests
 
 # Uvicorn is running on this port
-url = 'http://0.0.0.0:5543/predict'
+url = 'http://0.0.0.0:5543/v2/models/sentiment_analysis/infer'
 
 # send the data
 obj = {"sequences": "Sending requests to DeepSparse Server is fast and easy!"}
diff --git a/docs/use-cases/nlp/question-answering.md b/docs/use-cases/nlp/question-answering.md
index 89124d32d7..1de16bf532 100644
--- a/docs/use-cases/nlp/question-answering.md
+++ b/docs/use-cases/nlp/question-answering.md
@@ -230,7 +230,7 @@ Here is an example client request, using the Python requests library for formatt
 import requests
 
 # Uvicorn is running on this port
-url = 'http://0.0.0.0:5543/predict'
+url = 'http://0.0.0.0:5543/v2/models/question_answering/infer'
 
 # send the data
 obj = {
diff --git a/docs/use-cases/nlp/sentiment-analysis.md b/docs/use-cases/nlp/sentiment-analysis.md
index c290b73831..230c30b2de 100644
--- a/docs/use-cases/nlp/sentiment-analysis.md
+++ b/docs/use-cases/nlp/sentiment-analysis.md
@@ -259,7 +259,7 @@ Here is an example client request, using the Python `requests` library for forma
 import requests
 
 # Uvicorn is running on this port
-url = 'http://0.0.0.0:5543/predict'
+url = 'http://0.0.0.0:5543/v2/models/sentiment_analysis/infer'
 
 # send the data
 obj = {"sequences": "Sending requests to DeepSparse Server is fast and easy!"}
@@ -297,7 +297,7 @@ Making a request:
 import requests
 
 # Uvicorn is running on this port
-url = 'http://0.0.0.0:5543/predict'
+url = 'http://0.0.0.0:5543/v2/models/sentiment_analysis/infer'
 
 # send the data
 obj = {"sequences": "Sending requests to DeepSparse Server is fast and easy!"}
diff --git a/docs/use-cases/nlp/text-classification.md b/docs/use-cases/nlp/text-classification.md
index c01268402e..dc415ad752 100644
--- a/docs/use-cases/nlp/text-classification.md
+++ b/docs/use-cases/nlp/text-classification.md
@@ -325,7 +325,7 @@ Making a request:
 import requests
 
 # Uvicorn is running on this port
-url = 'http://0.0.0.0:5543/predict'
+url = 'http://0.0.0.0:5543/v2/models/text_classification/infer'
 
 # send the data
 obj = {"sequences": "Sending requests to DeepSparse Server is fast and easy!"}
@@ -351,14 +351,15 @@ Making a request:
 import requests
 
 # Uvicorn is running on this port
-url = 'http://0.0.0.0:5543/predict'
+url = "http://0.0.0.0:5543/v2/models/text_classification/infer"
 
 # send the data
 obj = {
-  "sequences": [[
-      "The text classification pipeline is fast and easy to use!",
-      "The pipeline for text classification makes it simple to get started"
-]]}
+    "sequences": [
+        ["The pipeline for text classification makes it simple to get started"],
+        ["The text classification pipeline is fast and easy to use!"],
+    ]
+}
 resp = requests.post(url=url, json=obj)
 
 # recieve the post-processed output
@@ -391,7 +392,7 @@ Making a request:
 import requests
 
 # Uvicorn is running on this port
-url = 'http://0.0.0.0:5543/predict'
+url = 'http://0.0.0.0:5543/v2/models/text_classification/infer'
 
 # send the data
 obj = {"sequences": "Sending requests to DeepSparse Server is fast and easy!"}
diff --git a/docs/use-cases/nlp/token-classification.md b/docs/use-cases/nlp/token-classification.md
index c2655ec48c..e7aa3c659c 100644
--- a/docs/use-cases/nlp/token-classification.md
+++ b/docs/use-cases/nlp/token-classification.md
@@ -228,7 +228,7 @@ Here is an example client request, using the Python requests library for formatt
 import requests
 
 # Uvicorn is running on this port
-url = 'http://0.0.0.0:5543/predict'
+url = 'http://0.0.0.0:5543/v2/models/token_classification/infer'
 # send the data
 obj = {"inputs": "Mary is flying from Nairobi to New York to attend a conference"}
 resp = requests.post(url=url, json=obj)
@@ -261,7 +261,7 @@ Making a request:
 import requests
 
 # Uvicorn is running on this port
-url = 'http://0.0.0.0:5543/predict'
+url = 'http://0.0.0.0:5543/v2/models/token_classification/infer'
 
 # send the data
 obj = {"inputs": "Mary is flying from Nairobi to New York to attend a conference",}
diff --git a/docs/use-cases/nlp/transformers-embedding-extraction.md b/docs/use-cases/nlp/transformers-embedding-extraction.md
index bbf5614ddb..07b877f8d9 100644
--- a/docs/use-cases/nlp/transformers-embedding-extraction.md
+++ b/docs/use-cases/nlp/transformers-embedding-extraction.md
@@ -155,7 +155,7 @@ Here is an example client request, using the Python `requests` library for forma
 import requests
 
 # Uvicorn is running on this port
-url = 'http://0.0.0.0:5543/predict'
+url = 'http://0.0.0.0:5543/v2/models/transformers_embedding_extraction/infer'
 
 # send the data
 obj = {"inputs": "The transformers embedding extraction Pipeline is the best!"}
@@ -191,7 +191,7 @@ Making requests:
 ```python 
 import requests, json
 # Uvicorn is running on this port
-url = 'http://0.0.0.0:5543/predict'
+url = 'http://0.0.0.0:5543/v2/models/transformers_embedding_extraction/infer'
 
 # send the data
 obj = {"inputs": "The transformers embedding extraction Pipeline is the best!"}
diff --git a/docs/use-cases/nlp/zero-shot-text-classification.md b/docs/use-cases/nlp/zero-shot-text-classification.md
index 2461444fa6..27145311eb 100644
--- a/docs/use-cases/nlp/zero-shot-text-classification.md
+++ b/docs/use-cases/nlp/zero-shot-text-classification.md
@@ -199,7 +199,7 @@ Making a request:
 import requests
 
 # Uvicorn is running on this port
-url = 'http://0.0.0.0:5543/predict'
+url = 'http://0.0.0.0:5543/v2/models/zero_shot_text_classification/infer'
 
 # send the data
 obj = {
@@ -238,7 +238,7 @@ Making a request:
 import requests
 
 # Uvicorn is running on this port
-url = 'http://0.0.0.0:5543/predict'
+url = 'http://0.0.0.0:5543/v2/models/zero_shot_text_classification/infer'
 
 # send the data
 obj =  {"sequences": ["The Boston Red Sox are my favorite baseball team!"]}
diff --git a/examples/ultralytics-yolo/ultralytics-readmes/deepsparse-readme.md b/examples/ultralytics-yolo/ultralytics-readmes/deepsparse-readme.md
index 1607e81961..62ce985361 100644
--- a/examples/ultralytics-yolo/ultralytics-readmes/deepsparse-readme.md
+++ b/examples/ultralytics-yolo/ultralytics-readmes/deepsparse-readme.md
@@ -145,8 +145,8 @@ import requests, json
 path = ['basilica.jpg'] 
 files = [('request', open(img, 'rb')) for img in path]
 
-# send request over HTTP to /predict/from_files endpoint
-url = 'http://0.0.0.0:5543/predict/from_files'
+# send request over HTTP to /v2/models/yolo/infer/from_files endpoint
+url = 'http://0.0.0.0:5543/v2/models/yolo/infer/from_files'
 resp = requests.post(url=url, files=files)
 
 # response is returned in JSON