feat(python-client): add new parameters (#118)

OlivierDehaene · web-flow · commit d8dc8f1b0c97 · 2023-03-09T16:05:33.000+01:00
diff --git a/clients/python/README.md b/clients/python/README.md
@@ -133,6 +133,22 @@ class FinishReason(Enum):
     StopSequence = "stop_sequence"
 
 
+# Additional sequences returned when using the `best_of` parameter
+class BestOfSequence:
+    # Generated text
+    generated_text: str
+    # Generation finish reason
+    finish_reason: FinishReason
+    # Number of generated tokens
+    generated_tokens: int
+    # Sampling seed if sampling was activated
+    seed: Optional[int]
+    # Prompt tokens
+    prefill: List[PrefillToken]
+    # Generated tokens
+    tokens: List[Token]
+
+
 # `generate` details
 class Details:
     # Generation finish reason
@@ -145,6 +161,8 @@ class Details:
     prefill: List[PrefillToken]
     # Generated tokens
     tokens: List[Token]
+    # Additional sequences returned when using the `best_of` parameter
+    best_of_sequences: Optional[List[BestOfSequence]]
 
 
 # `generate` return value
diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml
@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "text-generation"
-version = "0.2.1"
+version = "0.3.0"
 description = "Hugging Face Text Generation Python Client"
 license = "Apache-2.0"
 authors = ["Olivier Dehaene <olivier@huggingface.co>"]
diff --git a/clients/python/tests/conftest.py b/clients/python/tests/conftest.py
@@ -4,11 +4,6 @@
 from huggingface_hub.utils import build_hf_headers
 
 
-@pytest.fixture
-def bloom_model():
-    return "bigscience/bloom"
-
-
 @pytest.fixture
 def flan_t5_xxl():
     return "google/flan-t5-xxl"
diff --git a/clients/python/tests/test_client.py b/clients/python/tests/test_client.py
@@ -5,24 +5,32 @@
 from text_generation.types import FinishReason, PrefillToken, Token
 
 
-def test_generate(bloom_url, hf_headers):
-    client = Client(bloom_url, hf_headers)
+def test_generate(flan_t5_xxl_url, hf_headers):
+    client = Client(flan_t5_xxl_url, hf_headers)
     response = client.generate("test", max_new_tokens=1)
 
-    assert response.generated_text == "."
+    assert response.generated_text == ""
     assert response.details.finish_reason == FinishReason.Length
     assert response.details.generated_tokens == 1
     assert response.details.seed is None
     assert len(response.details.prefill) == 1
-    assert response.details.prefill[0] == PrefillToken(
-        id=9234, text="test", logprob=None
-    )
+    assert response.details.prefill[0] == PrefillToken(id=0, text="<pad>", logprob=None)
     assert len(response.details.tokens) == 1
     assert response.details.tokens[0] == Token(
-        id=17, text=".", logprob=-1.75, special=False
+        id=3, text=" ", logprob=-1.984375, special=False
     )
 
 
+def test_generate_best_of(flan_t5_xxl_url, hf_headers):
+    client = Client(flan_t5_xxl_url, hf_headers)
+    response = client.generate("test", max_new_tokens=1, best_of=2, do_sample=True)
+
+    assert response.details.seed is not None
+    assert response.details.best_of_sequences is not None
+    assert len(response.details.best_of_sequences) == 1
+    assert response.details.best_of_sequences[0].seed is not None
+
+
 def test_generate_not_found(fake_url, hf_headers):
     client = Client(fake_url, hf_headers)
     with pytest.raises(NotFoundError):
@@ -35,16 +43,16 @@ def test_generate_validation_error(flan_t5_xxl_url, hf_headers):
         client.generate("test", max_new_tokens=10_000)
 
 
-def test_generate_stream(bloom_url, hf_headers):
-    client = Client(bloom_url, hf_headers)
+def test_generate_stream(flan_t5_xxl_url, hf_headers):
+    client = Client(flan_t5_xxl_url, hf_headers)
     responses = [
         response for response in client.generate_stream("test", max_new_tokens=1)
     ]
 
     assert len(responses) == 1
     response = responses[0]
 
-    assert response.generated_text == "."
+    assert response.generated_text == ""
     assert response.details.finish_reason == FinishReason.Length
     assert response.details.generated_tokens == 1
     assert response.details.seed is None
@@ -63,21 +71,19 @@ def test_generate_stream_validation_error(flan_t5_xxl_url, hf_headers):
 
 
 @pytest.mark.asyncio
-async def test_generate_async(bloom_url, hf_headers):
-    client = AsyncClient(bloom_url, hf_headers)
+async def test_generate_async(flan_t5_xxl_url, hf_headers):
+    client = AsyncClient(flan_t5_xxl_url, hf_headers)
     response = await client.generate("test", max_new_tokens=1)
 
-    assert response.generated_text == "."
+    assert response.generated_text == ""
     assert response.details.finish_reason == FinishReason.Length
     assert response.details.generated_tokens == 1
     assert response.details.seed is None
     assert len(response.details.prefill) == 1
-    assert response.details.prefill[0] == PrefillToken(
-        id=9234, text="test", logprob=None
-    )
+    assert response.details.prefill[0] == PrefillToken(id=0, text="<pad>", logprob=None)
     assert len(response.details.tokens) == 1
     assert response.details.tokens[0] == Token(
-        id=17, text=".", logprob=-1.75, special=False
+        id=3, text=" ", logprob=-1.984375, special=False
     )
 
 
@@ -96,16 +102,16 @@ async def test_generate_async_validation_error(flan_t5_xxl_url, hf_headers):
 
 
 @pytest.mark.asyncio
-async def test_generate_stream_async(bloom_url, hf_headers):
-    client = AsyncClient(bloom_url, hf_headers)
+async def test_generate_stream_async(flan_t5_xxl_url, hf_headers):
+    client = AsyncClient(flan_t5_xxl_url, hf_headers)
     responses = [
         response async for response in client.generate_stream("test", max_new_tokens=1)
     ]
 
     assert len(responses) == 1
     response = responses[0]
 
-    assert response.generated_text == "."
+    assert response.generated_text == ""
     assert response.details.finish_reason == FinishReason.Length
     assert response.details.generated_tokens == 1
     assert response.details.seed is None
diff --git a/clients/python/tests/test_inference_api.py b/clients/python/tests/test_inference_api.py
@@ -14,8 +14,8 @@ def test_get_supported_models():
     assert isinstance(get_supported_models(), list)
 
 
-def test_client(bloom_model):
-    client = InferenceAPIClient(bloom_model)
+def test_client(flan_t5_xxl):
+    client = InferenceAPIClient(flan_t5_xxl)
     assert isinstance(client, Client)
 
 
@@ -24,8 +24,8 @@ def test_client_unsupported_model(unsupported_model):
         InferenceAPIClient(unsupported_model)
 
 
-def test_async_client(bloom_model):
-    client = InferenceAPIAsyncClient(bloom_model)
+def test_async_client(flan_t5_xxl):
+    client = InferenceAPIAsyncClient(flan_t5_xxl)
     assert isinstance(client, AsyncClient)
 
 
diff --git a/clients/python/tests/test_types.py b/clients/python/tests/test_types.py
@@ -1,10 +1,20 @@
 import pytest
 
-from text_generation.types import Parameters
+from text_generation.types import Parameters, Request
 from text_generation.errors import ValidationError
 
 
 def test_parameters_validation():
+    # Test best_of
+    Parameters(best_of=1)
+    with pytest.raises(ValidationError):
+        Parameters(best_of=0)
+    with pytest.raises(ValidationError):
+        Parameters(best_of=-1)
+    Parameters(best_of=2, do_sample=True)
+    with pytest.raises(ValidationError):
+        Parameters(best_of=2)
+
     # Test repetition_penalty
     Parameters(repetition_penalty=1)
     with pytest.raises(ValidationError):
@@ -32,8 +42,41 @@ def test_parameters_validation():
         Parameters(top_k=-1)
 
     # Test top_p
-    Parameters(top_p=1)
+    Parameters(top_p=0.5)
     with pytest.raises(ValidationError):
         Parameters(top_p=0)
     with pytest.raises(ValidationError):
         Parameters(top_p=-1)
+    with pytest.raises(ValidationError):
+        Parameters(top_p=1)
+
+    # Test truncate
+    Parameters(truncate=1)
+    with pytest.raises(ValidationError):
+        Parameters(truncate=0)
+    with pytest.raises(ValidationError):
+        Parameters(truncate=-1)
+
+    # Test typical_p
+    Parameters(typical_p=0.5)
+    with pytest.raises(ValidationError):
+        Parameters(typical_p=0)
+    with pytest.raises(ValidationError):
+        Parameters(typical_p=-1)
+    with pytest.raises(ValidationError):
+        Parameters(typical_p=1)
+
+
+def test_request_validation():
+    Request(inputs="test")
+
+    with pytest.raises(ValidationError):
+        Request(inputs="")
+
+    Request(inputs="test", stream=True)
+    Request(inputs="test", parameters=Parameters(best_of=2, do_sample=True))
+
+    with pytest.raises(ValidationError):
+        Request(
+            inputs="test", parameters=Parameters(best_of=2, do_sample=True), stream=True
+        )
diff --git a/clients/python/text_generation/__init__.py b/clients/python/text_generation/__init__.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-__version__ = "0.2.1"
+__version__ = "0.3.0"
 
 from text_generation.client import Client, AsyncClient
 from text_generation.inference_api import InferenceAPIClient, InferenceAPIAsyncClient
diff --git a/clients/python/text_generation/client.py b/clients/python/text_generation/client.py
diff --git a/clients/python/text_generation/types.py b/clients/python/text_generation/types.py