Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion Dockerfile.neuron
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ RUN mkdir -p /tgi
# Fetch the optimum-neuron sources directly to avoid relying on pypi deployments
FROM alpine AS optimum-neuron
RUN mkdir -p /optimum-neuron
ADD https://github.com/huggingface/optimum-neuron/archive/refs/tags/v0.2.0.tar.gz /optimum-neuron/sources.tar.gz
ADD https://github.com/huggingface/optimum-neuron/archive/refs/tags/v0.2.2.tar.gz /optimum-neuron/sources.tar.gz
RUN tar -C /optimum-neuron -xf /optimum-neuron/sources.tar.gz --strip-components=1

# Build cargo components (adapted from TGI original Dockerfile)
Expand Down
18 changes: 18 additions & 0 deletions integration-tests/fixtures/neuron/export_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,15 @@
"auto_cast_type": "fp16",
},
},
"qwen3": {
"model_id": "Qwen/Qwen3-1.7B",
"export_kwargs": {
"batch_size": 4,
"sequence_length": 4096,
"num_cores": 2,
"auto_cast_type": "bf16",
},
},
"granite": {
"model_id": "ibm-granite/granite-3.1-2b-instruct",
"export_kwargs": {
Expand All @@ -55,6 +64,15 @@
"auto_cast_type": "bf16",
},
},
"phi3": {
"model_id": "microsoft/Phi-3-mini-4k-instruct",
"export_kwargs": {
"batch_size": 4,
"sequence_length": 4096,
"num_cores": 2,
"auto_cast_type": "bf16",
},
},
}


Expand Down
12 changes: 8 additions & 4 deletions integration-tests/neuron/test_generate.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,10 @@ async def test_model_single_request(tgi_service):
assert response.details.generated_tokens == 17
greedy_expectations = {
"llama": " and how does it work?\nDeep learning is a subset of machine learning that uses artificial",
"qwen2": " - Part 1\n\nDeep Learning is a subset of Machine Learning that is based on",
"granite": "\n\nDeep Learning is a subset of Machine Learning, which is a branch of Art",
"qwen2": " - Deep Learning is a subset of Machine Learning that involves the use of artificial neural networks",
"granite": "\n\nDeep learning is a subset of machine learning techniques based on artificial neural networks",
"qwen3": " A Deep Learning is a subset of machine learning that uses neural networks with multiple layers to",
"phi3": "\n\nDeep learning is a subfield of machine learning that focuses on creating",
}
assert response.generated_text == greedy_expectations[service_name]

Expand Down Expand Up @@ -78,8 +80,10 @@ async def test_model_multiple_requests(tgi_service, neuron_generate_load):
assert len(responses) == 4
expectations = {
"llama": "Deep learning is a subset of machine learning that uses artificial",
"qwen2": "Deep Learning is a subset of Machine Learning that is based on",
"granite": "Deep Learning is a subset of Machine Learning, which is a branch of Art",
"qwen2": "Deep Learning is a subset of Machine Learning that involves",
"granite": "Deep learning is a subset of machine learning techniques",
"qwen3": "Deep Learning is a subset of machine learning that uses neural networks",
"phi3": "Deep learning is a subfield of machine learning that focuses on creating",
}
expected = expectations[tgi_service.client.service_name]
for r in responses:
Expand Down
Loading