diff --git a/templates/CLIP-ViT-Large/README.md b/templates/CLIP-ViT-Large/README.md
new file mode 100644
index 0000000..844d119
--- /dev/null
+++ b/templates/CLIP-ViT-Large/README.md
@@ -0,0 +1,31 @@
+# CLIP ViT-Large/14
+
+OpenAI's powerful vision-language model for understanding images and text in a shared embedding space.
+
+Unleash the power of zero-shot image classification with Nosana! Run CLIP on GPU-backed nodes for instant image recognition without any task-specific training.
+
+## Key Features
+- Vision Transformer (ViT) architecture with 428M parameters
+- Zero-shot image classification and retrieval
+- Joint image-text embedding space
+- Strong zero-shot performance across diverse benchmarks
+- Robust to distribution shifts
+
+## Configuration
+- Port: 9000
+- GPU: Required (4GB VRAM)
+- REST API for image classification and embedding
+- Supports both vision and text modalities
+- Handles arbitrary classification labels at inference time
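+
+## Example Request
+
+A minimal request sketch for a running instance. The `/predict` route and payload fields below are assumptions for illustration; check the deployed container for the actual API contract:
+
+```bash
+# Hypothetical zero-shot classification call (route and fields assumed)
+curl -X POST \
+  http://localhost:9000/predict \
+  -H 'Content-Type: application/json' \
+  -d '{"image_url": "https://example.com/photo.jpg", "candidate_labels": ["a cat", "a dog", "a car"]}'
+```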
diff --git a/templates/CLIP-ViT-Large/info.json b/templates/CLIP-ViT-Large/info.json
new file mode 100644
index 0000000..9085c42
--- /dev/null
+++ b/templates/CLIP-ViT-Large/info.json
@@ -0,0 +1,9 @@
+{
+  "id": "clip-vit-large",
+  "name": "CLIP ViT-Large/14",
+  "description": "OpenAI's vision-language model for zero-shot image classification and embedding",
+  "category": ["Image Classification", "New", "Multimodal"],
+  "icon": "https://storage.googleapis.com/sf-blog-images/2020/10/openAI_logo.png",
+  "github_url": "https://github.com/openai/CLIP",
+  "version": "1.0.0"
+}
\ No newline at end of file
diff --git a/templates/CLIP-ViT-Large/job-definition.json b/templates/CLIP-ViT-Large/job-definition.json
new file mode 100644
index 0000000..60d44f4
--- /dev/null
+++ b/templates/CLIP-ViT-Large/job-definition.json
@@ -0,0 +1,30 @@
+{
+  "ops": [
+    {
+      "id": "clip-vit-large",
+      "args": {
+        "cmd": [
+          "/bin/sh",
+          "-c",
+          "python3 -m transformers.pipelines --task zero-shot-image-classification --model openai/clip-vit-large-patch14 --device 0 --port 9000 --host 0.0.0.0"
+        ],
+        "env": {
+          "HF_TOKEN": "fill_in_your_huggingface_token"
+        },
+        "gpu": true,
+        "image": "huggingface/transformers-pytorch:latest",
+        "expose": 9000,
+        "entrypoint": []
+      },
+      "type": "container/run"
+    }
+  ],
+  "meta": {
+    "trigger": "dashboard",
+    "system_requirements": {
+      "required_vram": 4
+    }
+  },
+  "type": "container",
+  "version": "0.1"
+}
\ No newline at end of file
diff --git a/templates/Liquid-V1-7B/README.md b/templates/Liquid-V1-7B/README.md
new file mode 100644
index 0000000..34282af
--- /dev/null
+++ b/templates/Liquid-V1-7B/README.md
@@ -0,0 +1,31 @@
+# Liquid V1 7B
+
+A multimodal large language model capable of any-to-any generation: text to image, image to text, or text to text.
+
+Unleash the power of multimodal AI with Nosana! Run this advanced Liquid model on GPU-backed nodes for versatile text and image generation capabilities.
+
+## Key Features
+- Built on Google's Gemma 7B architecture
+- 8.56B parameters with BF16 precision
+- Any-to-any generation capabilities
+- Image understanding and generation in a single model
+- No external visual embeddings (such as CLIP) required
+
+## Configuration
+- Port: 9000
+- GPU: Required (16GB VRAM)
+- REST API for multimodal processing
+- HuggingFace Transformers backend
+
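+## Example Request
+
+A minimal request sketch for a running instance. The `/predict` route and payload fields are assumptions for illustration; consult the deployed container for the actual API contract:
+
+```bash
+# Hypothetical image-to-text call (route and fields assumed)
+curl -X POST \
+  http://localhost:9000/predict \
+  -H 'Content-Type: application/json' \
+  -d '{"task": "image-to-text", "image_url": "https://example.com/photo.jpg", "prompt": "Describe this image"}'
+```
+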
diff --git a/templates/Liquid-V1-7B/info.json b/templates/Liquid-V1-7B/info.json
new file mode 100644
index 0000000..080d938
--- /dev/null
+++ b/templates/Liquid-V1-7B/info.json
@@ -0,0 +1,9 @@
+{
+  "id": "liquid-v1-7b",
+  "name": "Liquid V1 7B",
+  "description": "Multimodal LLM capable of both image understanding and generation",
+  "category": ["Multimodal", "LLM", "Image Generation", "API"],
+  "icon": "https://huggingface.co/front/assets/huggingface_logo.svg",
+  "github_url": "https://github.com/Junfeng5/Liquid",
+  "version": "1.0.0"
+}
\ No newline at end of file
diff --git a/templates/Liquid-V1-7B/job-definition.json b/templates/Liquid-V1-7B/job-definition.json
new file mode 100644
index 0000000..b18449d
--- /dev/null
+++ b/templates/Liquid-V1-7B/job-definition.json
@@ -0,0 +1,30 @@
+{
+  "ops": [
+    {
+      "id": "liquid-v1-7b",
+      "args": {
+        "cmd": [
+          "/bin/sh",
+          "-c",
+          "python3 -m transformers.pipelines --task any-to-any --model Junfeng5/Liquid_V1_7B --device 0 --port 9000 --host 0.0.0.0"
+        ],
+        "env": {
+          "HF_TOKEN": "fill_in_your_huggingface_token"
+        },
+        "gpu": true,
+        "image": "huggingface/transformers-pytorch:latest",
+        "expose": 9000,
+        "entrypoint": []
+      },
+      "type": "container/run"
+    }
+  ],
+  "meta": {
+    "trigger": "dashboard",
+    "system_requirements": {
+      "required_vram": 16
+    }
+  },
+  "type": "container",
+  "version": "0.1"
+}
\ No newline at end of file
diff --git a/templates/Open-Sora-v2/README.md b/templates/Open-Sora-v2/README.md
new file mode 100644
index 0000000..153ba80
--- /dev/null
+++ b/templates/Open-Sora-v2/README.md
@@ -0,0 +1,31 @@
+# Open-Sora v2
+
+An open-source text-to-video and image-to-video generation model designed to democratize efficient video production.
+
+Unleash the power of cutting-edge video generation with Nosana! Run Open-Sora v2 on GPU-backed nodes for high-quality video content creation from text prompts or reference images.
+
+## Key Features
+- 11B parameter model supporting 256px and 768px resolution
+- Text-to-video and image-to-video generation in one model
+- Multiple aspect ratios (16:9, 9:16, 1:1, 2.39:1)
+- Adjustable motion scores for creative control
+- Quality comparable to leading models such as HunyuanVideo and Step-Video
+
+## Configuration
+- Port: 9000
+- GPU: Required (44GB VRAM for optimal performance)
+- Multi-GPU support with ColossalAI
+- REST API for video generation requests
+- Apache 2.0 licensed open-source project
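+
+## Example Request
+
+A minimal request sketch for a running instance. The `/generate` route and every payload field below are assumptions for illustration; consult the deployed container for the actual API contract:
+
+```bash
+# Hypothetical text-to-video call (route and fields assumed)
+curl -X POST \
+  http://localhost:9000/generate \
+  -H 'Content-Type: application/json' \
+  -d '{"prompt": "a drone shot over a coastline at sunset", "resolution": "256px", "aspect_ratio": "16:9", "motion_score": 4}'
+```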
diff --git a/templates/Open-Sora-v2/info.json b/templates/Open-Sora-v2/info.json
new file mode 100644
index 0000000..dba53fa
--- /dev/null
+++ b/templates/Open-Sora-v2/info.json
@@ -0,0 +1,9 @@
+{
+  "id": "open-sora-v2",
+  "name": "Open-Sora v2",
+  "description": "Open-source text-to-video and image-to-video generation model",
+  "category": ["Text to Video", "Image to Video", "Multimodal"],
+  "icon": "https://huggingface.co/front/assets/huggingface_logo.svg",
+  "github_url": "https://github.com/hpcaitech/Open-Sora",
+  "version": "1.0.0"
+}
\ No newline at end of file
diff --git a/templates/Open-Sora-v2/job-definition.json b/templates/Open-Sora-v2/job-definition.json
new file mode 100644
index 0000000..e10ae7f
--- /dev/null
+++ b/templates/Open-Sora-v2/job-definition.json
@@ -0,0 +1,30 @@
+{
+  "ops": [
+    {
+      "id": "open-sora-v2",
+      "args": {
+        "cmd": [
+          "/bin/sh",
+          "-c",
+          "cd /app && torchrun --nproc_per_node 1 --standalone scripts/diffusion/inference.py configs/diffusion/inference/t2i2v_256px.py --save-dir /output --port 9000 --host 0.0.0.0"
+        ],
+        "env": {
+          "HF_TOKEN": "fill_in_your_huggingface_token"
+        },
+        "gpu": true,
+        "image": "hpcaitech/open-sora:latest",
+        "expose": 9000,
+        "entrypoint": []
+      },
+      "type": "container/run"
+    }
+  ],
+  "meta": {
+    "trigger": "dashboard",
+    "system_requirements": {
+      "required_vram": 44
+    }
+  },
+  "type": "container",
+  "version": "0.1"
+}
\ No newline at end of file
diff --git a/templates/Playground-v2.5/README.md b/templates/Playground-v2.5/README.md
new file mode 100644
index 0000000..a946a3f
--- /dev/null
+++ b/templates/Playground-v2.5/README.md
@@ -0,0 +1,32 @@
+# Playground v2.5 1024px Aesthetic
+
+A state-of-the-art text-to-image diffusion model that generates highly aesthetic images at 1024x1024 resolution.
+
+Unleash the power of cutting-edge image generation with Nosana! Run Playground v2.5 on GPU-backed nodes for stunning visual creations with unmatched aesthetic quality.
+
+## Key Features
+- Top-performing open-source image generation model
+- Outperforms SDXL, DALL-E 3, and Midjourney 5.2 in user preference studies
+- 1024x1024 resolution with multi-aspect-ratio support
+- Enhanced human preference alignment
+- Exceptional detail and aesthetic quality
+
+## Configuration
+- Port: 9000
+- GPU: Required (12GB VRAM)
+- REST API for text-to-image generation
+- Based on the Stable Diffusion XL architecture
+- Uses EDMDPMSolverMultistepScheduler for crisp details
+
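+## Example Request
+
+A minimal request sketch for a running instance. The `/generate` route and payload fields are assumptions for illustration; consult the deployed container for the actual API contract:
+
+```bash
+# Hypothetical text-to-image call (route and fields assumed)
+curl -X POST \
+  http://localhost:9000/generate \
+  -H 'Content-Type: application/json' \
+  -d '{"prompt": "a cozy cabin in a snowy forest at golden hour", "width": 1024, "height": 1024, "num_inference_steps": 50}'
+```
+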
diff --git a/templates/Playground-v2.5/info.json b/templates/Playground-v2.5/info.json
new file mode 100644
index 0000000..d213a1d
--- /dev/null
+++ b/templates/Playground-v2.5/info.json
@@ -0,0 +1,9 @@
+{
+  "id": "playground-v2.5",
+  "name": "Playground v2.5 1024px Aesthetic",
+  "description": "State-of-the-art text-to-image diffusion model for high-quality aesthetic images",
+  "category": ["Text to Image", "Multimodal", "API"],
+  "icon": "https://huggingface.co/front/assets/huggingface_logo.svg",
+  "github_url": "https://github.com/playgroundai/playground-v2.5",
+  "version": "1.0.0"
+}
\ No newline at end of file
diff --git a/templates/Playground-v2.5/job-definition.json b/templates/Playground-v2.5/job-definition.json
new file mode 100644
index 0000000..c74e46a
--- /dev/null
+++ b/templates/Playground-v2.5/job-definition.json
@@ -0,0 +1,30 @@
+{
+  "ops": [
+    {
+      "id": "playground-v2.5",
+      "args": {
+        "cmd": [
+          "/bin/sh",
+          "-c",
+          "python3 -m diffusers.pipelines.stable_diffusion_xl --model playgroundai/playground-v2.5-1024px-aesthetic --device 0 --port 9000 --host 0.0.0.0 --dtype float16"
+        ],
+        "env": {
+          "HF_TOKEN": "fill_in_your_huggingface_token"
+        },
+        "gpu": true,
+        "image": "huggingface/diffusers:latest",
+        "expose": 9000,
+        "entrypoint": []
+      },
+      "type": "container/run"
+    }
+  ],
+  "meta": {
+    "trigger": "dashboard",
+    "system_requirements": {
+      "required_vram": 12
+    }
+  },
+  "type": "container",
+  "version": "0.1"
+}
\ No newline at end of file
diff --git a/templates/gender-classification/README.md b/templates/gender-classification/README.md
new file mode 100644
index 0000000..acd682f
--- /dev/null
+++ b/templates/gender-classification/README.md
@@ -0,0 +1,36 @@
+# Gender Classification
+
+Vision Transformer model for binary gender classification (male/female).
+
+## Features
+
+- 85.8M parameter ViT model (PyTorch/Transformers)
+- 92.4% accuracy on gender detection
+- Classifies images into male/female categories
+- Fast inference with minimal resource requirements
+
+## Running the Model
+
+This container provides a REST API endpoint for gender classification:
+
+```bash
+# Example API call
+curl -X POST \
+  http://localhost:9000/predict \
+  -H 'Content-Type: application/json' \
+  -d '{"image_url": "https://example.com/image.jpg"}'
+```
+
+## Requirements
+
+- NVIDIA GPU with 4GB VRAM (recommended for inference)
+- 4GB system RAM minimum
+- PyTorch environment with Transformers library
+
+## Usage
+
+The model can be used for applications such as user analytics, content personalization, or demographic studies where gender detection is required.
+
+## License
+
+Please refer to the model card on Hugging Face for licensing information.
\ No newline at end of file
diff --git a/templates/gender-classification/info.json b/templates/gender-classification/info.json
new file mode 100644
index 0000000..0c325b2
--- /dev/null
+++ b/templates/gender-classification/info.json
@@ -0,0 +1,9 @@
+{
+  "id": "gender-classification",
+  "name": "Gender Classification",
+  "description": "Binary gender classification model using a Vision Transformer for male/female detection",
+  "category": ["API", "New", "Image Classification"],
+  "icon": "https://huggingface.co/front/assets/huggingface_logo.svg",
+  "github_url": "https://github.com/huggingface/transformers",
+  "version": "1.0.0"
+}
\ No newline at end of file
diff --git a/templates/gender-classification/job-definition.json b/templates/gender-classification/job-definition.json
new file mode 100644
index 0000000..3655939
--- /dev/null
+++ b/templates/gender-classification/job-definition.json
@@ -0,0 +1,30 @@
+{
+  "ops": [
+    {
+      "id": "gender-classification",
+      "args": {
+        "cmd": [
+          "/bin/sh",
+          "-c",
+          "python3 -m transformers.pipelines --task image-classification --model rizvandwiki/gender-classification --device 0 --port 9000 --host 0.0.0.0"
+        ],
+        "env": {
+          "HF_TOKEN": "fill_in_your_huggingface_token"
+        },
+        "gpu": true,
+        "image": "huggingface/transformers-pytorch:latest",
+        "expose": 9000,
+        "entrypoint": []
+      },
+      "type": "container/run"
+    }
+  ],
+  "meta": {
+    "trigger": "dashboard",
+    "system_requirements": {
+      "required_vram": 4
+    }
+  },
+  "type": "container",
+  "version": "0.1"
+}
\ No newline at end of file