
Commit 1e715e5: Conditional Ollama on VM GPU only (#208)

* Conditional Ollama on VM GPU only
* Maintain Helm Charts through Tags

1 parent: 3e46dc3

File tree: 6 files changed (+43, -15 lines)


.github/workflows/documentation.yml (22 additions, 3 deletions)

```diff
@@ -78,11 +78,30 @@ jobs:
             --source docs \
             --baseURL "${{ steps.pages.outputs.base_url }}/"
 
-      - name: Package Helm chart
+      - name: Package Tags Helm chart
+        run: |
+          TAGS=$(git tag --sort=creatordate)
+          for TAG in $TAGS; do
+            echo "Processing tag: $TAG"
+
+            # Create a temporary worktree to avoid changing the current working directory
+            WORKDIR=$(mktemp -d)
+            git worktree add "$WORKDIR" "$TAG"
+
+            # Package Helm chart for this tag
+            helm package "$WORKDIR/helm" -d docs/public/helm --debug
+
+            # Clean up
+            git worktree remove "$WORKDIR"
+          done
+
+      - name: Package Main Helm chart
         run: |
           mkdir -p docs/public/helm
-          helm package helm -d docs/public/helm
-          helm repo index docs/public/helm --url "${{ steps.pages.outputs.base_url }}/helm"
+          helm package helm -d docs/public/helm --debug
+
+      - name: Index Helm charts
+        run: helm repo index docs/public/helm --url "${{ steps.pages.outputs.base_url }}/helm"
 
       - name: Upload artifact
         uses: actions/upload-pages-artifact@v3
```
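The per-tag packaging loop in that workflow can be exercised outside CI. Below is a hedged, standalone sketch of the same worktree mechanics: the function name `package_tag_charts` and the injectable packaging command are illustrative inventions so the loop can run without Helm installed (in the workflow itself the command is `helm package "$WORKDIR/helm" -d docs/public/helm --debug`).

```shell
# Sketch of the tag-packaging loop, with the packaging command injected so the
# worktree handling can be tested without Helm. Illustrative, not repo code.
package_tag_charts() {
  local outdir="$1"   # destination for packaged artifacts
  local pkg_cmd="$2"  # invoked as: $pkg_cmd <worktree-dir> <outdir>
  mkdir -p "$outdir"
  local tag workdir
  for tag in $(git tag --sort=creatordate); do
    echo "Processing tag: $tag"
    # A temporary worktree checks the tag out without touching the current tree
    workdir=$(mktemp -d)
    git worktree add "$workdir" "$tag" >/dev/null 2>&1
    "$pkg_cmd" "$workdir" "$outdir"
    # Clean up the worktree once the tag has been packaged
    git worktree remove "$workdir"
  done
}
```

Using `git worktree` here (rather than `git checkout`) leaves the main checkout, and therefore the rest of the job, untouched while each tag is packaged.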

docs/content/advanced/iac.md (5 additions, 3 deletions)

```diff
@@ -20,10 +20,12 @@ While the **IaC** can be run from a command-line with prior experience, the step
 
 ## Virtual Machine
 
-The Virtual Machine (VM) deployment provisions both the {{< short_app_ref >}} API Server and GUI Client together in an "All-in-One" configuration for experimentation and development. As part of the deployment, one local Large Language Model and one Embedding Model is made available out-of-the-box. There will be an option to deploy on a **GPU**, which will be more expensive then a **CPU** but will perform much better with the pre-deployed Models.
+The Virtual Machine (VM) deployment provisions both the {{< short_app_ref >}} API Server and GUI Client together in an "All-in-One" configuration for experimentation and development.
 
-{{% notice style="code" title="Soooo Sloooow..." icon="traffic-light" %}}
-If deploying the VM IaC on a **CPU**, we recommend [configuring additional, external models](/client/configuration/model_config) for better performance.
+There will be an option to deploy on a **GPU**, which will be more expensive then a **CPU** but will, as part of the deployment, make available one local Large Language Model and one Embedding Model for use out-of-the-box.
+
+{{% notice style="code" title="Models Needed!" icon="traffic-light" %}}
+If deploying the VM IaC on a **CPU**, you will need to [configure a model](/client/configuration/model_config) for functionality.
 {{% /notice %}}
 
 ### Configure Variables
```

opentofu/modules/vm/locals.tf (1 addition, 0 deletions)

```diff
@@ -9,6 +9,7 @@ locals {
     oci_region  = var.region
     db_name     = var.adb_name
     db_password = var.adb_password
+    install_ollama = var.vm_is_gpu_shape ? true : false
   })
 
   vm_compute_shape = var.vm_is_gpu_shape ? var.compute_gpu_shape : var.compute_cpu_shape
```

opentofu/modules/vm/templates/cloudinit-compute.tpl (13 additions, 7 deletions)

```diff
@@ -40,10 +40,12 @@ write_files:
       #!/bin/env bash
       mkdir -p /app
       chown oracleai:oracleai /app
-      curl -fsSL https://ollama.com/install.sh | sh
-      systemctl enable ollama
-      systemctl daemon-reload
-      systemctl restart ollama
+      if ${install_ollama}; then
+        curl -fsSL https://ollama.com/install.sh | sh
+        systemctl enable ollama
+        systemctl daemon-reload
+        systemctl restart ollama
+      fi
       systemctl stop firewalld.service
       firewall-offline-cmd --zone=public --add-port 8501/tcp
       firewall-offline-cmd --zone=public --add-port 8000/tcp
@@ -85,8 +87,10 @@ write_files:
       unzip -o /tmp/wallet.zip -d /app/tns_admin
 
       # Install Models
-      ollama pull llama3.1
-      ollama pull mxbai-embed-large
+      if ${install_ollama}; then
+        ollama pull llama3.1
+        ollama pull mxbai-embed-large
+      fi
 
       # Wait for python modules to finish
       wait $INSTALL_PID
@@ -100,7 +104,9 @@ write_files:
       export DB_PASSWORD='${db_password}'
       export DB_DSN='${db_name}_TP'
       export DB_WALLET_PASSWORD='${db_password}'
-      export ON_PREM_OLLAMA_URL=http://127.0.0.1:11434
+      if ${install_ollama}; then
+        export ON_PREM_OLLAMA_URL=http://127.0.0.1:11434
+      fi
       # Clean Cache
       find /app -type d -name "__pycache__" -exec rm -rf {} \;
       find /app -type d -name ".numba_cache" -exec rm -rf {} \;
```
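The `${install_ollama}` placeholder is substituted by the template engine before the script ever runs, so the rendered file contains a literal `if true; then` or `if false; then`, which bash evaluates with its `true`/`false` builtins. A minimal sketch of that behavior, using `sed` as a stand-in for OpenTofu's `templatefile()` substitution (the `render` helper is hypothetical):

```shell
# install_ollama arrives as the literal text "true" or "false" after template
# rendering; bash then runs the true/false builtin as the `if` condition.
# render() is an illustrative stand-in for the templatefile() step.
render() {
  sed "s/\${install_ollama}/$1/" <<'EOF'
if ${install_ollama}; then
  echo "pulling models"
fi
EOF
}

render true | bash    # prints "pulling models"
render false | bash   # prints nothing: the guarded block is skipped
```

This is why the guard costs nothing at runtime: on a CPU shape the rendered script simply contains a branch that never fires.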

opentofu/provider.tf (1 addition, 1 deletion)

```diff
@@ -6,7 +6,7 @@ terraform {
   required_providers {
     oci = {
       source  = "oracle/oci"
-      version = "~> 7.7" // Last evaluated 1-Jul-2025
+      version = "~> 7.8" // Last evaluated 8-Jul-2025
     }
   }
   required_version = ">= 1.5"
```
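The `~>` ("pessimistic") constraint pins all but the rightmost stated component: `~> 7.8` accepts any `7.x` provider release from `7.8.0` up to, but not including, `8.0.0`. A rough sketch of that rule for two-component constraints (`matches_pessimistic` is an illustrative helper, not OpenTofu code):

```shell
# "~> 7.8" keeps the major version fixed and lets the minor version move
# forward: >= 7.8.0 and < 8.0.0. Handles only the "~> MAJOR.MINOR" form.
matches_pessimistic() {
  local c_major c_minor v_major v_minor
  IFS=. read -r c_major c_minor <<<"$1"    # constraint, e.g. 7.8
  IFS=. read -r v_major v_minor _ <<<"$2"  # candidate version, e.g. 7.9.1
  [ "$v_major" -eq "$c_major" ] && [ "$v_minor" -ge "$c_minor" ]
}

matches_pessimistic 7.8 7.9.1 && echo "7.9.1 ok"   # allowed: same major
matches_pessimistic 7.8 8.0.0 || echo "8.0.0 out"  # rejected: major bump
```

So this change widens the floor from `7.7` to `7.8` while still excluding a future `8.0` release.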

src/server/bootstrap/model_def.py (1 addition, 1 deletion)

```diff
@@ -159,7 +159,7 @@ def main() -> list[Model]:
             "url": os.environ.get("ON_PREM_OLLAMA_URL", default="http://127.0.0.1:11434"),
             "api_key": "",
             "openai_compat": True,
-            "max_chunk_size": 512,
+            "max_chunk_size": 8192,
         },
     ]
```
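Note that the cloud-init template only exports `ON_PREM_OLLAMA_URL` when Ollama is installed; `model_def.py` then falls back to the same local default via `os.environ.get("ON_PREM_OLLAMA_URL", default="http://127.0.0.1:11434")`. The equivalent shell pattern (`ollama_url` is an illustrative helper, not repository code):

```shell
# On a CPU-only VM the variable is never exported, so consumers fall back to
# the localhost default; a GPU VM (or external Ollama host) overrides it.
ollama_url() {
  printf '%s\n' "${ON_PREM_OLLAMA_URL:-http://127.0.0.1:11434}"
}

ollama_url                                          # default when unset
ON_PREM_OLLAMA_URL=http://gpu-vm:11434 ollama_url   # per-call override
```

Either way the URL resolves, which is why the conditional export in cloud-init is safe to omit on CPU shapes.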
