Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions config.example.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ healthCheckTimeout: 500
# - Valid log levels: debug, info, warn, error
logLevel: info

# logHTTPRequests: enable capture of HTTP request/response bodies
# - optional, default: false
# - when true, records the bodies of all incoming HTTP requests and their responses
logHTTPRequests: false

# metricsMaxInMemory: maximum number of metrics to keep in memory
# - optional, default: 1000
# - controls how many metrics are stored in memory before older ones are discarded
Expand Down
2 changes: 2 additions & 0 deletions proxy/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -141,6 +141,7 @@ func (c *GroupConfig) UnmarshalYAML(unmarshal func(interface{}) error) error {
type Config struct {
HealthCheckTimeout int `yaml:"healthCheckTimeout"`
LogRequests bool `yaml:"logRequests"`
LogHTTPRequests bool `yaml:"logHTTPRequests"`
LogLevel string `yaml:"logLevel"`
MetricsMaxInMemory int `yaml:"metricsMaxInMemory"`
Models map[string]ModelConfig `yaml:"models"` /* key is model ID */
Expand Down Expand Up @@ -193,6 +194,7 @@ func LoadConfigFromReader(r io.Reader) (Config, error) {
// default configuration values
config := Config{
HealthCheckTimeout: 120,
LogHTTPRequests: false,
StartPort: 5800,
LogLevel: "info",
MetricsMaxInMemory: 1000,
Expand Down
39 changes: 24 additions & 15 deletions proxy/metrics_middleware.go
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,18 @@ func MetricsMiddleware(pm *ProxyManager) gin.HandlerFunc {
return
}

metricsRecorder := &MetricsRecorder{
metricsMonitor: pm.metricsMonitor,
realModelName: realModelName,
isStreaming: gjson.GetBytes(bodyBytes, "stream").Bool(),
startTime: time.Now(),
}
if pm.metricsMonitor.logHTTPRequests {
metricsRecorder.requestBody = bodyBytes
}
Comment on lines +39 to +47
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

💡 Verification agent

🧩 Analysis chain

Consider memory and security implications of request body storage.

The conditional capture of request bodies is functionally correct, but consider the following:

  1. Memory usage: Large request bodies will be held in memory until metrics are processed
  2. PII exposure: Request bodies may contain sensitive user data that shouldn't be logged
  3. Performance: String conversion and storage adds overhead

Consider adding safeguards like size limits or content filtering.


🏁 Script executed:

#!/bin/bash
# Description: Check if there are any size limits or content filtering mechanisms in the codebase

# Search for any existing request size limits or filtering
rg -A 5 -B 5 "size.*limit|limit.*size|max.*body|body.*max" --type go

# Look for any existing PII filtering or sanitization
rg -A 5 -B 5 "sanitiz|filter.*pii|pii.*filter|redact" --type go

Length of output: 5717


Consider adding size limits and PII sanitization when logging request bodies

I searched the codebase and didn’t find any existing limits or redaction for stored HTTP payloads, so the current implementation will retain arbitrarily large or sensitive data in memory. To mitigate memory and security risks, please:

• In proxy/metrics_middleware.go (around lines 39–47), wrap the assignment of metricsRecorder.requestBody with a configurable maximum-size check (e.g., drop or truncate if len(bodyBytes) exceeds a threshold).
• Introduce an allowlist/denylist or redaction step before storing the payload to filter out known PII patterns (e.g., email addresses, credit-card numbers) or sensitive JSON fields.
• Surface these safeguards via your metrics configuration (e.g., pm.metricsMonitor.maxBodySize and pm.metricsMonitor.redactFields) so they can be tuned per deployment.

🤖 Prompt for AI Agents
In proxy/metrics_middleware.go around lines 39 to 47, the current code assigns
the entire request body to metricsRecorder.requestBody without size limits or
PII sanitization. To fix this, add a check against a configurable maxBodySize
from pm.metricsMonitor and truncate or drop the body if it exceeds this size.
Implement a redaction step using a configurable redactFields list to filter or
mask sensitive fields or patterns like emails before storing the payload. Ensure
these configurations are exposed in pm.metricsMonitor for deployment tuning.

writer := &MetricsResponseWriter{
ResponseWriter: c.Writer,
metricsRecorder: &MetricsRecorder{
metricsMonitor: pm.metricsMonitor,
realModelName: realModelName,
isStreaming: gjson.GetBytes(bodyBytes, "stream").Bool(),
startTime: time.Now(),
},
ResponseWriter: c.Writer,
metricsRecorder: metricsRecorder,
}
c.Writer = writer
c.Next()
Expand All @@ -58,6 +62,7 @@ type MetricsRecorder struct {
realModelName string
isStreaming bool
startTime time.Time
requestBody []byte
}

// processBody handles response processing after request completes
Expand All @@ -69,7 +74,8 @@ func (rec *MetricsRecorder) processBody(body []byte) {
}
}

func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
func (rec *MetricsRecorder) parseAndRecordMetrics(responseBody []byte) bool {
jsonData := gjson.ParseBytes(responseBody)
usage := jsonData.Get("usage")
if !usage.Exists() {
return false
Expand All @@ -87,14 +93,19 @@ func (rec *MetricsRecorder) parseAndRecordMetrics(jsonData gjson.Result) bool {
durationMs = int(jsonData.Get("timings.prompt_ms").Float() + jsonData.Get("timings.predicted_ms").Float())
}

rec.metricsMonitor.addMetrics(TokenMetrics{
metrics := TokenMetrics{
Timestamp: time.Now(),
Model: rec.realModelName,
InputTokens: inputTokens,
OutputTokens: outputTokens,
TokensPerSecond: tokensPerSecond,
DurationMs: durationMs,
})
}
if rec.metricsMonitor.logHTTPRequests {
metrics.RequestBody = string(rec.requestBody)
metrics.ResponseBody = string(responseBody)
}
rec.metricsMonitor.addMetrics(metrics)

return true
}
Expand Down Expand Up @@ -126,10 +137,8 @@ func (rec *MetricsRecorder) processStreamingResponse(body []byte) {
continue
}

if gjson.ValidBytes(data) {
if rec.parseAndRecordMetrics(gjson.ParseBytes(data)) {
return // short circuit if a metric was recorded
}
if gjson.ValidBytes(data) && rec.parseAndRecordMetrics(data) {
return // short circuit if a metric was recorded
}
}
}
Expand All @@ -141,7 +150,7 @@ func (rec *MetricsRecorder) processNonStreamingResponse(body []byte) {

// Parse JSON to extract usage information
if gjson.ValidBytes(body) {
rec.parseAndRecordMetrics(gjson.ParseBytes(body))
rec.parseAndRecordMetrics(body)
}
}

Expand Down
15 changes: 10 additions & 5 deletions proxy/metrics_monitor.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ type TokenMetrics struct {
OutputTokens int `json:"output_tokens"`
TokensPerSecond float64 `json:"tokens_per_second"`
DurationMs int `json:"duration_ms"`
RequestBody string `json:"request_body,omitempty"`
ResponseBody string `json:"response_body,omitempty"`
}

// TokenMetricsEvent represents a token metrics event
Expand All @@ -30,10 +32,11 @@ func (e TokenMetricsEvent) Type() uint32 {

// MetricsMonitor parses llama-server output for token statistics.
// The diff residue duplicating the pre-change field list has been resolved
// into the single post-change struct definition.
type MetricsMonitor struct {
	mu              sync.RWMutex   // guards the fields below (see addMetrics)
	metrics         []TokenMetrics // stored metrics; older entries are discarded past maxMetrics
	maxMetrics      int            // cap on metrics held in memory (config: metricsMaxInMemory)
	nextID          int            // presumably the next metric ID to assign — confirm against addMetrics
	logHTTPRequests bool           // when true, HTTP request/response bodies are attached to metrics
}

func NewMetricsMonitor(config *Config) *MetricsMonitor {
Expand All @@ -43,13 +46,15 @@ func NewMetricsMonitor(config *Config) *MetricsMonitor {
}

mp := &MetricsMonitor{
maxMetrics: maxMetrics,
maxMetrics: maxMetrics,
logHTTPRequests: config.LogHTTPRequests,
}

return mp
}

// addMetrics adds a new metric to the collection and publishes an event
// If logHTTPRequests is enabled, it records the request and response bodies
func (mp *MetricsMonitor) addMetrics(metric TokenMetrics) {
mp.mu.Lock()
defer mp.mu.Unlock()
Expand Down
40 changes: 40 additions & 0 deletions proxy/proxymanager_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -773,3 +773,43 @@ func TestProxyManager_HealthEndpoint(t *testing.T) {
assert.Equal(t, http.StatusOK, rec.Code)
assert.Equal(t, "OK", rec.Body.String())
}

// TestProxyManager_RequestResponseBodyIsRecorded verifies that enabling
// logHTTPRequests causes the proxy to capture both the request and the
// response bodies in the recorded token metrics.
func TestProxyManager_RequestResponseBodyIsRecorded(t *testing.T) {
	// Build a config with HTTP request/response logging switched on.
	cfg := AddDefaultGroupToConfig(Config{
		HealthCheckTimeout: 15,
		Models: map[string]ModelConfig{
			"model1": getTestSimpleResponderConfig("model1"),
		},
		LogLevel:           "error",
		LogHTTPRequests:    true,
		MetricsMaxInMemory: 100,
	})

	pm := New(cfg)
	defer pm.StopProcesses(StopWaitForInflightRequest)

	// Send a completion request through the proxy.
	payload := `{"model":"model1", "prompt": "test prompt"}`
	request := httptest.NewRequest("POST", "/v1/chat/completions", bytes.NewBufferString(payload))
	recorder := httptest.NewRecorder()

	pm.ServeHTTP(recorder, request)
	assert.Equal(t, http.StatusOK, recorder.Code)

	// At least one metric must have been captured.
	recordedMetrics := pm.metricsMonitor.GetMetrics()
	if !assert.NotEmpty(t, recordedMetrics, "metrics should be recorded") {
		return
	}

	// The newest metric carries both bodies when logHTTPRequests is true.
	newest := recordedMetrics[len(recordedMetrics)-1]
	assert.NotEmpty(t, newest.RequestBody, "request body should be recorded when logHTTPRequests is true")
	assert.NotEmpty(t, newest.ResponseBody, "response body should be recorded when logHTTPRequests is true")

	// Their contents must reflect what was actually sent and received.
	assert.Contains(t, newest.RequestBody, "model1", "request body should contain the model name")
	assert.Contains(t, newest.RequestBody, "test prompt", "request body should contain the prompt")
	assert.Contains(t, newest.ResponseBody, "model1", "response body should contain the model name")
}
2 changes: 2 additions & 0 deletions ui/src/contexts/APIProvider.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ interface Metrics {
output_tokens: number;
tokens_per_second: number;
duration_ms: number;
request_body?: string;
response_body?: string;
}

interface LogData {
Expand Down
96 changes: 85 additions & 11 deletions ui/src/pages/Activity.tsx
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { useState, useEffect } from "react";
import { useState, useEffect, Fragment } from "react";
import { useAPI } from "../contexts/APIProvider";

const formatTimestamp = (timestamp: string): string => {
Expand All @@ -16,13 +16,26 @@ const formatDuration = (ms: number): string => {
const ActivityPage = () => {
const { metrics } = useAPI();
const [error, setError] = useState<string | null>(null);
const [expandedMetrics, setExpandedMetrics] = useState<Set<string>>(new Set());
const [parseJson, setParseJson] = useState<boolean>(false);

useEffect(() => {
if (metrics.length > 0) {
setError(null);
}
}, [metrics]);

const beautifyJson = (jsonString?: string): string => {
if (typeof jsonString !== "string")
return "";
try {
const parsed = JSON.parse(jsonString);
return JSON.stringify(parsed, null, 2);
} catch (e) {
return jsonString;
}
};

if (error) {
return (
<div className="p-6">
Expand All @@ -34,10 +47,79 @@ const ActivityPage = () => {
);
}

const toggleExpanded = (id: string) => {
setExpandedMetrics(prev => {
const newSet = new Set(prev);
if (newSet.has(id)) {
newSet.delete(id);
} else {
newSet.add(id);
}
return newSet;
});
};

const renderMetricRow = (metric: typeof metrics[0], index: number) => {
const key = `${metric.id}-${index}`;
const isExpanded = expandedMetrics.has(key);
const hasRequestData = metric.request_body && metric.response_body;

return (
<Fragment key={key}>
<tr>
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatTimestamp(metric.timestamp)}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.model}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.input_tokens.toLocaleString()}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.output_tokens.toLocaleString()}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatSpeed(metric.tokens_per_second)}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatDuration(metric.duration_ms)}</td>
{hasRequestData && (
<td className="px-6 py-4 whitespace-nowrap text-sm">
<button
onClick={() => toggleExpanded(key)}
className="text-blue-600 hover:text-blue-800 text-sm font-medium"
>
{isExpanded ? 'Hide' : 'Show'}
</button>
</td>
)}
</tr>
{isExpanded && hasRequestData && (
<tr>
<td colSpan={7} className="px-6 py-4 bg-gray-50 border-t">
<div className="mt-2">
<h4 className="font-bold text-sm mb-2">Request</h4>
<pre className="bg-white p-3 rounded border text-sm whitespace-pre-wrap break-all max-h-40 overflow-y-auto">
<code>{parseJson ? beautifyJson(metric.request_body) : metric.request_body}</code>
</pre>
<h4 className="font-bold text-sm mt-4 mb-2">Response</h4>
<pre className="bg-white p-3 rounded border text-sm whitespace-pre-wrap break-all max-h-40 overflow-y-auto">
<code>{parseJson ? beautifyJson(metric.response_body) : metric.response_body}</code>
</pre>
</div>
</td>
</tr>
)}
</Fragment>
);
};

return (
<div className="p-6">
<h1 className="text-2xl font-bold mb-4">Activity</h1>

<div className="mb-4">
<input
type="checkbox"
id="parse-json"
checked={parseJson}
onChange={(e) => setParseJson(e.target.checked)}
/>
<label htmlFor="parse-json" className="ml-2">
Parse request data as JSON and beautify
</label>
</div>

{metrics.length === 0 ? (
<div className="text-center py-8">
<p className="text-gray-600">No metrics data available</p>
Expand All @@ -53,19 +135,11 @@ const ActivityPage = () => {
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Output Tokens</th>
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Generation Speed</th>
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Duration</th>
<th className="px-6 py-3 text-left text-xs font-medium uppercase tracking-wider">Request data</th>
</tr>
</thead>
<tbody className="divide-y">
{metrics.map((metric, index) => (
<tr key={`${metric.id}-${index}`}>
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatTimestamp(metric.timestamp)}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.model}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.input_tokens.toLocaleString()}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">{metric.output_tokens.toLocaleString()}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatSpeed(metric.tokens_per_second)}</td>
<td className="px-6 py-4 whitespace-nowrap text-sm">{formatDuration(metric.duration_ms)}</td>
</tr>
))}
{metrics.map(renderMetricRow)}
</tbody>
</table>
</div>
Expand Down