diff --git a/examples/machinehub/input.json b/examples/machinehub/input.json index b70a965b..3e2cc9c7 100644 --- a/examples/machinehub/input.json +++ b/examples/machinehub/input.json @@ -19,7 +19,8 @@ "metadata": { "name": "app-1-secret-1", "namespace": "team-1" - } + }, + "type": "kubernetes.io/tls" } } ] diff --git a/internal/cyberark/dataupload/dataupload.go b/internal/cyberark/dataupload/dataupload.go index 0f6b95ef..b290404f 100644 --- a/internal/cyberark/dataupload/dataupload.go +++ b/internal/cyberark/dataupload/dataupload.go @@ -12,7 +12,8 @@ import ( "net/http" "net/url" - "k8s.io/apimachinery/pkg/runtime" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/jetstack/preflight/pkg/version" ) @@ -54,29 +55,32 @@ type Snapshot struct { K8SVersion string `json:"k8s_version"` // Secrets is a list of Secret resources in the cluster. Not all Secret // types are included and only a subset of the Secret data is included. - Secrets []runtime.Object `json:"secrets"` + // + // Secrets are obtained by a DynamicClient, so they have type + // *unstructured.Unstructured. + Secrets []*unstructured.Unstructured `json:"secrets"` // ServiceAccounts is a list of ServiceAccount resources in the cluster. - ServiceAccounts []runtime.Object `json:"serviceaccounts"` + ServiceAccounts []client.Object `json:"serviceaccounts"` // Roles is a list of Role resources in the cluster. - Roles []runtime.Object `json:"roles"` + Roles []client.Object `json:"roles"` // ClusterRoles is a list of ClusterRole resources in the cluster. - ClusterRoles []runtime.Object `json:"clusterroles"` + ClusterRoles []client.Object `json:"clusterroles"` // RoleBindings is a list of RoleBinding resources in the cluster. - RoleBindings []runtime.Object `json:"rolebindings"` + RoleBindings []client.Object `json:"rolebindings"` // ClusterRoleBindings is a list of ClusterRoleBinding resources in the cluster. - ClusterRoleBindings []runtime.Object `json:"clusterrolebindings"` + ClusterRoleBindings []client.Object `json:"clusterrolebindings"` // Jobs is a list of Job resources in the cluster. - Jobs []runtime.Object `json:"jobs"` + Jobs []client.Object `json:"jobs"` // CronJobs is a list of CronJob resources in the cluster. - CronJobs []runtime.Object `json:"cronjobs"` + CronJobs []client.Object `json:"cronjobs"` // Deployments is a list of Deployment resources in the cluster. - Deployments []runtime.Object `json:"deployments"` + Deployments []client.Object `json:"deployments"` // Statefulsets is a list of StatefulSet resources in the cluster. - Statefulsets []runtime.Object `json:"statefulsets"` + Statefulsets []client.Object `json:"statefulsets"` // Daemonsets is a list of DaemonSet resources in the cluster. - Daemonsets []runtime.Object `json:"daemonsets"` + Daemonsets []client.Object `json:"daemonsets"` // Pods is a list of Pod resources in the cluster. - Pods []runtime.Object `json:"pods"` + Pods []client.Object `json:"pods"` } // PutSnapshot PUTs the supplied snapshot to an [AWS presigned URL] which it obtains via the CyberArk inventory API. diff --git a/pkg/client/client_cyberark.go b/pkg/client/client_cyberark.go index c2b14f35..b1267d99 100644 --- a/pkg/client/client_cyberark.go +++ b/pkg/client/client_cyberark.go @@ -2,15 +2,23 @@ package client import ( "context" + "crypto/x509" + "encoding/base64" + "encoding/pem" "fmt" "net/http" - "k8s.io/apimachinery/pkg/runtime" + "github.com/go-logr/logr" + corev1 "k8s.io/api/core/v1" + "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" "k8s.io/apimachinery/pkg/util/sets" + "k8s.io/klog/v2" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/jetstack/preflight/api" "github.com/jetstack/preflight/internal/cyberark" "github.com/jetstack/preflight/internal/cyberark/dataupload" + "github.com/jetstack/preflight/pkg/logs" "github.com/jetstack/preflight/pkg/version" ) @@ -40,14 +48,20 @@ func NewCyberArk(httpClient *http.Client) (*CyberArkClient, error) { // PostDataReadingsWithOptions uploads data readings to CyberArk. // It converts the supplied data readings into a snapshot format expected by CyberArk. +// It then minimizes the snapshot to avoid uploading unnecessary data. // It initializes a data upload client with the configured HTTP client and credentials, // then uploads a snapshot. // The supplied Options are not used by this publisher. func (o *CyberArkClient) PostDataReadingsWithOptions(ctx context.Context, readings []*api.DataReading, _ Options) error { + log := klog.FromContext(ctx) var snapshot dataupload.Snapshot if err := convertDataReadings(defaultExtractorFunctions, readings, &snapshot); err != nil { return fmt.Errorf("while converting data readings: %s", err) } + + // Minimize the snapshot to reduce size and improve privacy + minimizeSnapshot(log.V(logs.Debug).WithName("minimizeSnapshot"), &snapshot) + snapshot.AgentVersion = version.PreflightVersion cfg, err := o.configLoader() @@ -86,9 +100,9 @@ func extractClusterIDAndServerVersionFromReading(reading *api.DataReading, targe } // extractResourceListFromReading converts the opaque data from a DynamicData -// data reading to runtime.Object resources, to allow access to the metadata and +// data reading to T resources, to allow access to the metadata and // other kubernetes API fields. -func extractResourceListFromReading(reading *api.DataReading, target *[]runtime.Object) error { +func extractResourceListFromReading[T client.Object](reading *api.DataReading, target *[]T) error { if reading == nil { return fmt.Errorf("programmer mistake: the DataReading must not be nil") } @@ -98,14 +112,15 @@ func extractResourceListFromReading(reading *api.DataReading, target *[]runtime. "programmer mistake: the DataReading must have data type *api.DynamicData. "+ "This DataReading (%s) has data type %T", reading.DataGatherer, reading.Data) } - resources := make([]runtime.Object, len(data.Items)) + resources := make([]T, len(data.Items)) for i, item := range data.Items { - if resource, ok := item.Resource.(runtime.Object); ok { + if resource, ok := item.Resource.(T); ok { resources[i] = resource } else { + expectedType := fmt.Sprintf("%T", new(T))[1:] // strip leading '*' return fmt.Errorf( - "programmer mistake: the DynamicData items must have Resource type runtime.Object. "+ - "This item (%d) has Resource type %T", i, item.Resource) + "programmer mistake: the DynamicData items must have Resource type %s. "+ + "This item (%d) has Resource type %T", expectedType, i, item.Resource) } } *target = resources @@ -190,3 +205,151 @@ func convertDataReadings( } return nil } + +// minimizeSnapshot reduces the size of the snapshot by removing unnecessary data. +// +// This reduces the bandwidth used when uploading the snapshot to CyberArk, +// it reduces the storage used by CyberArk to store the snapshot, and +// it provides better privacy for the cluster being scanned; only the necessary +// data is included in the snapshot. +// +// This is a best-effort attempt to minimize the snapshot size. Errors during +// minimization are logged but do not prevent the snapshot from being uploaded. +// +// It performs the following minimization steps: +// +// 1. Removal of non-clientauth TLS secrets: It filters out TLS secrets that do +// not contain a client certificate. This is done to avoid uploading large +// TLS secrets that are not relevant for the CyberArk Discovery and Context +// service. +func minimizeSnapshot(log logr.Logger, snapshot *dataupload.Snapshot) { + originalSecretCount := len(snapshot.Secrets) + filteredSecrets := make([]*unstructured.Unstructured, 0, originalSecretCount) + for _, secret := range snapshot.Secrets { + log := log.WithValues("name", secret.GetName(), "namespace", secret.GetNamespace()) + if isExcludableSecret(log, secret) { + log.Info("Dropped") + continue + } + filteredSecrets = append(filteredSecrets, secret) + } + snapshot.Secrets = filteredSecrets + log.Info("Minimized snapshot", "originalSecretCount", originalSecretCount, "filteredSecretCount", len(snapshot.Secrets)) +} + +// isExcludableSecret filters out TLS secrets that are definitely of no interest +// to CyberArk's Discovery and Context service, specifically TLS secrets that do +// not contain a client certificate. +// +// The Secret is kept if there is any doubt or if there is a problem decoding +// its contents. +func isExcludableSecret(log logr.Logger, unstructuredObj *unstructured.Unstructured) bool { + if unstructuredObj.GetKind() != "Secret" || unstructuredObj.GetAPIVersion() != "v1" { + log.Info("Object is not a core/v1 Secret", "apiVersion", unstructuredObj.GetAPIVersion(), "kind", unstructuredObj.GetKind()) + return false + } + + secretType, found, err := unstructured.NestedString(unstructuredObj.Object, "type") + if err != nil || !found { + log.Info("Secret object has no type") + return false + } + + if corev1.SecretType(secretType) != corev1.SecretTypeTLS { + log.Info("Secret of this type are never dropped", "type", secretType) + return false + } + + dataMap, found, err := unstructured.NestedMap(unstructuredObj.Object, "data") + if err != nil || !found { + log.Info("Secret data missing or not a map", "error", err, "decision", "drop") + return true + } + + return isExcludableTLSSecret(log, dataMap) +} + +// isExcludableTLSSecret checks if a TLS Secret contains a client certificate. +// It returns true if the Secret is a TLS Secret and its tls.crt does not +// contain a client certificate. +func isExcludableTLSSecret(log logr.Logger, dataMap map[string]interface{}) bool { + tlsCrtRaw, found := dataMap[corev1.TLSCertKey] + if !found { + log.Info("TLS Secret does not contain tls.crt key") + return true + } + + // Decode base64 if necessary (K8s secrets store data as base64-encoded strings) + var tlsCrtBytes []byte + switch v := tlsCrtRaw.(type) { + case string: + decoded, err := base64.StdEncoding.DecodeString(v) + if err != nil { + log.Info("Failed to decode tls.crt base64", "error", err.Error()) + return true + } + tlsCrtBytes = decoded + case []byte: + tlsCrtBytes = v + default: + log.Info("tls.crt is not a string or byte slice", "type", fmt.Sprintf("%T", v)) + return true + } + + // Parse PEM certificate chain + hasClientCert := searchPEM(tlsCrtBytes, func(block *pem.Block) bool { + if block.Type != "CERTIFICATE" || len(block.Bytes) == 0 { + return false + } + cert, err := x509.ParseCertificate(block.Bytes) + if err != nil { + log.Info("Failed to parse PEM block as X.509 certificate", "error", err.Error()) + return false + } + // Check if the certificate has the ClientAuth EKU + return isClientCertificate(cert) + }) + return !hasClientCert +} + +// searchPEM parses the given PEM data and applies the visitor function to each +// PEM block found. If the visitor function returns true for any block, the search +// stops and searchPEM returns true. If no blocks cause the visitor to return true, +// searchPEM returns false. +func searchPEM(data []byte, visitor func(*pem.Block) bool) bool { + if visitor == nil { + return false + } + // Parse the PEM encoded certificate chain + var block *pem.Block + rest := data + for { + block, rest = pem.Decode(rest) + if block == nil { + break + } + if visitor(block) { + return true + } + } + return false +} + +// isClientCertificate checks if the given certificate is a client certificate +// by checking if it has the ClientAuth EKU. +func isClientCertificate(cert *x509.Certificate) bool { + if cert == nil { + return false + } + // Skip CA certificates + if cert.IsCA { + return false + } + // Check if the certificate has the ClientAuth EKU + for _, eku := range cert.ExtKeyUsage { + if eku == x509.ExtKeyUsageClientAuth { + return true + } + } + return false +} diff --git a/pkg/client/client_cyberark_convertdatareadings_test.go b/pkg/client/client_cyberark_convertdatareadings_test.go index 82ab36f3..158d5913 100644 --- a/pkg/client/client_cyberark_convertdatareadings_test.go +++ b/pkg/client/client_cyberark_convertdatareadings_test.go @@ -1,15 +1,23 @@ package client import ( + "crypto/ecdsa" + "crypto/elliptic" + "crypto/rand" + "crypto/x509" + "crypto/x509/pkix" + "encoding/base64" + "encoding/pem" + "math/big" "testing" + "time" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - corev1 "k8s.io/api/core/v1" - metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/apis/meta/v1/unstructured" - "k8s.io/apimachinery/pkg/runtime" "k8s.io/apimachinery/pkg/version" + "k8s.io/klog/v2/ktesting" + "sigs.k8s.io/controller-runtime/pkg/client" "github.com/jetstack/preflight/api" "github.com/jetstack/preflight/internal/cyberark/dataupload" @@ -145,7 +153,7 @@ func TestExtractResourceListFromReading(t *testing.T) { }, }, }, - expectError: `programmer mistake: the DynamicData items must have Resource type runtime.Object. ` + + expectError: `programmer mistake: the DynamicData items must have Resource type *unstructured.Unstructured. ` + `This item (0) has Resource type *api.DiscoveryData`, }, { @@ -184,7 +192,7 @@ func TestExtractResourceListFromReading(t *testing.T) { } for _, test := range tests { t.Run(test.name, func(t *testing.T) { - var resources []runtime.Object + var resources []*unstructured.Unstructured err := extractResourceListFromReading(test.reading, &resources) if test.expectError != "" { assert.EqualError(t, err, test.expectError) @@ -221,10 +229,14 @@ func TestConvertDataReadings(t *testing.T) { Data: &api.DynamicData{ Items: []*api.GatheredResource{ { - Resource: &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "app-1", - Namespace: "team-1", + Resource: &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "v1", + "kind": "Secret", + "metadata": map[string]interface{}{ + "name": "app-1", + "namespace": "team-1", + }, }, }, }, @@ -283,11 +295,15 @@ func TestConvertDataReadings(t *testing.T) { expectedSnapshot: dataupload.Snapshot{ ClusterID: "success-cluster-id", K8SVersion: "v1.21.0", - Secrets: []runtime.Object{ - &corev1.Secret{ - ObjectMeta: metav1.ObjectMeta{ - Name: "app-1", - Namespace: "team-1", + Secrets: []*unstructured.Unstructured{ + { + Object: map[string]interface{}{ + "apiVersion": "v1", + "kind": "Secret", + "metadata": map[string]interface{}{ + "name": "app-1", + "namespace": "team-1", + }, }, }, }, @@ -309,3 +325,254 @@ func TestConvertDataReadings(t *testing.T) { } } + +// TestMinimizeSnapshot tests the minimizeSnapshot function. +// It creates a snapshot with various secrets and service accounts, runs +// minimizeSnapshot on it, and checks that the resulting snapshot only contains +// the expected secrets and service accounts. +func TestMinimizeSnapshot(t *testing.T) { + secretWithClientCert := newTLSSecret("tls-secret-with-client", sampleCertificateChain(t, x509.ExtKeyUsageClientAuth, x509.ExtKeyUsageServerAuth)) + secretWithoutClientCert := newTLSSecret("tls-secret-without-client", sampleCertificateChain(t, x509.ExtKeyUsageServerAuth)) + opaqueSecret := newOpaqueSecret("opaque-secret") + serviceAccount := &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "v1", + "kind": "ServiceAccount", + "metadata": map[string]interface{}{ + "name": "my-service-account", + "namespace": "default", + }, + }, + } + + type testCase struct { + name string + inputSnapshot dataupload.Snapshot + expectedSnapshot dataupload.Snapshot + } + tests := []testCase{ + { + name: "empty snapshot", + inputSnapshot: dataupload.Snapshot{ + AgentVersion: "v1.0.0", + ClusterID: "cluster-1", + K8SVersion: "v1.21.0", + Secrets: []*unstructured.Unstructured{}, + ServiceAccounts: []client.Object{}, + Roles: []client.Object{}, + }, + expectedSnapshot: dataupload.Snapshot{ + AgentVersion: "v1.0.0", + ClusterID: "cluster-1", + K8SVersion: "v1.21.0", + Secrets: []*unstructured.Unstructured{}, + ServiceAccounts: []client.Object{}, + Roles: []client.Object{}, + }, + }, + { + name: "snapshot with various secrets and service accounts", + inputSnapshot: dataupload.Snapshot{ + AgentVersion: "v1.0.0", + ClusterID: "cluster-1", + K8SVersion: "v1.21.0", + Secrets: []*unstructured.Unstructured{ + secretWithClientCert, + secretWithoutClientCert, + opaqueSecret, + }, + ServiceAccounts: []client.Object{ + serviceAccount, + }, + Roles: []client.Object{}, + }, + expectedSnapshot: dataupload.Snapshot{ + AgentVersion: "v1.0.0", + ClusterID: "cluster-1", + K8SVersion: "v1.21.0", + Secrets: []*unstructured.Unstructured{ + secretWithClientCert, + opaqueSecret, + }, + ServiceAccounts: []client.Object{ + serviceAccount, + }, + Roles: []client.Object{}, + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + log := ktesting.NewLogger(t, ktesting.DefaultConfig) + minimizeSnapshot(log, &test.inputSnapshot) + assert.Equal(t, test.expectedSnapshot, test.inputSnapshot) + }) + } +} + +// TestIsExcludableSecret tests the isExcludableSecret function. +func TestIsExcludableSecret(t *testing.T) { + type testCase struct { + name string + secret *unstructured.Unstructured + exclude bool + } + + tests := []testCase{ + { + name: "TLS secret with client cert in tls.crt", + secret: newTLSSecret("tls-secret-with-client", sampleCertificateChain(t, x509.ExtKeyUsageClientAuth)), + exclude: false, + }, + { + name: "TLS secret with non-client cert in tls.crt", + secret: newTLSSecret("tls-secret-without-client", sampleCertificateChain(t, x509.ExtKeyUsageServerAuth)), + exclude: true, + }, + { + name: "Non-secret", + secret: &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "cert-manager/v1", + "kind": "Certificate", + "metadata": map[string]interface{}{ + "name": "non-secret", + "namespace": "default", + }, + }, + }, + exclude: false, + }, + { + name: "Non-TLS secret", + secret: newOpaqueSecret("non-tls-secret"), + exclude: false, + }, + { + name: "TLS secret without tls.crt", + secret: newTLSSecret("tls-secret-with-no-cert", nil), + exclude: true, + }, + { + name: "TLS secret with empty tls.crt", + secret: newTLSSecret("tls-secret-with-empty-cert", ""), + exclude: true, + }, + { + name: "TLS secret with invalid base64 in tls.crt", + secret: newTLSSecret("tls-secret-with-invalid-cert", "invalid-base64"), + exclude: true, + }, + { + name: "TLS secret with invalid PEM in tls.crt", + secret: newTLSSecret("tls-secret-with-invalid-pem", base64.StdEncoding.EncodeToString([]byte("invalid-pem"))), + exclude: true, + }, + } + + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + log := ktesting.NewLogger(t, ktesting.DefaultConfig) + excluded := isExcludableSecret(log, tc.secret) + assert.Equal(t, tc.exclude, excluded, "case: %s", tc.name) + }) + } +} + +// newTLSSecret creates a Kubernetes TLS secret with the given name and certificate data. +// If crt is nil, the secret will not contain a "tls.crt" entry. +func newTLSSecret(name string, crt interface{}) *unstructured.Unstructured { + data := map[string]interface{}{"tls.key": "dummy-key"} + if crt != nil { + data["tls.crt"] = crt + } + return &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "v1", + "kind": "Secret", + "metadata": map[string]interface{}{ + "name": name, + "namespace": "default", + }, + "type": "kubernetes.io/tls", + "data": data, + }, + } +} + +// newOpaqueSecret creates a Kubernetes Opaque secret with the given name. +func newOpaqueSecret(name string) *unstructured.Unstructured { + return &unstructured.Unstructured{ + Object: map[string]interface{}{ + "apiVersion": "v1", + "kind": "Secret", + "metadata": map[string]interface{}{ + "name": name, + "namespace": "default", + }, + "type": "Opaque", + "data": map[string]interface{}{ + "key": "value", + }, + }, + } +} + +// sampleCertificateChain returns a PEM encoded sample certificate chain for testing purposes. +// The leaf certificate is signed by a self-signed CA certificate. +// Uses an eliptic curve key for the CA and leaf certificates for speed. +// The returned string is base64 encoded to match how TLS certificates +// are typically provided in Kubernetes secrets. +func sampleCertificateChain(t testing.TB, usages ...x509.ExtKeyUsage) string { + t.Helper() + + caPrivKey, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + require.NoError(t, err) + + caTemplate := x509.Certificate{ + SerialNumber: big.NewInt(1), + Subject: pkix.Name{ + Organization: []string{"Test CA"}, + CommonName: "Test CA", + }, + NotBefore: time.Now(), + NotAfter: time.Now().Add(24 * time.Hour), + KeyUsage: x509.KeyUsageCertSign | x509.KeyUsageCRLSign, + ExtKeyUsage: []x509.ExtKeyUsage{}, + BasicConstraintsValid: true, + IsCA: true, + } + + caCertDER, err := x509.CreateCertificate(rand.Reader, &caTemplate, &caTemplate, &caPrivKey.PublicKey, caPrivKey) + require.NoError(t, err) + + caCertPEM := pem.EncodeToMemory(&pem.Block{ + Type: "CERTIFICATE", + Bytes: caCertDER, + }) + + clientPrivKey, err := ecdsa.GenerateKey(elliptic.P256(), rand.Reader) + require.NoError(t, err) + clientTemplate := x509.Certificate{ + SerialNumber: big.NewInt(2), + Subject: pkix.Name{ + Organization: []string{"Test Organization"}, + CommonName: "example.com", + }, + NotBefore: time.Now(), + NotAfter: time.Now().Add(24 * time.Hour), + KeyUsage: x509.KeyUsageDigitalSignature, + ExtKeyUsage: usages, + } + + clientCertDER, err := x509.CreateCertificate(rand.Reader, &clientTemplate, &caTemplate, &clientPrivKey.PublicKey, caPrivKey) + require.NoError(t, err) + + clientCertPEM := pem.EncodeToMemory(&pem.Block{ + Type: "CERTIFICATE", + Bytes: clientCertDER, + }) + + return base64.StdEncoding.EncodeToString(append(clientCertPEM, caCertPEM...)) +}