elastic · mrodm · Feb 27, 2025 · Feb 21, 2025 · Feb 21, 2025 · Feb 24, 2025
diff --git a/docs/howto/system_testing.md b/docs/howto/system_testing.md
@@ -434,6 +434,10 @@ for system tests.
 | skip_transform_validation | boolean |  | Disable or enable the transforms validation performed in system tests. |
 | vars | dictionary |  | Package level variables to set (i.e. declared in `$package_root/manifest.yml`). If not specified the defaults from the manifest are used. |
 | wait_for_data_timeout | duration |  | Amount of time to wait for data to be present in Elasticsearch. Defaults to 10m. |
+| assert.hit_count | integer |  | Exact number of documents to wait for being ingested. |
+| assert.min_count | integer |  | Minimum number of documents to wait for being ingested. |
+| assert.fields_present | []string |  | List of fields that must be present in the documents to stop waiting for new documents. |
+| assert.ingestion_idle_time | duration |  | Minimum time elapsed since the last document was ingested. |
 
 For example, the `apache/access` data stream's `test-access-log-config.yml` is
 shown below.
@@ -470,7 +474,25 @@ you can use the `input` option to select the stream to test. The first stream
 whose input type matches the `input` value will be tested. By default, the first
 stream declared in the manifest will be tested.
 
-To add an assertion on the number of hits in a given system test, consider this example from the `httpjson/generic` data stream's `test-expected-hit-count-config.yml`, shown below.
+#### Available assertions to wait for documents
+
+System tests allow you to define different conditions to wait for while data is collected from the integration service and indexed into the correct Elasticsearch data stream.
+
+By default, `elastic-package` waits until there are more than zero documents ingested. The exact number of documents to be
+validated in this default scenario depends on how fast the documents are ingested.
+
+The following additional options are available:
+- Wait for collecting exactly `assert.hit_count` documents into the data stream.
+    - It will fail if the final number of documents ingested into Elasticsearch is different from `assert.hit_count` documents.
+- Wait for collecting at least `assert.min_count` documents into the data stream.
+    - Once there have been `assert.min_count` or more documents ingested, `elastic-package` will proceed to validate the documents.
+    - This could be used to ensure that a wide range of different documents have been ingested into Elasticsearch.
+- Collect data into the data stream until all the fields defined in the list `assert.fields_present` are present in any of the documents.
+    - Each field in that list could be present in different documents.
+
+The following example shows how to add an assertion on the number of hits in a given system test using `assert.hit_count`.
+
+Consider this example from the `httpjson/generic` data stream's `test-expected-hit-count-config.yml`, shown below.
 
 ```yaml
 input: httpjson
@@ -519,6 +541,11 @@ inserts the value of `response_split` from the test configuration into the integ
 
 Returning to `test-expected-hit-count-config.yml`, when `assert.hit_count` is defined and `> 0` the test will assert that the number of hits in the array matches that value and fail when this is not true.
 
+#### Defining new Elastic Agents for a given test
+
+System tests allow you to create a specific Elastic Agent for each test, with custom settings or additional software.
+Elastic Agents can be customized by defining the needed `agent.*` settings.
+
 As an example to add settings to create a new Elastic Agent in a given test,
 the `auditd_manager/auditd` data stream's `test-default-config.yml` is shown below:
 

diff --git a/internal/testrunner/runners/system/test_config.go b/internal/testrunner/runners/system/test_config.go
@@ -45,8 +45,14 @@ type testConfig struct {
 	SkipTransformValidation bool `config:"skip_transform_validation"`
 
 	Assert struct {
-		// Expected number of hits for a given test
+		// HitCount expected number of hits for a given test
 		HitCount int `config:"hit_count"`
+
+		// MinCount minimum number of hits for a given test
+		MinCount int `config:"min_count"`
+
+		// FieldsPresent list of fields that must be present in any of documents ingested
+		FieldsPresent []string `config:"fields_present"`
 	} `config:"assert"`
 
 	// NumericKeywordFields holds a list of fields that have keyword

diff --git a/internal/testrunner/runners/system/tester.go b/internal/testrunner/runners/system/tester.go
@@ -1355,51 +1355,7 @@ func (r *tester) prepareScenario(ctx context.Context, config *testConfig, stackC
 		return &scenario, nil
 	}
 
-	// Use custom timeout if the service can't collect data immediately.
-	waitForDataTimeout := waitForDataDefaultTimeout
-	if config.WaitForDataTimeout > 0 {
-		waitForDataTimeout = config.WaitForDataTimeout
-	}
-
-	// (TODO in future) Optionally exercise service to generate load.
-	logger.Debugf("checking for expected data in data stream (%s)...", waitForDataTimeout)
-	var hits *hits
-	oldHits := 0
-	passed, waitErr := wait.UntilTrue(ctx, func(ctx context.Context) (bool, error) {
-		var err error
-		hits, err = r.getDocs(ctx, scenario.dataStream)
-		if err != nil {
-			return false, err
-		}
-
-		if r.checkFailureStore {
-			failureStore, err := r.getFailureStoreDocs(ctx, scenario.dataStream)
-			if err != nil {
-				return false, fmt.Errorf("failed to check failure store: %w", err)
-			}
-			if n := len(failureStore); n > 0 {
-				// Interrupt loop earlier if there are failures in the document store.
-				logger.Debugf("Found %d hits in the failure store for %s", len(failureStore), scenario.dataStream)
-				return true, nil
-			}
-		}
-
-		if config.Assert.HitCount > 0 {
-			if hits.size() < config.Assert.HitCount {
-				return false, nil
-			}
-
-			ret := hits.size() == oldHits
-			if !ret {
-				oldHits = hits.size()
-				time.Sleep(4 * time.Second)
-			}
-
-			return ret, nil
-		}
-
-		return hits.size() > 0, nil
-	}, 1*time.Second, waitForDataTimeout)
+	hits, waitErr := r.waitForDocs(ctx, config, scenario.dataStream)
 
 	// before checking "waitErr" error , it is necessary to check if the service has finished with error
 	// to report it as a test case failed
@@ -1417,10 +1373,6 @@ func (r *tester) prepareScenario(ctx context.Context, config *testConfig, stackC
 		return nil, waitErr
 	}
 
-	if !passed {
-		return nil, testrunner.ErrTestCaseFailed{Reason: fmt.Sprintf("could not find hits in %s data stream", scenario.dataStream)}
-	}
-
 	// Get deprecation warnings after ensuring that there are ingested docs and thus the
 	// data stream exists.
 	scenario.deprecationWarnings, err = r.getDeprecationWarnings(ctx, scenario.dataStream)
@@ -1583,6 +1535,117 @@ func (r *tester) createServiceStateDir() error {
 	return nil
 }
 
+func (r *tester) waitForDocs(ctx context.Context, config *testConfig, dataStream string) (*hits, error) {
+	// Use custom timeout if the service can't collect data immediately.
+	waitForDataTimeout := waitForDataDefaultTimeout
+	if config.WaitForDataTimeout > 0 {
+		waitForDataTimeout = config.WaitForDataTimeout
+	}
+
+	if config.Assert.HitCount > elasticsearchQuerySize {
+		return nil, fmt.Errorf("invalid value for assert.hit_count (%d): it must be lower of the maximum query size (%d)", config.Assert.HitCount, elasticsearchQuerySize)
+	}
+
+	if config.Assert.MinCount > elasticsearchQuerySize {
+		return nil, fmt.Errorf("invalid value for assert.min_count (%d): it must be lower of the maximum query size (%d)", config.Assert.MinCount, elasticsearchQuerySize)
+	}
+
+	// (TODO in future) Optionally exercise service to generate load.
+	logger.Debugf("checking for expected data in data stream (%s)...", waitForDataTimeout)
+	var hits *hits
+	oldHits := 0
+	foundFields := map[string]any{}
+	passed, waitErr := wait.UntilTrue(ctx, func(ctx context.Context) (bool, error) {
+		var err error
+		hits, err = r.getDocs(ctx, dataStream)
+		if err != nil {
+			return false, err
+		}
+
+		defer func() {
+			oldHits = hits.size()
+		}()
+
+		if r.checkFailureStore {
+			failureStore, err := r.getFailureStoreDocs(ctx, dataStream)
+			if err != nil {
+				return false, fmt.Errorf("failed to check failure store: %w", err)
+			}
+			if n := len(failureStore); n > 0 {
+				// Interrupt loop earlier if there are failures in the document store.
+				logger.Debugf("Found %d hits in the failure store for %s", len(failureStore), dataStream)
+				return true, nil
+			}
+		}
+
+		assertHitCount := func() bool {
+			if config.Assert.HitCount == 0 {
+				// not enabled
+				return true
+			}
+			if hits.size() < config.Assert.HitCount {
+				return false
+			}
+
+			ret := hits.size() == oldHits
+			if !ret {
+				time.Sleep(4 * time.Second)
+			}
+
+			return ret
+		}()
+
+		assertFieldsPresent := func() bool {
+			if len(config.Assert.FieldsPresent) == 0 {
+				// not enabled
+				return true
+			}
+			if hits.size() == 0 {
+				// At least there should be one document ingested
+				return false
+			}
+			for _, f := range config.Assert.FieldsPresent {
+				if _, found := foundFields[f]; found {
+					continue
+				}
+				found := false
+				for _, d := range hits.Fields {
+					if _, err := d.GetValue(f); err == nil {
+						found = true
+						break
+					}
+				}
+				if !found {
+					return false
+				}
+				logger.Debugf("Found field %q in hits", f)
+				foundFields[f] = struct{}{}
+			}
+			return true
+		}()
+
+		assertMinCount := func() bool {
+			if config.Assert.MinCount > 0 {
+				return hits.size() >= config.Assert.MinCount
+			}
+			// By default at least one document
+			return hits.size() > 0
+		}()
+
+		return assertFieldsPresent && assertMinCount && assertHitCount, nil
+	}, 1*time.Second, waitForDataTimeout)
+
+	if waitErr != nil {
+		return nil, waitErr
+	}
+
+	if !passed {
+		return nil, testrunner.ErrTestCaseFailed{Reason: fmt.Sprintf("could not find the expected hits in %s data stream", dataStream)}
+	}
+
+	return hits, nil
+}
+
 func (r *tester) validateTestScenario(ctx context.Context, result *testrunner.ResultComposer, scenario *scenarioTest, config *testConfig) ([]testrunner.TestResult, error) {
 	if err := validateFailureStore(scenario.failureStore); err != nil {
 		return result.WithError(err)

diff --git a/test/packages/false_positives/failed_fields_present_assert.expected_errors b/test/packages/false_positives/failed_fields_present_assert.expected_errors
@@ -0,0 +1 @@
+<failure>test case failed: could not find the expected hits in logs-failed_fields_present_assert.test-[[:digit:]]+ data stream</failure>
diff --git a/test/packages/false_positives/failed_fields_present_assert/LICENSE.txt b/test/packages/false_positives/failed_fields_present_assert/LICENSE.txt
@@ -0,0 +1,93 @@
+Elastic License 2.0
+
+URL: https://www.elastic.co/licensing/elastic-license
+
+## Acceptance
+
+By using the software, you agree to all of the terms and conditions below.
+
+## Copyright License
+
+The licensor grants you a non-exclusive, royalty-free, worldwide,
+non-sublicensable, non-transferable license to use, copy, distribute, make
+available, and prepare derivative works of the software, in each case subject to
+the limitations and conditions below.
+
+## Limitations
+
+You may not provide the software to third parties as a hosted or managed
+service, where the service provides users with access to any substantial set of
+the features or functionality of the software.
+
+You may not move, change, disable, or circumvent the license key functionality
+in the software, and you may not remove or obscure any functionality in the
+software that is protected by the license key.
+
+You may not alter, remove, or obscure any licensing, copyright, or other notices
+of the licensor in the software. Any use of the licensor’s trademarks is subject
+to applicable law.
+
+## Patents
+
+The licensor grants you a license, under any patent claims the licensor can
+license, or becomes able to license, to make, have made, use, sell, offer for
+sale, import and have imported the software, in each case subject to the
+limitations and conditions in this license. This license does not cover any
+patent claims that you cause to be infringed by modifications or additions to
+the software. If you or your company make any written claim that the software
+infringes or contributes to infringement of any patent, your patent license for
+the software granted under these terms ends immediately. If your company makes
+such a claim, your patent license ends immediately for work on behalf of your
+company.
+
+## Notices
+
+You must ensure that anyone who gets a copy of any part of the software from you
+also gets a copy of these terms.
+
+If you modify the software, you must include in any modified copies of the
+software prominent notices stating that you have modified the software.
+
+## No Other Rights
+
+These terms do not imply any licenses other than those expressly granted in
+these terms.
+
+## Termination
+
+If you use the software in violation of these terms, such use is not licensed,
+and your licenses will automatically terminate. If the licensor provides you
+with a notice of your violation, and you cease all violation of this license no
+later than 30 days after you receive that notice, your licenses will be
+reinstated retroactively. However, if you violate these terms after such
+reinstatement, any additional violation of these terms will cause your licenses
+to terminate automatically and permanently.
+
+## No Liability
+
+*As far as the law allows, the software comes as is, without any warranty or
+condition, and the licensor will not be liable to you for any damages arising
+out of these terms or the use or nature of the software, under any kind of
+legal claim.*
+
+## Definitions
+
+The **licensor** is the entity offering these terms, and the **software** is the
+software the licensor makes available under these terms, including any portion
+of it.
+
+**you** refers to the individual or entity agreeing to these terms.
+
+**your company** is any legal entity, sole proprietorship, or other kind of
+organization that you work for, plus all organizations that have control over,
+are under the control of, or are under common control with that
+organization. **control** means ownership of substantially all the assets of an
+entity, or the power to direct its management and policies by vote, contract, or
+otherwise. Control can be direct or indirect.
+
+**your licenses** are all the licenses granted to you for the software under
+these terms.
+
+**use** means anything you do with the software requiring one of your licenses.
+
+**trademark** means trademarks, service marks, and similar rights.
diff --git a/test/packages/false_positives/failed_fields_present_assert/_dev/build/build.yml b/test/packages/false_positives/failed_fields_present_assert/_dev/build/build.yml
@@ -0,0 +1,3 @@
+dependencies:
+  ecs:
+    reference: [email protected]
diff --git a/test/packages/false_positives/failed_fields_present_assert/changelog.yml b/test/packages/false_positives/failed_fields_present_assert/changelog.yml
@@ -0,0 +1,6 @@
+# newer versions go on top
+- version: "0.0.1"
+  changes:
+    - description: Initial draft of the package
+      type: enhancement
+      link: https://github.com/elastic/integrations/pull/1 # FIXME Replace with the real PR link
diff --git a/...ed_fields_present_assert/data_stream/test/_dev/test/system/test-fields_present-config.yml b/...ed_fields_present_assert/data_stream/test/_dev/test/system/test-fields_present-config.yml
@@ -0,0 +1,33 @@
+vars: ~
+data_stream:
+  vars:
+    paths:
+      - "/custom/paths/logs.json"
+wait_for_data_timeout: 10s
+assert:
+  fields_present:
+    - target.file
+    - target.expected
+    - target.finish # this field is not present in the log file
+agent:
+  provisioning_script:
+    language: bash
+    contents: |
+      mkdir -p /custom/paths
+      cd /custom/paths
+      touch logs.json
+      # elastic-package just retrieves the 500 first documents in the search query
+      for i in $(seq 1 245) ; do
+        echo '{ "contents": "Message from file", "file": "logs.json"}'
+      done >> logs.json
+      echo '{ "contents": "Message from file", "file": "logs.json", "expected": "finish"}' >> logs.json
+      for i in $(seq 1 245); do
+        echo '{ "contents": "Message from file", "file": "logs.json"}'
+      done >> logs.json
+  pre_start_script:
+    language: sh
+    contents: |
+      export PATH=${PATH}:/custom/paths
+      mkdir -p /tmp/other/path
+      cd /tmp/other/path
+      echo "Pre-start: Current directory $(pwd)"
diff --git a/...false_positives/failed_fields_present_assert/data_stream/test/agent/stream/stream.yml.hbs b/...false_positives/failed_fields_present_assert/data_stream/test/agent/stream/stream.yml.hbs
@@ -0,0 +1,7 @@
+paths:
+{{#each paths as |path i|}}
+  - {{path}}
+{{/each}}
+exclude_files: [".gz$"]
+processors:
+  - add_locale: ~
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		<failure>test case failed: could not find the expected hits in logs-failed_fields_present_assert.test-[[:digit:]]+ data stream</failure>