Skip to content

Commit 068ca76

Browse files
authored
Account for simulated utilization threshold in WriteLoadConstraintDeciderIT (#133894)
1 parent 5a4c3ab commit 068ca76

File tree

3 files changed

+30
-13
lines changed

3 files changed

+30
-13
lines changed

muted-tests.yml

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -519,9 +519,6 @@ tests:
519519
- class: org.elasticsearch.xpack.esql.action.RandomizedTimeSeriesIT
520520
method: testGroupBySubset
521521
issue: https://github.com/elastic/elasticsearch/issues/133220
522-
- class: org.elasticsearch.cluster.routing.allocation.decider.WriteLoadConstraintDeciderIT
523-
method: testHighNodeWriteLoadPreventsNewShardAllocation
524-
issue: https://github.com/elastic/elasticsearch/issues/133857
525522
- class: org.elasticsearch.xpack.kql.parser.KqlParserBooleanQueryTests
526523
method: testParseOrQuery
527524
issue: https://github.com/elastic/elasticsearch/issues/133863

server/src/internalClusterTest/java/org/elasticsearch/cluster/routing/allocation/decider/WriteLoadConstraintDeciderIT.java

Lines changed: 18 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,7 @@
5050

5151
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_REPLICAS;
5252
import static org.elasticsearch.cluster.metadata.IndexMetadata.SETTING_NUMBER_OF_SHARDS;
53+
import static org.elasticsearch.cluster.routing.ShardMovementWriteLoadSimulator.calculateUtilizationForWriteLoad;
5354

5455
@ESIntegTestCase.ClusterScope(scope = ESIntegTestCase.Scope.TEST, numDataNodes = 0)
5556
public class WriteLoadConstraintDeciderIT extends ESIntegTestCase {
@@ -68,6 +69,8 @@ protected Collection<Class<? extends Plugin>> getMockPlugins() {
6869
*/
6970
public void testHighNodeWriteLoadPreventsNewShardAllocation() {
7071
int randomUtilizationThresholdPercent = randomIntBetween(50, 100);
72+
int numberOfWritePoolThreads = randomIntBetween(2, 20);
73+
float shardWriteLoad = randomFloatBetween(0.0f, 0.01f, false);
7174
Settings settings = Settings.builder()
7275
.put(
7376
WriteLoadConstraintSettings.WRITE_LOAD_DECIDER_ENABLED_SETTING.getKey(),
@@ -115,7 +118,14 @@ public void testHighNodeWriteLoadPreventsNewShardAllocation() {
115118
);
116119

117120
String indexName = randomIdentifier();
118-
int randomNumberOfShards = randomIntBetween(15, 40); // Pick a high number of shards, so it is clear assignment is not accidental.
121+
int randomNumberOfShards = randomIntBetween(10, 20); // Pick a high number of shards, so it is clear assignment is not accidental.
122+
123+
// Calculate the maximum utilization a node can report while still being able to accept all relocating shards
124+
double additionalLoadFromAllShards = calculateUtilizationForWriteLoad(
125+
shardWriteLoad * randomNumberOfShards,
126+
numberOfWritePoolThreads
127+
);
128+
int maxUtilizationPercent = randomUtilizationThresholdPercent - (int) (additionalLoadFromAllShards * 100) - 1;
119129

120130
var verifyAssignmentToFirstNodeListener = ClusterServiceUtils.addMasterTemporaryStateListener(clusterState -> {
121131
var indexRoutingTable = clusterState.routingTable().index(indexName);
@@ -154,20 +164,20 @@ public void testHighNodeWriteLoadPreventsNewShardAllocation() {
154164
final DiscoveryNode thirdDiscoveryNode = getDiscoveryNode(thirdDataNodeName);
155165
final NodeUsageStatsForThreadPools firstNodeNonHotSpottingNodeStats = createNodeUsageStatsForThreadPools(
156166
firstDiscoveryNode,
157-
2,
158-
0.5f,
167+
numberOfWritePoolThreads,
168+
randomIntBetween(0, maxUtilizationPercent) / 100f,
159169
0
160170
);
161171
final NodeUsageStatsForThreadPools secondNodeNonHotSpottingNodeStats = createNodeUsageStatsForThreadPools(
162172
secondDiscoveryNode,
163-
2,
164-
0.5f,
173+
numberOfWritePoolThreads,
174+
randomIntBetween(0, maxUtilizationPercent) / 100f,
165175
0
166176
);
167177
final NodeUsageStatsForThreadPools thirdNodeHotSpottingNodeStats = createNodeUsageStatsForThreadPools(
168178
thirdDiscoveryNode,
169-
2,
170-
randomUtilizationThresholdPercent + 1 / 100,
179+
numberOfWritePoolThreads,
180+
(randomUtilizationThresholdPercent + 1) / 100f,
171181
0
172182
);
173183

@@ -197,12 +207,11 @@ public void testHighNodeWriteLoadPreventsNewShardAllocation() {
197207
.getMetadata()
198208
.getProject()
199209
.index(indexName);
200-
double shardWriteLoadDefault = 0.2;
201210
MockTransportService.getInstance(firstDataNodeName)
202211
.addRequestHandlingBehavior(IndicesStatsAction.NAME + "[n]", (handler, request, channel, task) -> {
203212
List<ShardStats> shardStats = new ArrayList<>(indexMetadata.getNumberOfShards());
204213
for (int i = 0; i < indexMetadata.getNumberOfShards(); i++) {
205-
shardStats.add(createShardStats(indexMetadata, i, shardWriteLoadDefault, firstDataNodeId));
214+
shardStats.add(createShardStats(indexMetadata, i, shardWriteLoad, firstDataNodeId));
206215
}
207216
TransportIndicesStatsAction instance = internalCluster().getInstance(TransportIndicesStatsAction.class, firstDataNodeName);
208217
channel.sendResponse(instance.new NodeResponse(firstDataNodeId, indexMetadata.getNumberOfShards(), shardStats, List.of()));

server/src/main/java/org/elasticsearch/cluster/routing/ShardMovementWriteLoadSimulator.java

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -124,10 +124,21 @@ public static float updateNodeUtilizationWithShardMovements(
124124
float shardWriteLoadDelta,
125125
int numberOfWriteThreads
126126
) {
127-
float newNodeUtilization = nodeUtilization + (shardWriteLoadDelta / numberOfWriteThreads);
127+
float newNodeUtilization = nodeUtilization + calculateUtilizationForWriteLoad(shardWriteLoadDelta, numberOfWriteThreads);
128128
return (float) Math.max(newNodeUtilization, 0.0);
129129
}
130130

131+
/**
132+
* Calculate what percentage utilization increase would result from adding some amount of write-load
133+
*
134+
* @param totalShardWriteLoad The write-load being added/removed
135+
* @param numberOfThreads The number of threads in the node-being-added-to's write thread pool
136+
* @return The change in percentage utilization
137+
*/
138+
public static float calculateUtilizationForWriteLoad(float totalShardWriteLoad, int numberOfThreads) {
139+
return totalShardWriteLoad / numberOfThreads;
140+
}
141+
131142
/**
132143
* Adjust the max thread pool queue latency by accounting for whether shard has moved away from the node.
133144
* @param maxThreadPoolQueueLatencyMillis The current max thread pool queue latency.

0 commit comments

Comments
 (0)