29
29
#include " runtime/data_stream_mgr.h"
30
30
#include " runtime/descriptors.h"
31
31
#include " runtime/exec_env.h"
32
+ #include " runtime/local_pass_through_buffer.h"
32
33
#include " runtime/runtime_state.h"
33
34
#include " serde/compress_strategy.h"
34
35
#include " serde/protobuf_serde.h"
@@ -46,13 +47,15 @@ class ExchangeSinkOperator::Channel {
46
47
// how much tuple data is getting accumulated before being sent; it only applies
47
48
// when data is added via add_row() and not sent directly via send_batch().
48
49
Channel (ExchangeSinkOperator* parent, const TNetworkAddress& brpc_dest, const TUniqueId& fragment_instance_id,
49
- PlanNodeId dest_node_id, int32_t num_shuffles, bool enable_exchange_pass_through, bool enable_exchange_perf)
50
+ PlanNodeId dest_node_id, int32_t num_shuffles, bool enable_exchange_pass_through, bool enable_exchange_perf,
51
+ PassThroughChunkBuffer* pass_through_chunk_buffer)
50
52
: _parent(parent),
51
53
_brpc_dest_addr (brpc_dest),
52
54
_fragment_instance_id(fragment_instance_id),
53
55
_dest_node_id(dest_node_id),
54
56
_enable_exchange_pass_through(enable_exchange_pass_through),
55
57
_enable_exchange_perf(enable_exchange_perf),
58
+ _pass_through_context(pass_through_chunk_buffer, fragment_instance_id, dest_node_id),
56
59
_chunks(num_shuffles) {}
57
60
58
61
// Initialize channel.
@@ -114,6 +117,7 @@ class ExchangeSinkOperator::Channel {
114
117
// enable it to profile exchange's performance, which ignores computing local data for exchange_speed/_bytes,
115
118
// because local data isn't accessed by remote network.
116
119
const bool _enable_exchange_perf;
120
+ PassThroughContext _pass_through_context;
117
121
118
122
bool _is_first_chunk = true ;
119
123
std::shared_ptr<PInternalService_RecoverableStub> _brpc_stub = nullptr ;
@@ -123,8 +127,6 @@ class ExchangeSinkOperator::Channel {
123
127
// If pipeline level shuffle is disabled, the size of _chunks
124
128
// is always 1
125
129
std::vector<std::unique_ptr<Chunk>> _chunks;
126
- ChunkPassThroughVectorPtr _pass_through_chunks;
127
- int64_t _pass_through_physical_bytes = 0 ;
128
130
PTransmitChunkParamsPtr _chunk_request;
129
131
size_t _current_request_bytes = 0 ;
130
132
@@ -152,6 +154,7 @@ bool ExchangeSinkOperator::Channel::_check_use_pass_through() {
152
154
}
153
155
154
156
void ExchangeSinkOperator::Channel::_prepare_pass_through () {
157
+ _pass_through_context.init ();
155
158
_use_pass_through = _check_use_pass_through ();
156
159
}
157
160
@@ -226,8 +229,6 @@ Status ExchangeSinkOperator::Channel::send_one_chunk(RuntimeState* state, const
226
229
_chunk_request->set_node_id (_dest_node_id);
227
230
_chunk_request->set_sender_id (_parent->_sender_id );
228
231
_chunk_request->set_be_number (_parent->_be_number );
229
- _pass_through_chunks = std::make_unique<ChunkPassThroughVector>();
230
- _pass_through_physical_bytes = 0 ;
231
232
if (_parent->_is_pipeline_level_shuffle ) {
232
233
_chunk_request->set_is_pipeline_level_shuffle (true );
233
234
}
@@ -236,19 +237,18 @@ Status ExchangeSinkOperator::Channel::send_one_chunk(RuntimeState* state, const
236
237
// If chunk is not null, append it to request
237
238
if (chunk != nullptr ) {
238
239
if (_use_pass_through) {
239
- int64_t before_bytes = CurrentThread::current ().get_consumed_bytes ();
240
- auto clone = chunk->clone_unique ();
241
- int64_t physical_bytes = CurrentThread::current ().get_consumed_bytes () - before_bytes;
242
- _pass_through_physical_bytes += physical_bytes;
243
240
size_t chunk_size = serde::ProtobufChunkSerde::max_serialized_size (*chunk);
244
- _pass_through_chunks->emplace_back (std::move (clone), driver_sequence, chunk_size, physical_bytes);
245
- COUNTER_UPDATE (_parent->_bytes_pass_through_counter , chunk_size);
241
+ // -1 means pipeline level shuffle is disabled
242
+ TRY_CATCH_BAD_ALLOC (
243
+ _pass_through_context.append_chunk (_parent->_sender_id , chunk, chunk_size,
244
+ _parent->_is_pipeline_level_shuffle ? driver_sequence : -1 ));
246
245
_current_request_bytes += chunk_size;
246
+ COUNTER_UPDATE (_parent->_bytes_pass_through_counter , chunk_size);
247
+ COUNTER_SET (_parent->_pass_through_buffer_peak_mem_usage , _pass_through_context.total_bytes ());
247
248
} else {
248
249
if (_parent->_is_pipeline_level_shuffle ) {
249
250
_chunk_request->add_driver_sequences (driver_sequence);
250
251
}
251
-
252
252
auto pchunk = _chunk_request->add_chunks ();
253
253
TRY_CATCH_BAD_ALLOC (RETURN_IF_ERROR (_parent->serialize_chunk (chunk, pchunk, &_is_first_chunk)));
254
254
_current_request_bytes += pchunk->data ().size ();
@@ -261,21 +261,12 @@ Status ExchangeSinkOperator::Channel::send_one_chunk(RuntimeState* state, const
261
261
_chunk_request->set_eos (eos);
262
262
_chunk_request->set_use_pass_through (_use_pass_through);
263
263
butil::IOBuf attachment;
264
- int64_t physical_bytes = _use_pass_through ? _pass_through_physical_bytes
265
- : _parent->construct_brpc_attachment (_chunk_request, attachment);
266
- TransmitChunkInfo info = {this ->_fragment_instance_id ,
267
- _brpc_stub,
268
- std::move (_chunk_request),
269
- std::move (_pass_through_chunks),
270
- state->exec_env ()->stream_mgr (),
271
- attachment,
272
- physical_bytes,
273
- _brpc_dest_addr};
264
+ int64_t attachment_physical_bytes = _parent->construct_brpc_attachment (_chunk_request, attachment);
265
+ TransmitChunkInfo info = {this ->_fragment_instance_id , _brpc_stub, std::move (_chunk_request), attachment,
266
+ attachment_physical_bytes, _brpc_dest_addr};
274
267
RETURN_IF_ERROR (_parent->_buffer ->add_request (info));
275
268
_current_request_bytes = 0 ;
276
269
_chunk_request.reset ();
277
- _pass_through_chunks = std::make_unique<ChunkPassThroughVector>();
278
- _pass_through_physical_bytes = 0 ;
279
270
*is_real_sent = true ;
280
271
}
281
272
@@ -293,8 +284,8 @@ Status ExchangeSinkOperator::Channel::send_chunk_request(RuntimeState* state, PT
293
284
chunk_request->set_be_number (_parent->_be_number );
294
285
chunk_request->set_eos (false );
295
286
chunk_request->set_use_pass_through (_use_pass_through);
296
- TransmitChunkInfo info = {this ->_fragment_instance_id , _brpc_stub, std::move (chunk_request), nullptr ,
297
- state-> exec_env ()-> stream_mgr (), attachment, attachment_physical_bytes, _brpc_dest_addr};
287
+ TransmitChunkInfo info = {this ->_fragment_instance_id , _brpc_stub, std::move (chunk_request), attachment ,
288
+ attachment_physical_bytes, _brpc_dest_addr};
298
289
RETURN_IF_ERROR (_parent->_buffer ->add_request (info));
299
290
300
291
return Status::OK ();
@@ -351,6 +342,10 @@ ExchangeSinkOperator::ExchangeSinkOperator(
351
342
_output_columns(output_columns),
352
343
_num_sinkers(num_sinkers) {
353
344
std::map<int64_t , int64_t > fragment_id_to_channel_index;
345
+ RuntimeState* state = fragment_ctx->runtime_state ();
346
+
347
+ PassThroughChunkBuffer* pass_through_chunk_buffer =
348
+ state->exec_env ()->stream_mgr ()->get_pass_through_chunk_buffer (state->query_id ());
354
349
355
350
_channels.reserve (destinations.size ());
356
351
std::vector<int > driver_sequence_per_channel (destinations.size (), 0 );
@@ -364,7 +359,7 @@ ExchangeSinkOperator::ExchangeSinkOperator(
364
359
} else {
365
360
std::unique_ptr<Channel> channel = std::make_unique<Channel>(
366
361
this , destination.brpc_server , fragment_instance_id, dest_node_id, _num_shuffles_per_channel,
367
- enable_exchange_pass_through, enable_exchange_perf);
362
+ enable_exchange_pass_through, enable_exchange_perf, pass_through_chunk_buffer );
368
363
_channels.emplace_back (channel.get ());
369
364
_instance_id2channel.emplace (fragment_instance_id.lo , std::move (channel));
370
365
}
@@ -460,6 +455,9 @@ Status ExchangeSinkOperator::prepare(RuntimeState* state) {
460
455
_shuffle_chunk_append_counter = ADD_COUNTER (_unique_metrics, " ShuffleChunkAppendCounter" , TUnit::UNIT);
461
456
_shuffle_chunk_append_timer = ADD_TIMER (_unique_metrics, " ShuffleChunkAppendTime" );
462
457
_compress_timer = ADD_TIMER (_unique_metrics, " CompressTime" );
458
+ _pass_through_buffer_peak_mem_usage = _unique_metrics->AddHighWaterMarkCounter (
459
+ " PassThroughBufferPeakMemoryUsage" , TUnit::BYTES,
460
+ RuntimeProfile::Counter::create_strategy (TUnit::BYTES, TCounterMergeType::SKIP_FIRST_MERGE));
463
461
464
462
for (auto & [_, channel] : _instance_id2channel) {
465
463
RETURN_IF_ERROR (channel->init (state));
@@ -651,10 +649,8 @@ Status ExchangeSinkOperator::set_finishing(RuntimeState* state) {
651
649
butil::IOBuf attachment;
652
650
const int64_t attachment_physical_bytes = construct_brpc_attachment (_chunk_request, attachment);
653
651
for (const auto & [_, channel] : _instance_id2channel) {
654
- if (!channel->use_pass_through ()) {
655
- PTransmitChunkParamsPtr copy = std::make_shared<PTransmitChunkParams>(*_chunk_request);
656
- RETURN_IF_ERROR (channel->send_chunk_request (state, copy, attachment, attachment_physical_bytes));
657
- }
652
+ PTransmitChunkParamsPtr copy = std::make_shared<PTransmitChunkParams>(*_chunk_request);
653
+ RETURN_IF_ERROR (channel->send_chunk_request (state, copy, attachment, attachment_physical_bytes));
658
654
}
659
655
_current_request_bytes = 0 ;
660
656
_chunk_request.reset ();
0 commit comments