Commit 95f9a16 (parent: 0ce55c3)

[FLINK-36067][runtime] Support optimize stream graph based on input info.

23 files changed: +724 -101 lines

flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/IntermediateResult.java (+25)

@@ -63,6 +63,7 @@ public class IntermediateResult {
     private final int numParallelProducers;
 
     private final ExecutionPlanSchedulingContext executionPlanSchedulingContext;
+    private final boolean singleSubpartitionContainsAllData;
 
     private int partitionsAssigned;
 
@@ -102,6 +103,8 @@ public IntermediateResult(
         this.shuffleDescriptorCache = new HashMap<>();
 
         this.executionPlanSchedulingContext = checkNotNull(executionPlanSchedulingContext);
+
+        this.singleSubpartitionContainsAllData = intermediateDataSet.isBroadcast();
     }
 
     public boolean areAllConsumerVerticesCreated() {
@@ -199,6 +202,16 @@ public DistributionPattern getConsumingDistributionPattern() {
         return intermediateDataSet.getDistributionPattern();
     }
 
+    /**
+     * Determines whether the associated intermediate data set uses a broadcast distribution
+     * pattern.
+     *
+     * <p>A broadcast distribution pattern indicates that all data produced by this intermediate
+     * data set should be broadcast to every downstream consumer.
+     *
+     * @return true if the intermediate data set uses a broadcast distribution pattern; false
+     *     otherwise.
+     */
     public boolean isBroadcast() {
         return intermediateDataSet.isBroadcast();
     }
@@ -207,6 +220,18 @@ public boolean isForward() {
         return intermediateDataSet.isForward();
     }
 
+    /**
+     * Checks if a single subpartition contains all the produced data. This condition indicates
+     * that the data was intended to be broadcast to all consumers. If the decision to broadcast
+     * was made before data production, this flag is set accordingly; conversely, if the
+     * broadcasting decision was made after production, this flag will be false.
+     *
+     * @return true if a single subpartition contains all the data; false otherwise.
+     */
+    public boolean isSingleSubpartitionContainsAllData() {
+        return singleSubpartitionContainsAllData;
+    }
+
     public int getConnectionIndex() {
         return connectionIndex;
     }
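
The key subtlety in this file is that the new flag is latched at construction time from the data set's broadcast property, so a later change to the consumption pattern does not retroactively claim the producer wrote everything into one subpartition. A minimal standalone sketch of that behavior (the ToyResult class and its methods are illustrative, not part of this commit):

// Minimal sketch (hypothetical ToyResult class, not Flink code) of why the flag
// is captured at construction time rather than derived from isBroadcast().
public class ToyResult {
    private boolean broadcast;                               // may change after optimization
    private final boolean singleSubpartitionContainsAllData; // latched at creation

    public ToyResult(boolean broadcastAtCreation) {
        this.broadcast = broadcastAtCreation;
        // Mirrors `this.singleSubpartitionContainsAllData = intermediateDataSet.isBroadcast();`
        this.singleSubpartitionContainsAllData = broadcastAtCreation;
    }

    // Models a post-production switch to broadcast consumption (broadcast join optimization).
    public void switchToBroadcastConsumption() {
        this.broadcast = true;
    }

    public static void main(String[] args) {
        ToyResult hashThenBroadcast = new ToyResult(false);
        hashThenBroadcast.switchToBroadcastConsumption();
        // The result is now read as broadcast, but its data is still spread
        // across many subpartitions, so the latched flag stays false.
        System.out.println(hashThenBroadcast.broadcast);                          // true
        System.out.println(hashThenBroadcast.singleSubpartitionContainsAllData);  // false
    }
}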

flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/IntermediateResultInfo.java (+14 -2)

@@ -29,9 +29,21 @@ public interface IntermediateResultInfo {
     IntermediateDataSetID getResultId();
 
     /**
-     * Whether it is a broadcast result.
+     * Checks whether there is a single subpartition that contains all the produced data.
      *
-     * @return whether it is a broadcast result
+     * @return true if one subpartition contains all the data; false otherwise.
+     */
+    boolean isSingleSubpartitionContainsAllData();
+
+    /**
+     * Determines whether the associated intermediate data set uses a broadcast distribution
+     * pattern.
+     *
+     * <p>A broadcast distribution pattern indicates that all data produced by this intermediate
+     * data set should be broadcast to every downstream consumer.
+     *
+     * @return true if the intermediate data set uses a broadcast distribution pattern; false
+     *     otherwise.
      */
     boolean isBroadcast();
 

flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/IntermediateResultPartition.java (+1 -1)

@@ -151,7 +151,7 @@ public int getNumberOfSubpartitions() {
     }
 
     private int computeNumberOfSubpartitionsForDynamicGraph() {
-        if (totalResult.isBroadcast() || totalResult.isForward()) {
+        if (totalResult.isSingleSubpartitionContainsAllData() || totalResult.isForward()) {
             // for dynamic graph and broadcast result, and forward result, we only produced one
             // subpartition, and all the downstream vertices should consume this subpartition.
             return 1;

flink-runtime/src/main/java/org/apache/flink/runtime/executiongraph/VertexInputInfoComputationUtils.java (+25 -7)

@@ -84,7 +84,8 @@ public static Map<IntermediateDataSetID, JobVertexInputInfo> computeVertexInputI
                                 parallelism,
                                 input::getNumSubpartitions,
                                 isDynamicGraph,
-                                input.isBroadcast()));
+                                input.isBroadcast(),
+                                input.isSingleSubpartitionContainsAllData()));
         }
     }
 
@@ -124,6 +125,7 @@ static JobVertexInputInfo computeVertexInputInfoForPointwise(
                             1,
                             () -> numOfSubpartitionsRetriever.apply(start),
                             isDynamicGraph,
+                            false,
                             false);
             executionVertexInputInfos.add(
                     new ExecutionVertexInputInfo(index, partitionRange, subpartitionRange));
@@ -145,6 +147,7 @@ static JobVertexInputInfo computeVertexInputInfoForPointwise(
                             numConsumers,
                             () -> numOfSubpartitionsRetriever.apply(finalPartitionNum),
                             isDynamicGraph,
+                            false,
                             false);
             executionVertexInputInfos.add(
                     new ExecutionVertexInputInfo(i, partitionRange, subpartitionRange));
@@ -165,14 +168,16 @@
      * @param numOfSubpartitionsRetriever a retriever to get the number of subpartitions
      * @param isDynamicGraph whether is dynamic graph
      * @param isBroadcast whether the edge is broadcast
+     * @param isSingleSubpartitionContainsAllData whether a single subpartition contains all data
      * @return the computed {@link JobVertexInputInfo}
      */
     static JobVertexInputInfo computeVertexInputInfoForAllToAll(
             int sourceCount,
             int targetCount,
             Function<Integer, Integer> numOfSubpartitionsRetriever,
             boolean isDynamicGraph,
-            boolean isBroadcast) {
+            boolean isBroadcast,
+            boolean isSingleSubpartitionContainsAllData) {
         final List<ExecutionVertexInputInfo> executionVertexInputInfos = new ArrayList<>();
         IndexRange partitionRange = new IndexRange(0, sourceCount - 1);
         for (int i = 0; i < targetCount; ++i) {
@@ -182,7 +187,8 @@ static JobVertexInputInfo computeVertexInputInfoForAllToAll(
                             targetCount,
                             () -> numOfSubpartitionsRetriever.apply(0),
                             isDynamicGraph,
-                            isBroadcast);
+                            isBroadcast,
+                            isSingleSubpartitionContainsAllData);
             executionVertexInputInfos.add(
                     new ExecutionVertexInputInfo(i, partitionRange, subpartitionRange));
         }
@@ -199,6 +205,7 @@ static JobVertexInputInfo computeVertexInputInfoForAllToAll(
      * @param numOfSubpartitionsSupplier a supplier to get the number of subpartitions
      * @param isDynamicGraph whether is dynamic graph
      * @param isBroadcast whether the edge is broadcast
+     * @param isSingleSubpartitionContainsAllData whether a single subpartition contains all data
      * @return the computed subpartition range
      */
     @VisibleForTesting
@@ -207,16 +214,22 @@ static IndexRange computeConsumedSubpartitionRange(
             int numConsumers,
             Supplier<Integer> numOfSubpartitionsSupplier,
             boolean isDynamicGraph,
-            boolean isBroadcast) {
+            boolean isBroadcast,
+            boolean isSingleSubpartitionContainsAllData) {
         int consumerIndex = consumerSubtaskIndex % numConsumers;
         if (!isDynamicGraph) {
             return new IndexRange(consumerIndex, consumerIndex);
         } else {
             int numSubpartitions = numOfSubpartitionsSupplier.get();
             if (isBroadcast) {
-                // broadcast results have only one subpartition, and be consumed multiple times.
-                checkArgument(numSubpartitions == 1);
-                return new IndexRange(0, 0);
+                if (isSingleSubpartitionContainsAllData) {
+                    // early-decided broadcast results have only one subpartition, which is
+                    // consumed multiple times.
+                    checkArgument(numSubpartitions == 1);
+                    return new IndexRange(0, 0);
+                } else {
+                    return new IndexRange(0, numSubpartitions - 1);
+                }
             } else {
                 checkArgument(consumerIndex < numConsumers);
                 checkArgument(numConsumers <= numSubpartitions);
@@ -246,6 +259,11 @@ public boolean isBroadcast() {
             return intermediateResult.isBroadcast();
         }
 
+        @Override
+        public boolean isSingleSubpartitionContainsAllData() {
+            return intermediateResult.isSingleSubpartitionContainsAllData();
+        }
+
         @Override
         public boolean isPointwise() {
             return intermediateResult.getConsumingDistributionPattern()
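
The new branch in computeConsumedSubpartitionRange is the behavioral core of this file: an early-decided broadcast consumer reads the single subpartition [0, 0], while a late-decided broadcast consumer must read every subpartition the hash-style producer wrote. A standalone re-derivation of just that logic (IndexRange modeled as int[2], the non-broadcast dynamic branch elided; this is a sketch, not the Flink class):

import java.util.function.Supplier;

// Standalone sketch of the patched range logic; not the Flink implementation.
public class SubpartitionRangeSketch {

    static int[] computeConsumedSubpartitionRange(
            int consumerSubtaskIndex,
            int numConsumers,
            Supplier<Integer> numOfSubpartitionsSupplier,
            boolean isDynamicGraph,
            boolean isBroadcast,
            boolean isSingleSubpartitionContainsAllData) {
        int consumerIndex = consumerSubtaskIndex % numConsumers;
        if (!isDynamicGraph) {
            return new int[] {consumerIndex, consumerIndex};
        }
        int numSubpartitions = numOfSubpartitionsSupplier.get();
        if (isBroadcast) {
            if (isSingleSubpartitionContainsAllData) {
                // early-decided broadcast: the producer wrote exactly one subpartition
                return new int[] {0, 0};
            }
            // late-decided broadcast: every consumer must read all subpartitions
            return new int[] {0, numSubpartitions - 1};
        }
        // non-broadcast dynamic case elided here; see the hunk above
        return new int[] {consumerIndex, consumerIndex};
    }

    public static void main(String[] args) {
        // early-decided broadcast, 1 subpartition -> [0, 0]
        int[] early = computeConsumedSubpartitionRange(1, 3, () -> 1, true, true, true);
        // late-decided broadcast, 4 subpartitions -> [0, 3]
        int[] late = computeConsumedSubpartitionRange(1, 3, () -> 4, true, true, false);
        System.out.println(early[0] + ".." + early[1]); // 0..0
        System.out.println(late[0] + ".." + late[1]);   // 0..3
    }
}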

flink-runtime/src/main/java/org/apache/flink/runtime/jobgraph/IntermediateDataSet.java (+12)

@@ -134,6 +134,18 @@ public void configure(
         }
     }
 
+    public void updateOutputPattern(
+            DistributionPattern distributionPattern, boolean isBroadcast, boolean isForward) {
+        checkState(consumers.isEmpty(), "The output job edges have already been added.");
+        checkState(
+                numJobEdgesToCreate == 1,
+                "Modification is not allowed when the subscribing output is reused.");
+
+        this.distributionPattern = distributionPattern;
+        this.isBroadcast = isBroadcast;
+        this.isForward = isForward;
+    }
+
     public void increaseNumJobEdgesToCreate() {
         this.numJobEdgesToCreate++;
     }
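
The two checkState guards restrict the mutation to the window before JobGraph wiring is finalized: no consumers attached yet, and exactly one pending edge. A toy model of the guard behavior (hypothetical class, using plain IllegalStateException in place of Flink's Preconditions.checkState):

import java.util.ArrayList;
import java.util.List;

// Toy model (not the Flink class) of the updateOutputPattern() guard conditions.
public class OutputPatternGuardSketch {
    private final List<String> consumers = new ArrayList<>();
    private int numJobEdgesToCreate = 1;
    private String distributionPattern = "POINTWISE";

    void updateOutputPattern(String newPattern) {
        if (!consumers.isEmpty()) {
            throw new IllegalStateException("The output job edges have already been added.");
        }
        if (numJobEdgesToCreate != 1) {
            throw new IllegalStateException(
                    "Modification is not allowed when the subscribing output is reused.");
        }
        this.distributionPattern = newPattern;
    }

    public static void main(String[] args) {
        OutputPatternGuardSketch ds = new OutputPatternGuardSketch();
        ds.updateOutputPattern("ALL_TO_ALL"); // fine: no consumers yet, single pending edge

        ds.consumers.add("downstream-edge");
        try {
            ds.updateOutputPattern("POINTWISE");
        } catch (IllegalStateException e) {
            System.out.println(e.getMessage()); // guard rejects the late mutation
        }
    }
}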

flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adaptivebatch/AbstractBlockingResultInfo.java (+11 -2)

@@ -22,6 +22,7 @@
 import org.apache.flink.runtime.executiongraph.ResultPartitionBytes;
 import org.apache.flink.runtime.jobgraph.IntermediateDataSetID;
 
+import java.util.Collections;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -44,11 +45,14 @@ abstract class AbstractBlockingResultInfo implements BlockingResultInfo {
     protected final Map<Integer, long[]> subpartitionBytesByPartitionIndex;
 
     AbstractBlockingResultInfo(
-            IntermediateDataSetID resultId, int numOfPartitions, int numOfSubpartitions) {
+            IntermediateDataSetID resultId,
+            int numOfPartitions,
+            int numOfSubpartitions,
+            Map<Integer, long[]> subpartitionBytesByPartitionIndex) {
         this.resultId = checkNotNull(resultId);
         this.numOfPartitions = numOfPartitions;
         this.numOfSubpartitions = numOfSubpartitions;
-        this.subpartitionBytesByPartitionIndex = new HashMap<>();
+        this.subpartitionBytesByPartitionIndex = new HashMap<>(subpartitionBytesByPartitionIndex);
     }
 
     @Override
@@ -72,4 +76,9 @@ public void resetPartitionInfo(int partitionIndex) {
     int getNumOfRecordedPartitions() {
         return subpartitionBytesByPartitionIndex.size();
     }
+
+    @Override
+    public Map<Integer, long[]> getSubpartitionBytesByPartitionIndex() {
+        return Collections.unmodifiableMap(subpartitionBytesByPartitionIndex);
+    }
 }
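
The constructor now copies the incoming map and the getter exposes only a read-only view. A short standalone sketch (hypothetical class, not Flink code) of why both sides matter when a result info is rebuilt and its recorded bytes are handed over:

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

// Standalone illustration of the copy-in / read-only-out pattern used above.
public class DefensiveMapSketch {
    private final Map<Integer, long[]> bytesByPartition;

    DefensiveMapSketch(Map<Integer, long[]> initial) {
        // Copy, so later mutations of the caller's map (or of the old result
        // info it came from) cannot leak into this instance.
        this.bytesByPartition = new HashMap<>(initial);
    }

    Map<Integer, long[]> getBytesByPartition() {
        // Read-only view: callers can read the recorded statistics but
        // cannot mutate this instance's internal state.
        return Collections.unmodifiableMap(bytesByPartition);
    }

    public static void main(String[] args) {
        Map<Integer, long[]> recorded = new HashMap<>();
        recorded.put(0, new long[] {128L, 256L});

        DefensiveMapSketch oldInfo = new DefensiveMapSketch(recorded);
        // Rebuilding a result info from the old one's statistics:
        DefensiveMapSketch newInfo = new DefensiveMapSketch(oldInfo.getBytesByPartition());

        try {
            newInfo.getBytesByPartition().put(1, new long[] {512L});
        } catch (UnsupportedOperationException e) {
            System.out.println("view is read-only"); // mutation is rejected
        }
    }
}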

flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adaptivebatch/AdaptiveBatchScheduler.java (+63 -5)

@@ -274,9 +274,14 @@ public void onNewJobVerticesAdded(List<JobVertex> newVertices, int pendingOperat
             // 4. update json plan
             getExecutionGraph().setJsonPlan(JsonPlanGenerator.generatePlan(getJobGraph()));
 
-            // 5. try aggregate subpartition bytes
+            // 5. In broadcast join optimization, results might be written first with a hash
+            // method and then read with a broadcast method. Therefore, we need to update the
+            // result info:
+            // 1. Update the DistributionPattern to reflect the optimized data distribution.
+            // 2. Aggregate subpartition bytes when possible for efficiency.
             for (JobVertex newVertex : newVertices) {
                 for (JobEdge input : newVertex.getInputs()) {
+                    tryUpdateResultInfo(input.getSourceId(), input.getDistributionPattern());
                     Optional.ofNullable(blockingResultInfos.get(input.getSourceId()))
                             .ifPresent(this::maybeAggregateSubpartitionBytes);
                 }
@@ -490,7 +495,8 @@ private void updateResultPartitionBytesMetrics(
                     result.getId(),
                     (ignored, resultInfo) -> {
                         if (resultInfo == null) {
-                            resultInfo = createFromIntermediateResult(result);
+                            resultInfo =
+                                    createFromIntermediateResult(result, new HashMap<>());
                         }
                         resultInfo.recordPartitionInfo(
                                 partitionId.getPartitionNumber(), partitionBytes);
@@ -500,6 +506,16 @@
         });
     }
 
+    /**
+     * Aggregates subpartition bytes if all conditions are met. This method checks whether the
+     * result info instance is of type {@link AllToAllBlockingResultInfo}, whether all consumer
+     * vertices are created, and whether all consumer vertices are initialized. If these conditions
+     * are satisfied, the fine-grained statistics will no longer be required by the consumer
+     * vertices, and the subpartition bytes can then be aggregated.
+     *
+     * @param resultInfo the BlockingResultInfo instance to potentially aggregate subpartition
+     *     bytes for.
+     */
     private void maybeAggregateSubpartitionBytes(BlockingResultInfo resultInfo) {
         IntermediateResult intermediateResult =
                 getExecutionGraph().getAllIntermediateResults().get(resultInfo.getResultId());
@@ -937,21 +953,24 @@ private static void resetDynamicParallelism(Iterable<JobVertex> vertices) {
         }
     }
 
-    private static BlockingResultInfo createFromIntermediateResult(IntermediateResult result) {
+    private static BlockingResultInfo createFromIntermediateResult(
+            IntermediateResult result, Map<Integer, long[]> subpartitionBytesByPartitionIndex) {
         checkArgument(result != null);
         // Note that for dynamic graph, different partitions in the same result have the same number
         // of subpartitions.
         if (result.getConsumingDistributionPattern() == DistributionPattern.POINTWISE) {
             return new PointwiseBlockingResultInfo(
                     result.getId(),
                     result.getNumberOfAssignedPartitions(),
-                    result.getPartitions()[0].getNumberOfSubpartitions());
+                    result.getPartitions()[0].getNumberOfSubpartitions(),
+                    subpartitionBytesByPartitionIndex);
         } else {
             return new AllToAllBlockingResultInfo(
                     result.getId(),
                     result.getNumberOfAssignedPartitions(),
                     result.getPartitions()[0].getNumberOfSubpartitions(),
-                    result.isBroadcast());
+                    result.isSingleSubpartitionContainsAllData(),
+                    subpartitionBytesByPartitionIndex);
         }
     }
 
@@ -965,6 +984,45 @@ SpeculativeExecutionHandler getSpeculativeExecutionHandler() {
         return speculativeExecutionHandler;
     }
 
+    /**
+     * Tries to update the result information for a given IntermediateDataSetID according to the
+     * specified DistributionPattern. This ensures consistency between the distribution pattern and
+     * the stored result information.
+     *
+     * <p>The result information is updated under the following conditions:
+     *
+     * <ul>
+     *   <li>If the target pattern is ALL_TO_ALL and the current result info is POINTWISE, a new
+     *       BlockingResultInfo is created and stored.
+     *   <li>If the target pattern is POINTWISE and the current result info is ALL_TO_ALL, a
+     *       conversion is similarly triggered.
+     *   <li>Additionally, for ALL_TO_ALL patterns, the broadcast status of the result info is
+     *       updated.
+     * </ul>
+     *
+     * @param id The ID of the intermediate dataset to update.
+     * @param targetPattern The target distribution pattern to apply.
+     */
+    private void tryUpdateResultInfo(IntermediateDataSetID id, DistributionPattern targetPattern) {
+        if (blockingResultInfos.containsKey(id)) {
+            BlockingResultInfo resultInfo = blockingResultInfos.get(id);
+            IntermediateResult result = getExecutionGraph().getAllIntermediateResults().get(id);
+
+            if ((targetPattern == DistributionPattern.ALL_TO_ALL && resultInfo.isPointwise())
+                    || (targetPattern == DistributionPattern.POINTWISE
+                            && !resultInfo.isPointwise())) {
+
+                BlockingResultInfo newInfo =
+                        createFromIntermediateResult(
+                                result, resultInfo.getSubpartitionBytesByPartitionIndex());
+
+                blockingResultInfos.put(id, newInfo);
+            } else if (resultInfo instanceof AllToAllBlockingResultInfo) {
+                ((AllToAllBlockingResultInfo) resultInfo).setBroadcast(result.isBroadcast());
+            }
+        }
+    }
+
     private class DefaultBatchJobRecoveryContext implements BatchJobRecoveryContext {
 
         private final FailoverStrategy restartStrategyOnResultConsumable =
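
Putting the scheduler-side pieces together: when an optimized edge arrives with a different distribution pattern, tryUpdateResultInfo rebuilds the result info under the new pattern while carrying over the already-recorded subpartition bytes rather than discarding them. A compact standalone model of that conversion (all class and method names hypothetical):

import java.util.HashMap;
import java.util.Map;

// Toy model (hypothetical classes) of the result-info conversion performed by
// tryUpdateResultInfo: pattern changes rebuild the info, statistics survive.
public class ResultInfoConversionSketch {

    enum Pattern { POINTWISE, ALL_TO_ALL }

    static class Info {
        final Pattern pattern;
        final Map<Integer, long[]> bytesByPartition;

        Info(Pattern pattern, Map<Integer, long[]> bytes) {
            this.pattern = pattern;
            this.bytesByPartition = new HashMap<>(bytes);
        }
    }

    static Info tryUpdate(Info current, Pattern targetPattern) {
        if (current.pattern != targetPattern) {
            // Pattern changed by the optimization: rebuild, carrying statistics over.
            return new Info(targetPattern, current.bytesByPartition);
        }
        return current; // pattern unchanged: keep the existing info
    }

    public static void main(String[] args) {
        Map<Integer, long[]> bytes = new HashMap<>();
        bytes.put(0, new long[] {1024L});

        Info pointwise = new Info(Pattern.POINTWISE, bytes);
        Info rebuilt = tryUpdate(pointwise, Pattern.ALL_TO_ALL);

        System.out.println(rebuilt.pattern);                         // ALL_TO_ALL
        System.out.println(rebuilt.bytesByPartition.containsKey(0)); // true
    }
}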

flink-runtime/src/main/java/org/apache/flink/runtime/scheduler/adaptivebatch/AdaptiveExecutionHandlerFactory.java (+3 -1)

@@ -21,6 +21,7 @@
 import org.apache.flink.runtime.jobgraph.JobGraph;
 import org.apache.flink.streaming.api.graph.ExecutionPlan;
 import org.apache.flink.streaming.api.graph.StreamGraph;
+import org.apache.flink.util.DynamicCodeLoadingException;
 
 import java.util.concurrent.Executor;
 
@@ -46,7 +47,8 @@ public class AdaptiveExecutionHandlerFactory {
     public static AdaptiveExecutionHandler create(
             ExecutionPlan executionPlan,
             ClassLoader userClassLoader,
-            Executor serializationExecutor) {
+            Executor serializationExecutor)
+            throws DynamicCodeLoadingException {
         if (executionPlan instanceof JobGraph) {
             return new NonAdaptiveExecutionHandler((JobGraph) executionPlan);
         } else {
