<?xml version="1.0" encoding="utf-8" standalone="yes"?><rss version="2.0" xmlns:atom="http://www.w3.org/2005/Atom" xmlns:content="http://purl.org/rss/1.0/modules/content/"><channel><title>Scala on Ralph's Blog</title><link>https://pothos.dpdns.org/tags/scala/</link><description>Recent content in Scala on Ralph's Blog</description><generator>Hugo -- 0.147.7</generator><language>zh-cn</language><lastBuildDate>Thu, 25 Dec 2025 00:00:00 +0000</lastBuildDate><atom:link href="https://pothos.dpdns.org/tags/scala/index.xml" rel="self" type="application/rss+xml"/><item><title>12.1 Spark 源码解析</title><link>https://pothos.dpdns.org/posts/12.1-spark-%E6%BA%90%E7%A0%81%E8%A7%A3%E6%9E%90/</link><pubDate>Wed, 24 Dec 2025 00:00:00 +0000</pubDate><guid>https://pothos.dpdns.org/posts/12.1-spark-%E6%BA%90%E7%A0%81%E8%A7%A3%E6%9E%90/</guid><description>&lt;h1 id="121-spark源码解析">12.1 Spark源码解析&lt;/h1>
&lt;h2 id="目录">目录&lt;/h2>
&lt;details>
&lt;summary>点击展开目录&lt;/summary>
&lt;h3 id="一spark核心架构与初始化">一、Spark核心架构与初始化&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#11-sparkcontext%E5%88%9D%E5%A7%8B%E5%8C%96%E6%B5%81%E7%A8%8B">1.1 SparkContext初始化流程&lt;/a>&lt;/li>
&lt;li>&lt;a href="#12-%E8%BF%90%E8%A1%8C%E7%8E%AF%E5%A2%83%E6%9E%84%E5%BB%BA">1.2 运行环境构建&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="二rdd设计与实现">二、RDD设计与实现&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#21-rdd%E6%A0%B8%E5%BF%83%E6%8A%BD%E8%B1%A1">2.1 RDD核心抽象&lt;/a>&lt;/li>
&lt;li>&lt;a href="#22-rdd%E4%BA%94%E5%A4%A7%E7%89%B9%E6%80%A7">2.2 RDD五大特性&lt;/a>&lt;/li>
&lt;li>&lt;a href="#23-rdd%E6%93%8D%E4%BD%9C%E6%89%A7%E8%A1%8C">2.3 RDD操作执行&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="三任务调度系统">三、任务调度系统&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#31-dagscheduler%E8%B0%83%E5%BA%A6%E5%99%A8">3.1 DAGScheduler调度器&lt;/a>&lt;/li>
&lt;li>&lt;a href="#32-stage%E5%88%92%E5%88%86%E7%AE%97%E6%B3%95">3.2 Stage划分算法&lt;/a>&lt;/li>
&lt;li>&lt;a href="#33-taskscheduler%E4%BB%BB%E5%8A%A1%E8%B0%83%E5%BA%A6">3.3 TaskScheduler任务调度&lt;/a>&lt;/li>
&lt;li>&lt;a href="#34-task%E6%89%A7%E8%A1%8C%E6%9C%BA%E5%88%B6">3.4 Task执行机制&lt;/a>&lt;/li>
&lt;li>&lt;a href="#35-%E4%BB%BB%E5%8A%A1%E5%88%86%E5%8F%91%E4%B8%8E%E8%B0%83%E5%BA%A6%E6%B5%81%E7%A8%8B">3.5 任务分发与调度流程&lt;/a>&lt;/li>
&lt;li>&lt;a href="#36-%E5%AE%B9%E9%94%99%E4%B8%8E%E7%9B%91%E6%8E%A7%E6%9C%BA%E5%88%B6">3.6 容错与监控机制&lt;/a>&lt;/li>
&lt;li>&lt;a href="#37-%E5%A4%B1%E8%B4%A5%E9%87%8D%E8%AF%95%E6%9C%BA%E5%88%B6">3.7 失败重试机制&lt;/a>&lt;/li>
&lt;li>&lt;a href="#38-rdd%E8%A1%80%E7%BB%9F%E6%81%A2%E5%A4%8D">3.8 RDD血统恢复&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="四内存管理系统">四、内存管理系统&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#41-%E7%BB%9F%E4%B8%80%E5%86%85%E5%AD%98%E7%AE%A1%E7%90%86">4.1 统一内存管理&lt;/a>&lt;/li>
&lt;li>&lt;a href="#42-%E7%AE%97%E5%AD%90%E5%86%85%E5%AD%98%E5%AD%98%E5%82%A8">4.2 算子内存存储&lt;/a>&lt;/li>
&lt;li>&lt;a href="#43-%E5%86%85%E5%AD%98%E7%9B%91%E6%8E%A7%E4%B8%8E%E4%BC%98%E5%8C%96">4.3 内存监控与优化&lt;/a>&lt;/li>
&lt;li>&lt;a href="#44-%E5%86%85%E5%AD%98%E7%AE%A1%E7%90%86%E7%B3%BB%E7%BB%9F%E9%AB%98%E7%BA%A7%E7%89%B9%E6%80%A7">4.4 内存管理系统（高级特性）&lt;/a>&lt;/li>
&lt;li>&lt;a href="#45-%E7%BB%9F%E4%B8%80%E5%86%85%E5%AD%98%E7%AE%A1%E7%90%86%E8%AF%A6%E7%BB%86%E5%AE%9E%E7%8E%B0">4.5 统一内存管理（详细实现）&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="五shuffle机制实现">五、Shuffle机制实现&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#51-sort-shuffle%E6%A0%B8%E5%BF%83">5.1 Sort Shuffle核心&lt;/a>&lt;/li>
&lt;li>&lt;a href="#52-unsafeshufflewriter">5.2 UnsafeShuffleWriter&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="六存储系统设计">六、存储系统设计&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#61-blockmanager%E5%AD%98%E5%82%A8">6.1 BlockManager存储&lt;/a>&lt;/li>
&lt;li>&lt;a href="#62-%E7%BC%93%E5%AD%98%E6%9C%BA%E5%88%B6">6.2 缓存机制&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="七网络通信系统">七、网络通信系统&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#71-%E7%BD%91%E7%BB%9C%E4%BC%A0%E8%BE%93%E6%9C%8D%E5%8A%A1">7.1 网络传输服务&lt;/a>&lt;/li>
&lt;li>&lt;a href="#72-block%E4%BC%A0%E8%BE%93%E6%9C%BA%E5%88%B6">7.2 Block传输机制&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="八动态资源分配">八、动态资源分配&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#81-%E8%B5%84%E6%BA%90%E5%88%86%E9%85%8D%E7%AD%96%E7%95%A5">8.1 资源分配策略&lt;/a>&lt;/li>
&lt;li>&lt;a href="#82-%E5%8A%A8%E6%80%81%E4%BC%B8%E7%BC%A9%E7%AE%97%E6%B3%95">8.2 动态伸缩算法&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="九spark-sql执行引擎">九、Spark SQL执行引擎&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#91-catalyst%E4%BC%98%E5%8C%96%E5%99%A8%E6%A0%B8%E5%BF%83">9.1 Catalyst优化器核心&lt;/a>&lt;/li>
&lt;li>&lt;a href="#92-%E4%BB%A3%E7%A0%81%E7%94%9F%E6%88%90%E4%B8%8E%E6%89%A7%E8%A1%8C">9.2 代码生成与执行&lt;/a>&lt;/li>
&lt;li>&lt;a href="#93-%E5%88%97%E5%BC%8F%E5%AD%98%E5%82%A8%E4%B8%8E%E5%90%91%E9%87%8F%E5%8C%96">9.3 列式存储与向量化&lt;/a>&lt;/li>
&lt;li>&lt;a href="#94-%E8%87%AA%E9%80%82%E5%BA%94%E6%9F%A5%E8%AF%A2%E6%89%A7%E8%A1%8Caqe">9.4 自适应查询执行(AQE)&lt;/a>&lt;/li>
&lt;li>&lt;a href="#95-%E7%AA%97%E5%8F%A3%E5%87%BD%E6%95%B0%E5%AE%9E%E7%8E%B0%E5%8E%9F%E7%90%86%E4%BB%A5-lag-%E4%B8%BA%E4%BE%8B">9.5 窗口函数实现原理（以 Lag 为例）&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="十广播变量与累加器">十、广播变量与累加器&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#101-%E5%B9%BF%E6%92%AD%E5%8F%98%E9%87%8F%E5%AE%9E%E7%8E%B0%E6%9C%BA%E5%88%B6">10.1 广播变量实现机制&lt;/a>&lt;/li>
&lt;li>&lt;a href="#102-%E7%B4%AF%E5%8A%A0%E5%99%A8%E6%BA%90%E7%A0%81%E5%88%86%E6%9E%90">10.2 累加器源码分析&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="十一检查点与容错机制">十一、检查点与容错机制&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#111-%E6%A3%80%E6%9F%A5%E7%82%B9%E6%9C%BA%E5%88%B6%E5%AE%9E%E7%8E%B0">11.1 检查点机制实现&lt;/a>&lt;/li>
&lt;li>&lt;a href="#112-%E5%A4%B1%E8%B4%A5%E9%87%8D%E8%AF%95%E4%B8%8E%E8%A1%80%E7%BB%9F%E6%81%A2%E5%A4%8D">11.2 失败重试与血统恢复&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="十二集群管理器集成">十二、集群管理器集成&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#121-yarn%E9%9B%86%E6%88%90%E6%BA%90%E7%A0%81">12.1 YARN集成源码&lt;/a>&lt;/li>
&lt;li>&lt;a href="#122-kubernetes%E9%9B%86%E6%88%90">12.2 Kubernetes集成&lt;/a>&lt;/li>
&lt;/ul>
&lt;hr>
&lt;/details>
&lt;h2 id="一spark核心架构与初始化-1">一、Spark核心架构与初始化&lt;/h2>
&lt;h3 id="11-sparkcontext初始化流程">1.1 SparkContext初始化流程&lt;/h3>
&lt;h4 id="sparkcontext初始化流程图">SparkContext初始化流程图&lt;/h4>
&lt;div class="mermaid">graph TD
A[SparkContext构造] --> B[创建SparkConf配置]
B --> C[创建SparkEnv运行环境]
C --> D[创建StatusTracker状态跟踪器]
D --> E[创建TaskScheduler任务调度器]
E --> F[创建DAGScheduler DAG调度器]
F --> G[启动TaskScheduler]
G --> H[设置默认并行度]
H --> I[SparkContext初始化完成]
C --> C1[创建SerializerManager]
C --> C2[创建BlockManager]
C --> C3[创建MemoryManager]
C --> C4[创建MetricsSystem]
E --> E1[根据master创建调度器]
E1 --> E2[Standalone模式]
E1 --> E3[YARN模式]
E1 --> E4[Local模式]
style A fill:#e1f5fe
style I fill:#e8f5e8
style C fill:#fff3e0
style F fill:#f3e5f5
&lt;/div>
&lt;h3 id="12-运行环境构建">1.2 运行环境构建&lt;/h3>
&lt;h4 id="sparkcontext初始化源码分析">SparkContext初始化源码分析&lt;/h4>
&lt;div class="highlight">&lt;pre tabindex="0" style="color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4;">&lt;code class="language-scala" data-lang="scala">&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">// SparkContext.scala 核心初始化流程
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span>&lt;span style="color:#66d9ef">class&lt;/span> &lt;span style="color:#a6e22e">SparkContext&lt;/span>&lt;span style="color:#f92672">(&lt;/span>config&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">SparkConf&lt;/span>&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#66d9ef">extends&lt;/span> &lt;span style="color:#a6e22e">Logging&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 1. 创建SparkEnv - 核心运行环境
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">private&lt;/span> &lt;span style="color:#66d9ef">val&lt;/span> env&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">SparkEnv&lt;/span> &lt;span style="color:#f92672">=&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#a6e22e">SparkEnv&lt;/span>&lt;span style="color:#f92672">.&lt;/span>createDriverEnv&lt;span style="color:#f92672">(&lt;/span>conf&lt;span style="color:#f92672">,&lt;/span> isLocal&lt;span style="color:#f92672">,&lt;/span> listenerBus&lt;span style="color:#f92672">,&lt;/span> numCores&lt;span style="color:#f92672">,&lt;/span> mockOutputCommitCoordinator&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 2. 创建状态跟踪器
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">private&lt;/span> &lt;span style="color:#66d9ef">val&lt;/span> statusTracker &lt;span style="color:#66d9ef">=&lt;/span> &lt;span style="color:#66d9ef">new&lt;/span> &lt;span style="color:#a6e22e">SparkStatusTracker&lt;/span>&lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#66d9ef">this&lt;/span>&lt;span style="color:#f92672">,&lt;/span> sparkUI&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 3. 创建任务调度器
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">private&lt;/span> &lt;span style="color:#66d9ef">val&lt;/span> &lt;span style="color:#f92672">(&lt;/span>sched&lt;span style="color:#f92672">,&lt;/span> ts&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#66d9ef">=&lt;/span> &lt;span style="color:#a6e22e">SparkContext&lt;/span>&lt;span style="color:#f92672">.&lt;/span>createTaskScheduler&lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#66d9ef">this&lt;/span>&lt;span style="color:#f92672">,&lt;/span> master&lt;span style="color:#f92672">,&lt;/span> deployMode&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">private&lt;/span> &lt;span style="color:#66d9ef">val&lt;/span> taskScheduler &lt;span style="color:#66d9ef">=&lt;/span> ts
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 4. 创建DAG调度器
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">private&lt;/span> &lt;span style="color:#66d9ef">val&lt;/span> dagScheduler &lt;span style="color:#66d9ef">=&lt;/span> &lt;span style="color:#66d9ef">new&lt;/span> &lt;span style="color:#a6e22e">DAGScheduler&lt;/span>&lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#66d9ef">this&lt;/span>&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 5. 启动任务调度器
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> taskScheduler&lt;span style="color:#f92672">.&lt;/span>start&lt;span style="color:#f92672">()&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 6. 设置默认并行度
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">private&lt;/span> &lt;span style="color:#66d9ef">val&lt;/span> defaultParallelism&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Int&lt;/span> &lt;span style="color:#f92672">=&lt;/span> taskScheduler&lt;span style="color:#f92672">.&lt;/span>defaultParallelism
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 核心方法：创建RDD
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> parallelize&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T:&lt;/span> &lt;span style="color:#66d9ef">ClassTag&lt;/span>&lt;span style="color:#f92672">](&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> seq&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Seq&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">],&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> numSlices&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Int&lt;/span> &lt;span style="color:#f92672">=&lt;/span> defaultParallelism&lt;span style="color:#f92672">)&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">RDD&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">]&lt;/span> &lt;span style="color:#66d9ef">=&lt;/span> withScope &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">new&lt;/span> &lt;span style="color:#a6e22e">ParallelCollectionRDD&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">](&lt;/span>&lt;span style="color:#66d9ef">this&lt;/span>&lt;span style="color:#f92672">,&lt;/span> seq&lt;span style="color:#f92672">,&lt;/span> numSlices&lt;span style="color:#f92672">,&lt;/span> &lt;span style="color:#a6e22e">Map&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">Int&lt;/span>, &lt;span style="color:#66d9ef">Seq&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">String&lt;/span>&lt;span style="color:#f92672">]]())&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 核心方法：提交作业
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> runJob&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>, &lt;span style="color:#66d9ef">U:&lt;/span> &lt;span style="color:#66d9ef">ClassTag&lt;/span>&lt;span style="color:#f92672">](&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> rdd&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">RDD&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">],&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> func&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#66d9ef">TaskContext&lt;/span>&lt;span style="color:#f92672">,&lt;/span> &lt;span style="color:#66d9ef">Iterator&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">])&lt;/span> &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span> U&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> partitions&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Seq&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">Int&lt;/span>&lt;span style="color:#f92672">],&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> resultHandler&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#66d9ef">Int&lt;/span>&lt;span style="color:#f92672">,&lt;/span> &lt;span style="color:#66d9ef">U&lt;/span>&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span> &lt;span style="color:#a6e22e">Unit&lt;/span>&lt;span style="color:#f92672">)&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Unit&lt;/span> &lt;span style="color:#f92672">=&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> dagScheduler&lt;span style="color:#f92672">.&lt;/span>runJob&lt;span style="color:#f92672">(&lt;/span>rdd&lt;span style="color:#f92672">,&lt;/span> func&lt;span style="color:#f92672">,&lt;/span> partitions&lt;span style="color:#f92672">,&lt;/span> callSite&lt;span style="color:#f92672">,&lt;/span> resultHandler&lt;span style="color:#f92672">,&lt;/span> localProperties&lt;span style="color:#f92672">.&lt;/span>get&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/div>&lt;hr>
&lt;h2 id="二rdd设计与实现-1">二、RDD设计与实现&lt;/h2>
&lt;h3 id="21-rdd核心抽象">2.1 RDD核心抽象&lt;/h3>
&lt;h4 id="rdd五大特性实现流程">RDD五大特性实现流程&lt;/h4>
&lt;div class="mermaid">graph LR
A[RDD实例化] --> B[getPartitions&lt;br/>获取分区列表]
B --> C[compute&lt;br/>定义计算函数]
C --> D[getDependencies&lt;br/>设置依赖关系]
D --> E[partitioner&lt;br/>设置分区器]
E --> F[getPreferredLocations&lt;br/>位置偏好]
F --> G[RDD创建完成]
style A fill:#e1f5fe
style G fill:#e8f5e8
&lt;/div>
&lt;h3 id="22-rdd五大特性">2.2 RDD五大特性&lt;/h3>
&lt;h4 id="rdd源码核心实现">RDD源码核心实现&lt;/h4>
&lt;div class="highlight">&lt;pre tabindex="0" style="color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4;">&lt;code class="language-scala" data-lang="scala">&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">// RDD.scala 核心抽象
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span>&lt;span style="color:#66d9ef">abstract&lt;/span> &lt;span style="color:#66d9ef">class&lt;/span> &lt;span style="color:#a6e22e">RDD&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T:&lt;/span> &lt;span style="color:#66d9ef">ClassTag&lt;/span>&lt;span style="color:#f92672">](&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#a6e22e">@transient&lt;/span> &lt;span style="color:#66d9ef">private&lt;/span> &lt;span style="color:#66d9ef">var&lt;/span> &lt;span style="color:#a6e22e">_sc&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">SparkContext&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#a6e22e">@transient&lt;/span> &lt;span style="color:#66d9ef">private&lt;/span> &lt;span style="color:#66d9ef">var&lt;/span> deps&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Seq&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">Dependency&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">]]&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#66d9ef">extends&lt;/span> &lt;span style="color:#a6e22e">Serializable&lt;/span> &lt;span style="color:#66d9ef">with&lt;/span> &lt;span style="color:#a6e22e">Logging&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 五大特性的具体实现
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 1. 分区列表
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">protected&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> getPartitions&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Array&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">Partition&lt;/span>&lt;span style="color:#f92672">]&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 2. 计算函数
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> compute&lt;span style="color:#f92672">(&lt;/span>split&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Partition&lt;/span>&lt;span style="color:#f92672">,&lt;/span> context&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">TaskContext&lt;/span>&lt;span style="color:#f92672">)&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Iterator&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">]&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 3. 依赖关系
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">protected&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> getDependencies&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Seq&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">Dependency&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">]]&lt;/span> &lt;span style="color:#66d9ef">=&lt;/span> deps
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 4. 分区器（可选）
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#a6e22e">@transient&lt;/span> &lt;span style="color:#66d9ef">val&lt;/span> partitioner&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Option&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">Partitioner&lt;/span>&lt;span style="color:#f92672">]&lt;/span> &lt;span style="color:#66d9ef">=&lt;/span> &lt;span style="color:#a6e22e">None&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 5. 位置偏好（可选）
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">protected&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> getPreferredLocations&lt;span style="color:#f92672">(&lt;/span>split&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Partition&lt;/span>&lt;span style="color:#f92672">)&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Seq&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">String&lt;/span>&lt;span style="color:#f92672">]&lt;/span> &lt;span style="color:#66d9ef">=&lt;/span> &lt;span style="color:#a6e22e">Nil&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// Transformation操作实现
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> map&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">U:&lt;/span> &lt;span style="color:#66d9ef">ClassTag&lt;/span>&lt;span style="color:#f92672">](&lt;/span>f&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">T&lt;/span> &lt;span style="color:#f92672">=&amp;gt;&lt;/span> U&lt;span style="color:#f92672">)&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">RDD&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">U&lt;/span>&lt;span style="color:#f92672">]&lt;/span> &lt;span style="color:#66d9ef">=&lt;/span> withScope &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> cleanF &lt;span style="color:#66d9ef">=&lt;/span> sc&lt;span style="color:#f92672">.&lt;/span>clean&lt;span style="color:#f92672">(&lt;/span>f&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">new&lt;/span> &lt;span style="color:#a6e22e">MapPartitionsRDD&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">U&lt;/span>, &lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">](&lt;/span>&lt;span style="color:#66d9ef">this&lt;/span>&lt;span style="color:#f92672">,&lt;/span> &lt;span style="color:#f92672">(&lt;/span>context&lt;span style="color:#f92672">,&lt;/span> pid&lt;span style="color:#f92672">,&lt;/span> iter&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span> iter&lt;span style="color:#f92672">.&lt;/span>map&lt;span style="color:#f92672">(&lt;/span>cleanF&lt;span style="color:#f92672">))&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">def&lt;/span> filter&lt;span style="color:#f92672">(&lt;/span>f&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">T&lt;/span> &lt;span style="color:#f92672">=&amp;gt;&lt;/span> &lt;span style="color:#a6e22e">Boolean&lt;/span>&lt;span style="color:#f92672">)&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">RDD&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">]&lt;/span> &lt;span style="color:#66d9ef">=&lt;/span> withScope &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> cleanF &lt;span style="color:#66d9ef">=&lt;/span> sc&lt;span style="color:#f92672">.&lt;/span>clean&lt;span style="color:#f92672">(&lt;/span>f&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">new&lt;/span> &lt;span style="color:#a6e22e">MapPartitionsRDD&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>, &lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">](&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">this&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">(&lt;/span>context&lt;span style="color:#f92672">,&lt;/span> pid&lt;span style="color:#f92672">,&lt;/span> iter&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span> iter&lt;span style="color:#f92672">.&lt;/span>filter&lt;span style="color:#f92672">(&lt;/span>cleanF&lt;span style="color:#f92672">),&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> preservesPartitioning &lt;span style="color:#66d9ef">=&lt;/span> &lt;span style="color:#66d9ef">true&lt;/span>&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 注：为便于说明而简化；真实源码中 reduceByKey 定义在 PairRDDFunctions[K, V] 中，经隐式转换作用于 RDD[(K, V)]
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> reduceByKey&lt;span style="color:#f92672">(&lt;/span>func&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">,&lt;/span> &lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span> T&lt;span style="color:#f92672">)&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">RDD&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">]&lt;/span> &lt;span style="color:#66d9ef">=&lt;/span> self&lt;span style="color:#f92672">.&lt;/span>withScope &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> reduceByKey&lt;span style="color:#f92672">(&lt;/span>defaultPartitioner&lt;span style="color:#f92672">(&lt;/span>self&lt;span style="color:#f92672">),&lt;/span> func&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// Action操作实现
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> collect&lt;span style="color:#f92672">()&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Array&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">]&lt;/span> &lt;span style="color:#66d9ef">=&lt;/span> withScope &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> results &lt;span style="color:#66d9ef">=&lt;/span> sc&lt;span style="color:#f92672">.&lt;/span>runJob&lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#66d9ef">this&lt;/span>&lt;span style="color:#f92672">,&lt;/span> &lt;span style="color:#f92672">(&lt;/span>iter&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Iterator&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">])&lt;/span> &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span> iter&lt;span style="color:#f92672">.&lt;/span>toArray&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#a6e22e">Array&lt;/span>&lt;span style="color:#f92672">.&lt;/span>concat&lt;span style="color:#f92672">(&lt;/span>results&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#66d9ef">*&lt;/span>&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">def&lt;/span> count&lt;span style="color:#f92672">()&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Long&lt;/span> &lt;span style="color:#f92672">=&lt;/span> sc&lt;span style="color:#f92672">.&lt;/span>runJob&lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#66d9ef">this&lt;/span>&lt;span style="color:#f92672">,&lt;/span> &lt;span style="color:#a6e22e">Utils&lt;/span>&lt;span style="color:#f92672">.&lt;/span>getIteratorSize &lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">).&lt;/span>sum
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">def&lt;/span> foreach&lt;span style="color:#f92672">(&lt;/span>f&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">T&lt;/span> &lt;span style="color:#f92672">=&amp;gt;&lt;/span> &lt;span style="color:#a6e22e">Unit&lt;/span>&lt;span style="color:#f92672">)&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Unit&lt;/span> &lt;span style="color:#f92672">=&lt;/span> withScope &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> cleanF &lt;span style="color:#66d9ef">=&lt;/span> sc&lt;span style="color:#f92672">.&lt;/span>clean&lt;span style="color:#f92672">(&lt;/span>f&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> sc&lt;span style="color:#f92672">.&lt;/span>runJob&lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#66d9ef">this&lt;/span>&lt;span style="color:#f92672">,&lt;/span> &lt;span style="color:#f92672">(&lt;/span>iter&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Iterator&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">])&lt;/span> &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span> iter&lt;span style="color:#f92672">.&lt;/span>foreach&lt;span style="color:#f92672">(&lt;/span>cleanF&lt;span style="color:#f92672">))&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/div>&lt;h3 id="23-rdd操作执行">2.3 RDD操作执行&lt;/h3>
&lt;h4 id="rdd操作执行流程图">RDD操作执行流程图&lt;/h4>
&lt;div class="mermaid">graph TD
A[RDD操作调用] --> B{操作类型}
B -->|Transformation| C[创建新RDD]
B -->|Action| D[触发作业执行]
C --> C1[构建RDD血统]
C1 --> C2[设置依赖关系]
C2 --> C3[返回新RDD对象]
C3 --> E[等待Action触发]
D --> D1[调用SparkContext.runJob]
D1 --> D2[DAGScheduler.runJob]
D2 --> D3[构建DAG图]
D3 --> D4[划分Stage]
D4 --> D5[提交Task]
D5 --> D6[Executor执行]
D6 --> D7[返回结果]
style C fill:#e8f5e8
style D fill:#ffebee
style D3 fill:#fff3e0
style D6 fill:#e1f5fe
&lt;/div>
&lt;hr>
&lt;h2 id="三任务调度系统-1">三、任务调度系统&lt;/h2>
&lt;h3 id="31-dagscheduler调度器">3.1 DAGScheduler调度器&lt;/h3>
&lt;h4 id="dagscheduler作业提交流程图">DAGScheduler作业提交流程图&lt;/h4>
&lt;div class="mermaid">graph TD
A[用户调用Action] --> B[SparkContext.runJob]
B --> C[DAGScheduler.runJob]
C --> D[submitJob]
D --> E[创建ActiveJob]
E --> F[构建DAG图]
F --> G[findMissingPartitions]
G --> H[getMissingParentStages]
H --> I{是否有父Stage}
I -->|有| J[递归提交父Stage]
I -->|无| K[submitMissingTasks]
J --> L[等待父Stage完成]
L --> K
K --> M[创建TaskSet]
M --> N[TaskScheduler.submitTasks]
N --> O[分发Task到Executor]
O --> P[Task执行完成]
P --> Q[Stage完成]
Q --> R[检查后续Stage]
R --> S[Job完成]
style A fill:#e1f5fe
style F fill:#fff3e0
style K fill:#e8f5e8
style S fill:#c8e6c9
&lt;/div>
&lt;h3 id="32-stage划分算法">3.2 Stage划分算法&lt;/h3>
&lt;h4 id="stage划分算法流程图">Stage划分算法流程图&lt;/h4>
&lt;div class="mermaid">graph TD
A[开始Stage划分] --> B[从最终RDD开始]
B --> C[遍历RDD依赖]
C --> D{依赖类型}
D -->|窄依赖| E[加入当前Stage]
D -->|宽依赖| F[创建新Stage边界]
E --> G[继续遍历父RDD]
F --> H[创建ShuffleMapStage]
G --> C
H --> I[递归处理父RDD]
I --> C
C --> J{是否还有未处理RDD}
J -->|是| C
J -->|否| K[Stage划分完成]
style A fill:#e1f5fe
style F fill:#ffebee
style H fill:#fff3e0
style K fill:#e8f5e8
&lt;/div>
&lt;h4 id="dagscheduler源码分析">DAGScheduler源码分析&lt;/h4>
&lt;div class="highlight">&lt;pre tabindex="0" style="color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4;">&lt;code class="language-scala" data-lang="scala">&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">// DAGScheduler.scala 核心调度逻辑
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span>&lt;span style="color:#66d9ef">class&lt;/span> &lt;span style="color:#a6e22e">DAGScheduler&lt;/span>&lt;span style="color:#f92672">(&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">private&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">scheduler&lt;/span>&lt;span style="color:#f92672">]&lt;/span> &lt;span style="color:#66d9ef">val&lt;/span> sc&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">SparkContext&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">private&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">scheduler&lt;/span>&lt;span style="color:#f92672">]&lt;/span> &lt;span style="color:#66d9ef">val&lt;/span> taskScheduler&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">TaskScheduler&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> listenerBus&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">LiveListenerBus&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> mapOutputTracker&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">MapOutputTrackerMaster&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> blockManagerMaster&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">BlockManagerMaster&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> env&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">SparkEnv&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> clock&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Clock&lt;/span> &lt;span style="color:#f92672">=&lt;/span> &lt;span style="color:#66d9ef">new&lt;/span> &lt;span style="color:#a6e22e">SystemClock&lt;/span>&lt;span style="color:#f92672">())&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">extends&lt;/span> &lt;span style="color:#a6e22e">Logging&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 事件处理循环
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">private&lt;/span> &lt;span style="color:#66d9ef">val&lt;/span> eventProcessLoop &lt;span style="color:#66d9ef">=&lt;/span> &lt;span style="color:#66d9ef">new&lt;/span> &lt;span style="color:#a6e22e">DAGSchedulerEventProcessLoop&lt;/span>&lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#66d9ef">this&lt;/span>&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 提交作业并阻塞等待其完成的核心方法
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> runJob&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>, &lt;span style="color:#66d9ef">U&lt;/span>&lt;span style="color:#f92672">](&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> rdd&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">RDD&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">],&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> func&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#66d9ef">TaskContext&lt;/span>&lt;span style="color:#f92672">,&lt;/span> &lt;span style="color:#66d9ef">Iterator&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">T&lt;/span>&lt;span style="color:#f92672">])&lt;/span> &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span> U&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> partitions&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Seq&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">Int&lt;/span>&lt;span style="color:#f92672">],&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> callSite&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">CallSite&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> resultHandler&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#66d9ef">Int&lt;/span>&lt;span style="color:#f92672">,&lt;/span> &lt;span style="color:#66d9ef">U&lt;/span>&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span> &lt;span style="color:#a6e22e">Unit&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> properties&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Properties&lt;/span>&lt;span style="color:#f92672">)&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Unit&lt;/span> &lt;span style="color:#f92672">=&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> start &lt;span style="color:#66d9ef">=&lt;/span> &lt;span style="color:#a6e22e">System&lt;/span>&lt;span style="color:#f92672">.&lt;/span>nanoTime
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> waiter &lt;span style="color:#66d9ef">=&lt;/span> submitJob&lt;span style="color:#f92672">(&lt;/span>rdd&lt;span style="color:#f92672">,&lt;/span> func&lt;span style="color:#f92672">,&lt;/span> partitions&lt;span style="color:#f92672">,&lt;/span> callSite&lt;span style="color:#f92672">,&lt;/span> resultHandler&lt;span style="color:#f92672">,&lt;/span> properties&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#a6e22e">ThreadUtils&lt;/span>&lt;span style="color:#f92672">.&lt;/span>awaitReady&lt;span style="color:#f92672">(&lt;/span>waiter&lt;span style="color:#f92672">,&lt;/span> &lt;span style="color:#a6e22e">Duration&lt;/span>&lt;span style="color:#f92672">.&lt;/span>&lt;span style="color:#a6e22e">Inf&lt;/span>&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> waiter&lt;span style="color:#f92672">.&lt;/span>value&lt;span style="color:#f92672">.&lt;/span>get &lt;span style="color:#66d9ef">match&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">case&lt;/span> scala&lt;span style="color:#f92672">.&lt;/span>util&lt;span style="color:#f92672">.&lt;/span>&lt;span style="color:#a6e22e">Success&lt;/span>&lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> logInfo&lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#e6db74">&amp;#34;Job %d finished: %s, took %f s&amp;#34;&lt;/span>&lt;span style="color:#f92672">.&lt;/span>format
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">(&lt;/span>waiter&lt;span style="color:#f92672">.&lt;/span>jobId&lt;span style="color:#f92672">,&lt;/span> callSite&lt;span style="color:#f92672">.&lt;/span>shortForm&lt;span style="color:#f92672">,&lt;/span> &lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#a6e22e">System&lt;/span>&lt;span style="color:#f92672">.&lt;/span>nanoTime &lt;span style="color:#f92672">-&lt;/span> start&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#f92672">/&lt;/span> &lt;span style="color:#ae81ff">1&lt;/span>e9&lt;span style="color:#f92672">))&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">case&lt;/span> scala&lt;span style="color:#f92672">.&lt;/span>util&lt;span style="color:#f92672">.&lt;/span>&lt;span style="color:#a6e22e">Failure&lt;/span>&lt;span style="color:#f92672">(&lt;/span>exception&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> logInfo&lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#e6db74">&amp;#34;Job %d failed: %s, took %f s&amp;#34;&lt;/span>&lt;span style="color:#f92672">.&lt;/span>format
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">(&lt;/span>waiter&lt;span style="color:#f92672">.&lt;/span>jobId&lt;span style="color:#f92672">,&lt;/span> callSite&lt;span style="color:#f92672">.&lt;/span>shortForm&lt;span style="color:#f92672">,&lt;/span> &lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#a6e22e">System&lt;/span>&lt;span style="color:#f92672">.&lt;/span>nanoTime &lt;span style="color:#f92672">-&lt;/span> start&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#f92672">/&lt;/span> &lt;span style="color:#ae81ff">1&lt;/span>e9&lt;span style="color:#f92672">))&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">throw&lt;/span> exception
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// Stage划分核心算法
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">private&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> getOrCreateShuffleMapStage&lt;span style="color:#f92672">(&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> shuffleDep&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">ShuffleDependency&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">_&lt;/span>, &lt;span style="color:#66d9ef">_&lt;/span>, &lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">],&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> firstJobId&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Int&lt;/span>&lt;span style="color:#f92672">)&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">ShuffleMapStage&lt;/span> &lt;span style="color:#f92672">=&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> shuffleIdToMapStage&lt;span style="color:#f92672">.&lt;/span>get&lt;span style="color:#f92672">(&lt;/span>shuffleDep&lt;span style="color:#f92672">.&lt;/span>shuffleId&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#66d9ef">match&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">case&lt;/span> &lt;span style="color:#a6e22e">Some&lt;/span>&lt;span style="color:#f92672">(&lt;/span>stage&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> stage
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">case&lt;/span> &lt;span style="color:#a6e22e">None&lt;/span> &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 先为所有尚未创建Stage的祖先Shuffle依赖创建ShuffleMapStage
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> getMissingAncestorShuffleDependencies&lt;span style="color:#f92672">(&lt;/span>shuffleDep&lt;span style="color:#f92672">.&lt;/span>rdd&lt;span style="color:#f92672">).&lt;/span>foreach &lt;span style="color:#f92672">{&lt;/span> dep &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">if&lt;/span> &lt;span style="color:#f92672">(!&lt;/span>shuffleIdToMapStage&lt;span style="color:#f92672">.&lt;/span>contains&lt;span style="color:#f92672">(&lt;/span>dep&lt;span style="color:#f92672">.&lt;/span>shuffleId&lt;span style="color:#f92672">))&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> createShuffleMapStage&lt;span style="color:#f92672">(&lt;/span>dep&lt;span style="color:#f92672">,&lt;/span> firstJobId&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> createShuffleMapStage&lt;span style="color:#f92672">(&lt;/span>shuffleDep&lt;span style="color:#f92672">,&lt;/span> firstJobId&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 查找尚未注册到shuffleIdToMapStage的祖先Shuffle依赖
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">private&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> getMissingAncestorShuffleDependencies&lt;span style="color:#f92672">(&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> rdd&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">RDD&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">])&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">ArrayStack&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">ShuffleDependency&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">_&lt;/span>, &lt;span style="color:#66d9ef">_&lt;/span>, &lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">]]&lt;/span> &lt;span style="color:#66d9ef">=&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> ancestors &lt;span style="color:#66d9ef">=&lt;/span> &lt;span style="color:#66d9ef">new&lt;/span> &lt;span style="color:#a6e22e">ArrayStack&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">ShuffleDependency&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">_&lt;/span>, &lt;span style="color:#66d9ef">_&lt;/span>, &lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">]]&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> visited &lt;span style="color:#66d9ef">=&lt;/span> &lt;span style="color:#66d9ef">new&lt;/span> &lt;span style="color:#a6e22e">HashSet&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">RDD&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">]]&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> waitingForVisit &lt;span style="color:#66d9ef">=&lt;/span> &lt;span style="color:#66d9ef">new&lt;/span> &lt;span style="color:#a6e22e">ArrayStack&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">RDD&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">]]&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> waitingForVisit&lt;span style="color:#f92672">.&lt;/span>push&lt;span style="color:#f92672">(&lt;/span>rdd&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">while&lt;/span> &lt;span style="color:#f92672">(&lt;/span>waitingForVisit&lt;span style="color:#f92672">.&lt;/span>nonEmpty&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> toVisit &lt;span style="color:#66d9ef">=&lt;/span> waitingForVisit&lt;span style="color:#f92672">.&lt;/span>pop&lt;span style="color:#f92672">()&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">if&lt;/span> &lt;span style="color:#f92672">(!&lt;/span>visited&lt;span style="color:#f92672">(&lt;/span>toVisit&lt;span style="color:#f92672">))&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> visited &lt;span style="color:#f92672">+=&lt;/span> toVisit
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> toVisit&lt;span style="color:#f92672">.&lt;/span>dependencies&lt;span style="color:#f92672">.&lt;/span>foreach &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">case&lt;/span> shuffleDep&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">ShuffleDependency&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">_&lt;/span>, &lt;span style="color:#66d9ef">_&lt;/span>, &lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">]&lt;/span> &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">if&lt;/span> &lt;span style="color:#f92672">(!&lt;/span>shuffleIdToMapStage&lt;span style="color:#f92672">.&lt;/span>contains&lt;span style="color:#f92672">(&lt;/span>shuffleDep&lt;span style="color:#f92672">.&lt;/span>shuffleId&lt;span style="color:#f92672">))&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> ancestors&lt;span style="color:#f92672">.&lt;/span>push&lt;span style="color:#f92672">(&lt;/span>shuffleDep&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> waitingForVisit&lt;span style="color:#f92672">.&lt;/span>push&lt;span style="color:#f92672">(&lt;/span>shuffleDep&lt;span style="color:#f92672">.&lt;/span>rdd&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">case&lt;/span> narrowDep&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">NarrowDependency&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">]&lt;/span> &lt;span style="color:#66d9ef">=&amp;gt;&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> waitingForVisit&lt;span style="color:#f92672">.&lt;/span>push&lt;span style="color:#f92672">(&lt;/span>narrowDep&lt;span style="color:#f92672">.&lt;/span>rdd&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> ancestors
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 提交Stage
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">private&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> submitStage&lt;span style="color:#f92672">(&lt;/span>stage&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Stage&lt;/span>&lt;span style="color:#f92672">)&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Unit&lt;/span> &lt;span style="color:#f92672">=&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> jobId &lt;span style="color:#66d9ef">=&lt;/span> activeJobForStage&lt;span style="color:#f92672">(&lt;/span>stage&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">if&lt;/span> &lt;span style="color:#f92672">(&lt;/span>jobId&lt;span style="color:#f92672">.&lt;/span>isDefined&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">if&lt;/span> &lt;span style="color:#f92672">(!&lt;/span>waitingStages&lt;span style="color:#f92672">(&lt;/span>stage&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#f92672">&amp;amp;&amp;amp;&lt;/span> &lt;span style="color:#f92672">!&lt;/span>runningStages&lt;span style="color:#f92672">(&lt;/span>stage&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#f92672">&amp;amp;&amp;amp;&lt;/span> &lt;span style="color:#f92672">!&lt;/span>failedStages&lt;span style="color:#f92672">(&lt;/span>stage&lt;span style="color:#f92672">))&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> missing &lt;span style="color:#66d9ef">=&lt;/span> getMissingParentStages&lt;span style="color:#f92672">(&lt;/span>stage&lt;span style="color:#f92672">).&lt;/span>sortBy&lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">.&lt;/span>id&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">if&lt;/span> &lt;span style="color:#f92672">(&lt;/span>missing&lt;span style="color:#f92672">.&lt;/span>isEmpty&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> submitMissingTasks&lt;span style="color:#f92672">(&lt;/span>stage&lt;span style="color:#f92672">,&lt;/span> jobId&lt;span style="color:#f92672">.&lt;/span>get&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span> &lt;span style="color:#66d9ef">else&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">for&lt;/span> &lt;span style="color:#f92672">(&lt;/span>parent &lt;span style="color:#66d9ef">&amp;lt;-&lt;/span> missing&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> submitStage&lt;span style="color:#f92672">(&lt;/span>parent&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> waitingStages &lt;span style="color:#f92672">+=&lt;/span> stage
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/div>&lt;h3 id="33-内存监控与优化">3.3 内存监控与优化&lt;/h3>
&lt;h4 id="内存存储状态监控">内存存储状态监控&lt;/h4>
&lt;div class="highlight">&lt;pre tabindex="0" style="color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4;">&lt;code class="language-scala" data-lang="scala">&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">// 内存使用监控组件
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span>&lt;span style="color:#66d9ef">class&lt;/span> &lt;span style="color:#a6e22e">MemoryMonitor&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 监控Map的内存使用
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> monitorMapMemory&lt;span style="color:#f92672">(&lt;/span>map&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">SizeTrackingAppendOnlyMap&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">_&lt;/span>, &lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">])&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">MemoryUsage&lt;/span> &lt;span style="color:#f92672">=&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> estimatedSize &lt;span style="color:#66d9ef">=&lt;/span> map&lt;span style="color:#f92672">.&lt;/span>estimateSize&lt;span style="color:#f92672">()&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> currentMemory &lt;span style="color:#66d9ef">=&lt;/span> map&lt;span style="color:#f92672">.&lt;/span>currentMemory
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> maxMemory &lt;span style="color:#66d9ef">=&lt;/span> map&lt;span style="color:#f92672">.&lt;/span>maxMemory
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#a6e22e">MemoryUsage&lt;/span>&lt;span style="color:#f92672">(&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> estimatedSize &lt;span style="color:#66d9ef">=&lt;/span> estimatedSize&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> currentMemory &lt;span style="color:#66d9ef">=&lt;/span> currentMemory&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> maxMemory &lt;span style="color:#66d9ef">=&lt;/span> maxMemory&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> utilization &lt;span style="color:#66d9ef">=&lt;/span> currentMemory&lt;span style="color:#f92672">.&lt;/span>toDouble &lt;span style="color:#f92672">/&lt;/span> maxMemory
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 监控Spill状态
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> monitorSpillStatus&lt;span style="color:#f92672">(&lt;/span>externalMap&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">ExternalAppendOnlyMap&lt;/span>&lt;span style="color:#f92672">[&lt;/span>&lt;span style="color:#66d9ef">_&lt;/span>, &lt;span style="color:#66d9ef">_&lt;/span>, &lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">])&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">SpillStatus&lt;/span> &lt;span style="color:#f92672">=&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> spillCount &lt;span style="color:#66d9ef">=&lt;/span> externalMap&lt;span style="color:#f92672">.&lt;/span>spills&lt;span style="color:#f92672">.&lt;/span>size
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> totalSpillSize &lt;span style="color:#66d9ef">=&lt;/span> externalMap&lt;span style="color:#f92672">.&lt;/span>spills&lt;span style="color:#f92672">.&lt;/span>map&lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#66d9ef">_&lt;/span>&lt;span style="color:#f92672">.&lt;/span>size&lt;span style="color:#f92672">).&lt;/span>sum
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#a6e22e">SpillStatus&lt;/span>&lt;span style="color:#f92672">(&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> spillCount &lt;span style="color:#66d9ef">=&lt;/span> spillCount&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> totalSpillSize &lt;span style="color:#66d9ef">=&lt;/span> totalSpillSize&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> averageSpillSize &lt;span style="color:#66d9ef">=&lt;/span> &lt;span style="color:#66d9ef">if&lt;/span> &lt;span style="color:#f92672">(&lt;/span>spillCount &lt;span style="color:#f92672">&amp;gt;&lt;/span> &lt;span style="color:#ae81ff">0&lt;/span>&lt;span style="color:#f92672">)&lt;/span> totalSpillSize &lt;span style="color:#f92672">/&lt;/span> spillCount &lt;span style="color:#66d9ef">else&lt;/span> &lt;span style="color:#ae81ff">0&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#66d9ef">case&lt;/span> &lt;span style="color:#66d9ef">class&lt;/span> &lt;span style="color:#a6e22e">MemoryUsage&lt;/span>&lt;span style="color:#f92672">(&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> estimatedSize&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Long&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> currentMemory&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Long&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> maxMemory&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Long&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> utilization&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Double&lt;/span>&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#66d9ef">case&lt;/span> &lt;span style="color:#66d9ef">class&lt;/span> &lt;span style="color:#a6e22e">SpillStatus&lt;/span>&lt;span style="color:#f92672">(&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> spillCount&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Int&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> totalSpillSize&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Long&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> averageSpillSize&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Long&lt;/span>&lt;span style="color:#f92672">)&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/div>&lt;h4 id="内存存储监控流程图">内存存储监控流程图&lt;/h4>
&lt;div class="mermaid">graph TD
A[输入数据] --> B[PartitionedAppendOnlyMap]
B --> C{内存是否足够?}
C -->|是| D[内存聚合]
C -->|否| E[Spill到磁盘]
D --> F[返回结果]
E --> G[ExternalAppendOnlyMap]
G --> H[合并内存和磁盘数据]
H --> F
I[MemoryMonitor] --> B
I --> G
J[SpillMonitor] --> E
&lt;/div>
&lt;h4 id="内存存储优化策略">内存存储优化策略&lt;/h4>
&lt;div class="highlight">&lt;pre tabindex="0" style="color:#f8f8f2;background-color:#272822;-moz-tab-size:4;-o-tab-size:4;tab-size:4;">&lt;code class="language-scala" data-lang="scala">&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">// 内存分配优化
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span>&lt;span style="color:#66d9ef">class&lt;/span> &lt;span style="color:#a6e22e">MemoryOptimizer&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 动态调整内存阈值
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> adjustMemoryThreshold&lt;span style="color:#f92672">(&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> currentMemory&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Long&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> maxMemory&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Long&lt;/span>&lt;span style="color:#f92672">,&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> spillCount&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Int&lt;/span>&lt;span style="color:#f92672">)&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Long&lt;/span> &lt;span style="color:#f92672">=&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> utilization &lt;span style="color:#66d9ef">=&lt;/span> currentMemory&lt;span style="color:#f92672">.&lt;/span>toDouble &lt;span style="color:#f92672">/&lt;/span> maxMemory
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">if&lt;/span> &lt;span style="color:#f92672">(&lt;/span>utilization &lt;span style="color:#f92672">&amp;gt;&lt;/span> &lt;span style="color:#ae81ff">0.8&lt;/span> &lt;span style="color:#f92672">&amp;amp;&amp;amp;&lt;/span> spillCount &lt;span style="color:#f92672">&amp;gt;&lt;/span> &lt;span style="color:#ae81ff">0&lt;/span>&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 内存使用率高且有Spill，降低阈值
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#f92672">(&lt;/span>maxMemory &lt;span style="color:#f92672">*&lt;/span> &lt;span style="color:#ae81ff">0.6&lt;/span>&lt;span style="color:#f92672">).&lt;/span>toLong
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span> &lt;span style="color:#66d9ef">else&lt;/span> &lt;span style="color:#66d9ef">if&lt;/span> &lt;span style="color:#f92672">(&lt;/span>utilization &lt;span style="color:#f92672">&amp;lt;&lt;/span> &lt;span style="color:#ae81ff">0.5&lt;/span> &lt;span style="color:#f92672">&amp;amp;&amp;amp;&lt;/span> spillCount &lt;span style="color:#f92672">==&lt;/span> &lt;span style="color:#ae81ff">0&lt;/span>&lt;span style="color:#f92672">)&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 内存使用率低且无Spill，提高阈值
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#f92672">(&lt;/span>maxMemory &lt;span style="color:#f92672">*&lt;/span> &lt;span style="color:#ae81ff">0.9&lt;/span>&lt;span style="color:#f92672">).&lt;/span>toLong
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span> &lt;span style="color:#66d9ef">else&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 保持当前阈值
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#f92672">(&lt;/span>maxMemory &lt;span style="color:#f92672">*&lt;/span> &lt;span style="color:#ae81ff">0.8&lt;/span>&lt;span style="color:#f92672">).&lt;/span>toLong
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#75715e">// 优化Map初始容量
&lt;/span>&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#75715e">&lt;/span> &lt;span style="color:#66d9ef">def&lt;/span> optimizeInitialCapacity&lt;span style="color:#f92672">(&lt;/span>dataSize&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Long&lt;/span>&lt;span style="color:#f92672">)&lt;/span>&lt;span style="color:#66d9ef">:&lt;/span> &lt;span style="color:#66d9ef">Int&lt;/span> &lt;span style="color:#f92672">=&lt;/span> &lt;span style="color:#f92672">{&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#66d9ef">val&lt;/span> estimatedSize &lt;span style="color:#66d9ef">=&lt;/span> &lt;span style="color:#f92672">(&lt;/span>dataSize &lt;span style="color:#f92672">*&lt;/span> &lt;span style="color:#ae81ff">1.2&lt;/span>&lt;span style="color:#f92672">).&lt;/span>toInt
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> math&lt;span style="color:#f92672">.&lt;/span>max&lt;span style="color:#f92672">(&lt;/span>&lt;span style="color:#ae81ff">64&lt;/span>&lt;span style="color:#f92672">,&lt;/span> math&lt;span style="color:#f92672">.&lt;/span>min&lt;span style="color:#f92672">(&lt;/span>estimatedSize&lt;span style="color:#f92672">,&lt;/span> &lt;span style="color:#ae81ff">1024&lt;/span> &lt;span style="color:#f92672">*&lt;/span> &lt;span style="color:#ae81ff">1024&lt;/span>&lt;span style="color:#f92672">))&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span> &lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;span style="display:flex;">&lt;span>&lt;span style="color:#f92672">}&lt;/span>
&lt;/span>&lt;/span>&lt;/code>&lt;/pre>&lt;/div>&lt;hr>
&lt;h3 id="34-taskscheduler任务调度">3.4 TaskScheduler任务调度&lt;/h3>
&lt;h4 id="dag的生成与依赖分析">DAG的生成与依赖分析&lt;/h4>
&lt;p>&lt;strong>任务提交完整流程图&lt;/strong>：&lt;/p></description></item><item><title>28.scala</title><link>https://pothos.dpdns.org/posts/28.scala/</link><pubDate>Thu, 25 Dec 2025 00:00:00 +0000</pubDate><guid>https://pothos.dpdns.org/posts/28.scala/</guid><description>&lt;h1 id="28-scala语法指南">28. Scala语法指南&lt;/h1>
&lt;h2 id="目录">目录&lt;/h2>
&lt;details>
&lt;summary>点击展开目录&lt;/summary>
&lt;h3 id="1-scala概述">&lt;a href="#1-scala%E6%A6%82%E8%BF%B0">1. Scala概述&lt;/a>&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#11-scala%E7%AE%80%E4%BB%8B">1.1 Scala简介&lt;/a>&lt;/li>
&lt;li>&lt;a href="#12-scala%E7%89%B9%E7%82%B9">1.2 Scala特点&lt;/a>&lt;/li>
&lt;li>&lt;a href="#13-scala%E4%B8%8Ejava%E5%AF%B9%E6%AF%94">1.3 Scala与Java对比&lt;/a>&lt;/li>
&lt;li>&lt;a href="#14-%E5%BC%80%E5%8F%91%E7%8E%AF%E5%A2%83%E6%90%AD%E5%BB%BA">1.4 开发环境搭建&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="2-基础语法">&lt;a href="#2-%E5%9F%BA%E7%A1%80%E8%AF%AD%E6%B3%95">2. 基础语法&lt;/a>&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#21-%E5%8F%98%E9%87%8F%E4%B8%8E%E5%B8%B8%E9%87%8F">2.1 变量与常量&lt;/a>&lt;/li>
&lt;li>&lt;a href="#22-%E6%95%B0%E6%8D%AE%E7%B1%BB%E5%9E%8B">2.2 数据类型&lt;/a>&lt;/li>
&lt;li>&lt;a href="#23-%E6%93%8D%E4%BD%9C%E7%AC%A6">2.3 操作符&lt;/a>&lt;/li>
&lt;li>&lt;a href="#24-%E6%8E%A7%E5%88%B6%E7%BB%93%E6%9E%84">2.4 控制结构&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="3-函数与方法">&lt;a href="#3-%E5%87%BD%E6%95%B0%E4%B8%8E%E6%96%B9%E6%B3%95">3. 函数与方法&lt;/a>&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#31-%E5%87%BD%E6%95%B0%E5%AE%9A%E4%B9%89">3.1 函数定义&lt;/a>&lt;/li>
&lt;li>&lt;a href="#32-%E6%96%B9%E6%B3%95%E4%B8%8E%E5%87%BD%E6%95%B0%E5%8C%BA%E5%88%AB">3.2 方法与函数区别&lt;/a>&lt;/li>
&lt;li>&lt;a href="#33-%E9%AB%98%E9%98%B6%E5%87%BD%E6%95%B0">3.3 高阶函数&lt;/a>&lt;/li>
&lt;li>&lt;a href="#34-%E5%8C%BF%E5%90%8D%E5%87%BD%E6%95%B0%E4%B8%8E%E6%9F%AF%E9%87%8C%E5%8C%96">3.4 匿名函数与柯里化&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="4-面向对象编程">&lt;a href="#4-%E9%9D%A2%E5%90%91%E5%AF%B9%E8%B1%A1%E7%BC%96%E7%A8%8B">4. 面向对象编程&lt;/a>&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#41-%E7%B1%BB%E4%B8%8E%E5%AF%B9%E8%B1%A1">4.1 类与对象&lt;/a>&lt;/li>
&lt;li>&lt;a href="#42-%E6%9E%84%E9%80%A0%E5%99%A8">4.2 构造器&lt;/a>&lt;/li>
&lt;li>&lt;a href="#43-%E7%BB%A7%E6%89%BF%E4%B8%8E%E5%A4%9A%E6%80%81">4.3 继承与多态&lt;/a>&lt;/li>
&lt;li>&lt;a href="#44-%E7%89%B9%E8%B4%A8trait">4.4 特质(Trait)&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="5-集合框架">&lt;a href="#5-%E9%9B%86%E5%90%88%E6%A1%86%E6%9E%B6">5. 集合框架&lt;/a>&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#51-%E9%9B%86%E5%90%88%E6%A1%86%E6%9E%B6%E6%95%B4%E4%BD%93%E6%9E%B6%E6%9E%84">5.1 集合框架整体架构&lt;/a>&lt;/li>
&lt;li>&lt;a href="#52-listsetmap%E8%AF%A6%E8%A7%A3">5.2 List、Set、Map详解&lt;/a>&lt;/li>
&lt;li>&lt;a href="#53-scala%E4%B8%8Ejava%E9%9B%86%E5%90%88%E4%BA%92%E8%BD%AC%E5%AE%9E%E6%88%98">5.3 Scala与Java集合互转实战&lt;/a>&lt;/li>
&lt;li>&lt;a href="#54-%E5%8F%AF%E5%8F%98%E4%B8%8E%E4%B8%8D%E5%8F%AF%E5%8F%98%E9%9B%86%E5%90%88">5.4 可变与不可变集合&lt;/a>&lt;/li>
&lt;li>&lt;a href="#55-%E9%9B%86%E5%90%88%E6%93%8D%E4%BD%9C%E6%96%B9%E6%B3%95">5.5 集合操作方法&lt;/a>&lt;/li>
&lt;li>&lt;a href="#56-%E9%9B%86%E5%90%88%E6%80%A7%E8%83%BD%E5%AF%B9%E6%AF%94%E4%B8%8E%E9%80%89%E6%8B%A9%E7%AD%96%E7%95%A5">5.6 集合性能对比与选择策略&lt;/a>&lt;/li>
&lt;li>&lt;a href="#57-%E9%9B%86%E5%90%88%E6%80%A7%E8%83%BD%E5%AF%B9%E6%AF%94">5.7 集合性能对比&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="6-模式匹配">&lt;a href="#6-%E6%A8%A1%E5%BC%8F%E5%8C%B9%E9%85%8D">6. 模式匹配&lt;/a>&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#61-%E5%9F%BA%E6%9C%AC%E6%A8%A1%E5%BC%8F%E5%8C%B9%E9%85%8D">6.1 基本模式匹配&lt;/a>&lt;/li>
&lt;li>&lt;a href="#62-%E6%A1%88%E4%BE%8B%E7%B1%BB%E6%A8%A1%E5%BC%8F">6.2 案例类模式&lt;/a>&lt;/li>
&lt;li>&lt;a href="#63-%E9%9B%86%E5%90%88%E6%A8%A1%E5%BC%8F%E5%8C%B9%E9%85%8D">6.3 集合模式匹配&lt;/a>&lt;/li>
&lt;li>&lt;a href="#64-%E6%8F%90%E5%8F%96%E5%99%A8extractor">6.4 提取器&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="7-高级特性">&lt;a href="#7-%E9%AB%98%E7%BA%A7%E7%89%B9%E6%80%A7">7. 高级特性&lt;/a>&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#71-%E9%9A%90%E5%BC%8F%E8%BD%AC%E6%8D%A2%E4%B8%8E%E9%9A%90%E5%BC%8F%E5%8F%82%E6%95%B0">7.1 隐式转换与隐式参数&lt;/a>&lt;/li>
&lt;li>&lt;a href="#72-%E6%B3%9B%E5%9E%8B%E4%B8%8E%E7%B1%BB%E5%9E%8B%E5%8F%82%E6%95%B0">7.2 泛型与类型参数&lt;/a>&lt;/li>
&lt;li>&lt;a href="#73-%E5%8D%8F%E5%8F%98%E4%B8%8E%E9%80%86%E5%8F%98">7.3 协变与逆变&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="8-函数式编程">&lt;a href="#8-%E5%87%BD%E6%95%B0%E5%BC%8F%E7%BC%96%E7%A8%8B">8. 函数式编程&lt;/a>&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#81-%E4%B8%8D%E5%8F%AF%E5%8F%98%E6%80%A7">8.1 不可变性&lt;/a>&lt;/li>
&lt;li>&lt;a href="#82-%E5%87%BD%E6%95%B0%E7%BB%84%E5%90%88">8.2 函数组合&lt;/a>&lt;/li>
&lt;li>&lt;a href="#83-monads%E6%A6%82%E5%BF%B5">8.3 Monads概念&lt;/a>&lt;/li>
&lt;li>&lt;a href="#84-optiontryelther">8.4 Option、Try、Either&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="9-并发编程">&lt;a href="#9-%E5%B9%B6%E5%8F%91%E7%BC%96%E7%A8%8B">9. 并发编程&lt;/a>&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#91-actor%E6%A8%A1%E5%9E%8B">9.1 Actor模型&lt;/a>&lt;/li>
&lt;li>&lt;a href="#92-future%E4%B8%8Epromise">9.2 Future与Promise&lt;/a>&lt;/li>
&lt;li>&lt;a href="#93-%E5%B9%B6%E8%A1%8C%E9%9B%86%E5%90%88">9.3 并行集合&lt;/a>&lt;/li>
&lt;li>&lt;a href="#94-%E5%90%8C%E6%AD%A5%E6%9C%BA%E5%88%B6">9.4 同步机制&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="10-系统交互与外部调用">&lt;a href="#10-%E7%B3%BB%E7%BB%9F%E4%BA%A4%E4%BA%92%E4%B8%8E%E5%A4%96%E9%83%A8%E8%B0%83%E7%94%A8">10. 系统交互与外部调用&lt;/a>&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#101-%E6%89%A7%E8%A1%8Cshell%E5%91%BD%E4%BB%A4">10.1 执行Shell命令&lt;/a>&lt;/li>
&lt;li>&lt;a href="#102-%E6%96%87%E4%BB%B6%E7%B3%BB%E7%BB%9F%E6%93%8D%E4%BD%9C">10.2 文件系统操作&lt;/a>&lt;/li>
&lt;li>&lt;a href="#103-%E8%BF%9B%E7%A8%8B%E7%AE%A1%E7%90%86">10.3 进程管理&lt;/a>&lt;/li>
&lt;li>&lt;a href="#104-%E7%B3%BB%E7%BB%9F%E5%B1%9E%E6%80%A7%E4%B8%8E%E7%8E%AF%E5%A2%83%E5%8F%98%E9%87%8F">10.4 系统属性与环境变量&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="11-scala面试题集">&lt;a href="#11-scala%E9%9D%A2%E8%AF%95%E9%A2%98%E9%9B%86">11. Scala面试题集&lt;/a>&lt;/h3>
&lt;ul>
&lt;li>&lt;a href="#111-%E5%9F%BA%E7%A1%80%E8%AF%AD%E6%B3%95%E9%A2%98">11.1 基础语法题&lt;/a>&lt;/li>
&lt;li>&lt;a href="#112-%E9%9D%A2%E5%90%91%E5%AF%B9%E8%B1%A1%E9%A2%98">11.2 面向对象题&lt;/a>&lt;/li>
&lt;li>&lt;a href="#113-%E5%87%BD%E6%95%B0%E5%BC%8F%E7%BC%96%E7%A8%8B%E9%A2%98">11.3 函数式编程题&lt;/a>&lt;/li>
&lt;li>&lt;a href="#114-%E9%AB%98%E7%BA%A7%E7%89%B9%E6%80%A7%E9%A2%98">11.4 高级特性题&lt;/a>&lt;/li>
&lt;/ul>
&lt;h3 id="12-总结与进阶方向">&lt;a href="#12-%E6%80%BB%E7%BB%93">12. 总结与进阶方向&lt;/a>&lt;/h3>
&lt;hr>
&lt;/details>
&lt;h2 id="1-scala概述-1">1. Scala概述&lt;/h2>
&lt;h3 id="11-scala简介">1.1 Scala简介&lt;/h3>
&lt;p>&lt;strong>Scala&lt;/strong>（Scalable Language）是一种运行在JVM上的多范式编程语言，由Martin Odersky在2003年设计。它&lt;strong>无缝结合了面向对象编程和函数式编程&lt;/strong>的特性，旨在构建可伸缩的软件系统。
&lt;strong>核心设计理念&lt;/strong>：&lt;/p></description></item></channel></rss>