
My Full Walkthrough of SparkContext, the Core of the Spark Source Code

Published: 2025-02-04  Author: 千家信息网 editor



The main classes this walkthrough touches, and the packages they live in:

Driver Program (SparkConf) package org.apache.spark
Master package org.apache.spark.deploy.master
SparkContext package org.apache.spark
Stage package org.apache.spark.scheduler
Task package org.apache.spark.scheduler
DAGScheduler package org.apache.spark.scheduler
TaskScheduler package org.apache.spark.scheduler
TaskSchedulerImpl package org.apache.spark.scheduler
Worker package org.apache.spark.deploy.worker
Executor package org.apache.spark.executor
BlockManager package org.apache.spark.storage
TaskSet package org.apache.spark.scheduler
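
To ground the walkthrough, here is roughly what the user-side driver program looks like; constructing the SparkContext is what triggers everything below. This is a hedged sketch: the app name and master URL are placeholders ("local-cluster[2,1,1024]" would exercise the branch examined later, but it needs a full Spark build, so plain "local[2]" is used here).

    import org.apache.spark.{SparkConf, SparkContext}

    // Minimal driver sketch: creating the SparkContext runs createTaskScheduler internally.
    object WalkthroughDriver {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf()
          .setAppName("sparkcontext-walkthrough") // placeholder name
          .setMaster("local[2]")                  // placeholder; see local-cluster below
        val sc = new SparkContext(conf)           // scheduler creation happens in here
        try {
          // A trivial job so the DAGScheduler/TaskScheduler actually have work to do.
          println(sc.parallelize(1 to 100).sum())
        } finally {
          sc.stop()
        }
      }
    }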


// Once initialization is done, SparkContext starts creating the schedulers:

// Create and start the scheduler
val (sched, ts) = SparkContext.createTaskScheduler(this, master)
_schedulerBackend = sched
_taskScheduler = ts
_dagScheduler = new DAGScheduler(this)
// Tell the heartbeat receiver that the task scheduler now exists
_heartbeatReceiver.send(TaskSchedulerIsSet)
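
Note the ordering: the DAGScheduler is constructed after the task scheduler because, in the 1.x source, DAGScheduler's constructor registers itself back into the TaskScheduler; only then can the task scheduler safely be started. A simplified sketch of that mutual-wiring pattern (stand-in classes, not Spark's own):

    // Stand-ins for the TaskScheduler / DAGScheduler wiring; names are simplified.
    class TaskSchedulerSketch {
      private var dag: Option[DAGSchedulerSketch] = None

      def setDAGScheduler(d: DAGSchedulerSketch): Unit = { dag = Some(d) }

      def start(): Unit = {
        // Starting before the DAGScheduler is wired in would drop task events.
        require(dag.isDefined, "DAGScheduler must be set before start()")
        println("task scheduler started")
      }
    }

    class DAGSchedulerSketch(taskScheduler: TaskSchedulerSketch) {
      // Mirrors DAGScheduler's constructor: register back into the task scheduler.
      taskScheduler.setDAGScheduler(this)
    }

    object WiringDemo {
      def main(args: Array[String]): Unit = {
        val ts  = new TaskSchedulerSketch
        val dag = new DAGSchedulerSketch(ts) // constructor wires itself in
        ts.start()                           // safe only after the wiring above
      }
    }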

/**
 * Create a task scheduler based on a given master URL.
 * Return a 2-tuple of the scheduler backend and the task scheduler.
 */
private def createTaskScheduler(
    sc: SparkContext,
    master: String): (SchedulerBackend, TaskScheduler) = {


master match {

The branch walked through here is the local-cluster one, matched by LOCAL_CLUSTER_REGEX against master URLs of the form "local-cluster[2,1,1024]" (number of workers, cores per worker, memory per worker in MB):

  case LOCAL_CLUSTER_REGEX(numSlaves, coresPerSlave, memoryPerSlave) =>

First, instantiate a TaskSchedulerImpl:

    val scheduler = new TaskSchedulerImpl(sc)

Build the masterUrls by starting an in-process LocalSparkCluster:

    val memoryPerSlaveInt = memoryPerSlave.toInt
    val localCluster = new LocalSparkCluster(
      numSlaves.toInt, coresPerSlave.toInt, memoryPerSlaveInt, sc.conf)
    val masterUrls = localCluster.start()

And the backend, said to be the really critical piece:

    val backend = new SparkDeploySchedulerBackend(scheduler, sc, masterUrls)
    scheduler.initialize(backend)
    backend.shutdownCallback = (backend: SparkDeploySchedulerBackend) => {
      localCluster.stop()
    }
    (backend, scheduler)

  // other branches ("local", "local[N]", "spark://...", yarn, mesos) elided
}
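
How does a string like "local-cluster[2,1,1024]" turn into the three bound names numSlaves, coresPerSlave, and memoryPerSlave? Scala regexes can be used directly as pattern extractors in a match. A minimal, self-contained sketch of the technique (the regex and names below are approximations, not copied from Spark):

    import scala.util.matching.Regex

    object MasterUrlSketch {
      // Approximation of Spark's LOCAL_CLUSTER_REGEX: local-cluster[N,cores,memoryMB]
      val LOCAL_CLUSTER_REGEX: Regex =
        """local-cluster\[\s*([0-9]+)\s*,\s*([0-9]+)\s*,\s*([0-9]+)\s*\]""".r

      def describe(master: String): String = master match {
        case "local" =>
          "single-JVM local mode"
        case LOCAL_CLUSTER_REGEX(numSlaves, coresPerSlave, memoryPerSlave) =>
          s"in-process cluster: $numSlaves workers x $coresPerSlave cores, $memoryPerSlave MB each"
        case other =>
          s"some other master URL: $other"
      }

      def main(args: Array[String]): Unit = {
        println(describe("local"))                   // single-JVM local mode
        println(describe("local-cluster[2,1,1024]")) // in-process cluster: 2 workers ...
      }
    }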


