framework

package
v0.20.0 Latest
Warning

This package is not in the latest version of its module.

Go to latest
Published: Mar 20, 2025 License: Apache-2.0 Imports: 29 Imported by: 0

Documentation

Index

Constants

This section is empty.

Variables

This section is empty.

Functions

func CloseSession

func CloseSession(ssn *Session)

func RegisterAction

func RegisterAction(act Action)

func RegisterPluginBuilder

func RegisterPluginBuilder(name string, pc func(map[string]string) Plugin)

Types

type Action

// Action is the interface of a scheduler action: a uniquely named step that
// is executed against a Session.
type Action interface {
	// Name returns the unique name of the Action, as an ActionType.
	Name() ActionType

	// Execute allocates the cluster's resources into each queue.
	// It receives the Session it operates on.
	Execute(ssn *Session)
}

Action is the interface implemented by scheduler actions.

func GetAction

func GetAction(name string) (Action, bool)

type ActionType

// ActionType is the string-typed name of a scheduler action; it is the type
// returned by Action.Name and accepted by Session.GetJobsDepth.
type ActionType string
// The ActionType values declared by this package.
const (
	Reclaim           ActionType = "reclaim"
	Preempt           ActionType = "preempt"
	Allocate          ActionType = "allocate"
	Consolidation     ActionType = "consolidation"
	StaleGangEviction ActionType = "stalegangeviction"
)

type Checkpoint

// Checkpoint is the integer value returned by Statement.Checkpoint and
// accepted by Statement.Rollback.
// NOTE(review): presumably an index into the statement's recorded
// operations — confirm against the implementation.
type Checkpoint int

type Event

// Event is the argument passed to the EventHandler callbacks; it carries the
// task the event refers to.
type Event struct {
	// Task is the pod the event is about.
	Task *pod_info.PodInfo
}

type EventHandler

// EventHandler groups the callbacks that receive an Event. A handler is
// registered on a Session with Session.AddEventHandler.
type EventHandler struct {
	// AllocateFunc receives the Event for an allocation.
	// NOTE(review): presumably invoked when a task is allocated — confirm
	// against the callers.
	AllocateFunc   func(event *Event)
	// DeallocateFunc receives the Event for a deallocation.
	// NOTE(review): presumably invoked when a task is deallocated or
	// evicted — confirm against the callers.
	DeallocateFunc func(event *Event)
}

type Operation

// Operation describes a named, reversible operation on a task.
// NOTE(review): presumably the unit a Statement records so that Rollback and
// Discard can undo it — confirm against the implementation.
type Operation interface {
	// Name returns the operation's name.
	Name() string
	// TaskInfo returns the pod the operation applies to.
	TaskInfo() *pod_info.PodInfo
	// Reverse undoes the operation and reports any failure as an error.
	Reverse() error
}

type Plugin

// Plugin is the interface of a scheduler plugin. Plugins are constructed by
// a PluginBuilder.
type Plugin interface {
	// Name returns the unique name of the Plugin.
	Name() string

	// OnSessionOpen receives the Session being opened.
	// NOTE(review): presumably where a plugin registers its callbacks via
	// the Session's Add*Fn methods — confirm.
	OnSessionOpen(ssn *Session)
	// OnSessionClose receives the Session being closed.
	OnSessionClose(ssn *Session)
}

type PluginBuilder

// PluginBuilder constructs a Plugin from a map of string arguments.
// Builders are registered by name with RegisterPluginBuilder and looked up
// with GetPluginBuilder.
type PluginBuilder func(map[string]string) Plugin

func GetPluginBuilder

func GetPluginBuilder(name string) (PluginBuilder, bool)

type ReverseOperation

// ReverseOperation is a function that takes no arguments and returns an
// error; it has the same signature as Operation.Reverse.
type ReverseOperation func() error

type Session

// Session is the state handed to actions and plugins: Action.Execute,
// Plugin.OnSessionOpen and Plugin.OnSessionClose all receive a *Session.
// A Session is returned by OpenSession and passed to CloseSession.
type Session struct {
	// UID identifies the session.
	// NOTE(review): presumably the sessionId passed to OpenSession — confirm.
	UID   types.UID
	// Cache is the scheduler cache.
	// NOTE(review): presumably the cache passed to OpenSession — confirm.
	Cache cache.Cache

	// Cluster objects indexed by ID. Nodes is keyed by a plain string
	// (presumably the node name — confirm).
	PodGroupInfos map[common_info.PodGroupID]*podgroup_info.PodGroupInfo
	Nodes         map[string]*node_info.NodeInfo
	Queues        map[common_info.QueueID]*queue_info.QueueInfo
	ConfigMaps    map[common_info.ConfigMapID]*configmap_info.ConfigMapInfo

	// Callback lists, one slice per extension point. Each slice has a
	// corresponding Add* method on Session that takes one value of the
	// slice's element type (presumably appending it — confirm).
	GpuOrderFns                           []api.GpuOrderFn
	NodePreOrderFns                       []api.NodePreOrderFn
	NodeOrderFns                          []api.NodeOrderFn
	TaskOrderFns                          []common_info.CompareFn
	JobOrderFns                           []common_info.CompareFn
	QueueOrderFns                         []common_info.CompareTwoFactors
	CanReclaimResourcesFns                []api.CanReclaimResourcesFn
	ReclaimableFns                        []api.EvictableFn
	OnJobSolutionStartFns                 []api.OnJobSolutionStartFn
	GetQueueAllocatedResourcesFns         []api.QueueResource
	GetQueueDeservedResourcesFns          []api.QueueResource
	GetQueueFairShareFns                  []api.QueueResource
	IsNonPreemptibleJobOverQueueQuotaFns  []api.IsJobOverCapacityFn
	IsJobOverCapacityFns                  []api.IsJobOverCapacityFn
	IsTaskAllocationOnNodeOverCapacityFns []api.IsTaskAllocationOverCapacityFn
	PrePredicateFns                       []api.PrePredicateFn
	PredicateFns                          []api.PredicateFn

	// Config is the scheduler configuration.
	// NOTE(review): presumably the config passed to OpenSession — confirm.
	Config *conf.SchedulerConfiguration
	// contains filtered or unexported fields
}

func OpenSession

func OpenSession(cache cache.Cache, config *conf.SchedulerConfiguration,
	schedulerParams *conf.SchedulerParams, sessionId types.UID) (*Session, error)

func (*Session) AddCanReclaimResourcesFn

func (ssn *Session) AddCanReclaimResourcesFn(crf api.CanReclaimResourcesFn)

func (*Session) AddEventHandler

func (ssn *Session) AddEventHandler(eh *EventHandler)

func (*Session) AddGPUOrderFn

func (ssn *Session) AddGPUOrderFn(gof api.GpuOrderFn)

func (*Session) AddGetQueueAllocatedResourcesFn

func (ssn *Session) AddGetQueueAllocatedResourcesFn(of api.QueueResource)

func (*Session) AddGetQueueDeservedResourcesFn

func (ssn *Session) AddGetQueueDeservedResourcesFn(of api.QueueResource)

func (*Session) AddGetQueueFairShareFn

func (ssn *Session) AddGetQueueFairShareFn(of api.QueueResource)

func (*Session) AddIsJobOverCapacityFn

func (ssn *Session) AddIsJobOverCapacityFn(of api.IsJobOverCapacityFn)

func (*Session) AddIsNonPreemptibleJobOverQueueQuotaFns

func (ssn *Session) AddIsNonPreemptibleJobOverQueueQuotaFns(of api.IsJobOverCapacityFn)

func (*Session) AddIsTaskAllocationOnNodeOverCapacityFn

func (ssn *Session) AddIsTaskAllocationOnNodeOverCapacityFn(of api.IsTaskAllocationOverCapacityFn)

func (*Session) AddJobOrderFn

func (ssn *Session) AddJobOrderFn(jof common_info.CompareFn)

func (*Session) AddNodeOrderFn

func (ssn *Session) AddNodeOrderFn(nof api.NodeOrderFn)

func (*Session) AddNodePreOrderFn

func (ssn *Session) AddNodePreOrderFn(npof api.NodePreOrderFn)

func (*Session) AddOnJobSolutionStartFn

func (ssn *Session) AddOnJobSolutionStartFn(jssf api.OnJobSolutionStartFn)

func (*Session) AddPrePredicateFn

func (ssn *Session) AddPrePredicateFn(pf api.PrePredicateFn)

func (*Session) AddPredicateFn

func (ssn *Session) AddPredicateFn(pf api.PredicateFn)

func (*Session) AddQueueOrderFn

func (ssn *Session) AddQueueOrderFn(qof common_info.CompareTwoFactors)

func (*Session) AddReclaimableFn

func (ssn *Session) AddReclaimableFn(rf api.EvictableFn)

func (*Session) AddTaskOrderFn

func (ssn *Session) AddTaskOrderFn(tof common_info.CompareFn)

func (*Session) AllowConsolidatingReclaim

func (ssn *Session) AllowConsolidatingReclaim() bool

func (*Session) BindPod

func (ssn *Session) BindPod(pod *pod_info.PodInfo) error

func (*Session) CanReclaimResources

func (ssn *Session) CanReclaimResources(reclaimer *reclaimer_info.ReclaimerInfo) bool

func (*Session) CountLeafQueues

func (ssn *Session) CountLeafQueues() int

func (*Session) Evict

func (ssn *Session) Evict(pod *pod_info.PodInfo, message string, evictionMetadata eviction_info.EvictionMetadata) error

func (*Session) FittingGPUs

func (ssn *Session) FittingGPUs(node *node_info.NodeInfo, pod *pod_info.PodInfo) []string

FittingGPUs returns a list of GPUs that fit the pod, sorted by fit score (descending). The returned list consists of: 1. shared GPUs; 2. api.WholeGpuIndicator, which indicates the fit order of whole GPUs relative to shared ones. For example, [api.WholeGpuIndicator, 0, 1] means that a whole (non-shared) GPU fits best, then GPU 0, then GPU 1.

func (*Session) FittingNode

func (ssn *Session) FittingNode(task *pod_info.PodInfo, node *node_info.NodeInfo, writeFittingDelta bool) bool

func (*Session) GetGlobalDefaultStalenessGracePeriod

func (s *Session) GetGlobalDefaultStalenessGracePeriod() time.Duration

func (*Session) GetJobsDepth

func (ssn *Session) GetJobsDepth(action ActionType) int

func (*Session) GetK8sStateForPod

func (ssn *Session) GetK8sStateForPod(uid types.UID) k8s_internal.SessionState

func (*Session) GetMaxNumberConsolidationPreemptees

func (ssn *Session) GetMaxNumberConsolidationPreemptees() int

func (*Session) GetSchedulerName

func (ssn *Session) GetSchedulerName() string

func (*Session) GpuOrderFn

func (ssn *Session) GpuOrderFn(task *pod_info.PodInfo, node *node_info.NodeInfo, gpuIdx string) (float64, error)

func (*Session) InternalK8sPlugins

func (ssn *Session) InternalK8sPlugins() *k8splugins.K8sPlugins

func (*Session) IsInferencePreemptible

func (ssn *Session) IsInferencePreemptible() bool

func (*Session) IsJobOverQueueCapacityFn

func (ssn *Session) IsJobOverQueueCapacityFn(job *podgroup_info.PodGroupInfo,
	tasksToAllocate []*pod_info.PodInfo) *api.SchedulableResult

func (*Session) IsNonPreemptibleJobOverQueueQuotaFn

func (ssn *Session) IsNonPreemptibleJobOverQueueQuotaFn(job *podgroup_info.PodGroupInfo,
	tasksToAllocate []*pod_info.PodInfo) *api.SchedulableResult

func (*Session) IsRestrictNodeSchedulingEnabled

func (ssn *Session) IsRestrictNodeSchedulingEnabled() bool

func (*Session) IsTaskAllocationOnNodeOverCapacityFn

func (ssn *Session) IsTaskAllocationOnNodeOverCapacityFn(task *pod_info.PodInfo, job *podgroup_info.PodGroupInfo,
	node *node_info.NodeInfo) *api.SchedulableResult

func (*Session) JobOrderFn

func (ssn *Session) JobOrderFn(l, r interface{}) bool

func (*Session) NodeOrderFn

func (ssn *Session) NodeOrderFn(task *pod_info.PodInfo, node *node_info.NodeInfo) (float64, error)

func (*Session) NodePoolName

func (ssn *Session) NodePoolName() string

func (*Session) NodePreOrderFn

func (ssn *Session) NodePreOrderFn(task *pod_info.PodInfo, fittingNodes []*node_info.NodeInfo)

func (*Session) OnJobSolutionStart

func (ssn *Session) OnJobSolutionStart()

func (*Session) OrderedNodesByTask

func (ssn *Session) OrderedNodesByTask(nodes []*node_info.NodeInfo, task *pod_info.PodInfo) []*node_info.NodeInfo

func (*Session) OverrideAllowConsolidatingReclaim

func (ssn *Session) OverrideAllowConsolidatingReclaim(allowConsolidatingReclaim bool)

OverrideAllowConsolidatingReclaim overrides the value returned by AllowConsolidatingReclaim. Use for testing purposes.

func (*Session) OverrideGlobalDefaultStalenessGracePeriod

func (s *Session) OverrideGlobalDefaultStalenessGracePeriod(t time.Duration)

OverrideGlobalDefaultStalenessGracePeriod overrides the value returned by GetGlobalDefaultStalenessGracePeriod. Use for testing purposes.

func (*Session) OverrideInferencePreemptible

func (ssn *Session) OverrideInferencePreemptible(isInferencePreemptible bool)

OverrideInferencePreemptible overrides the value returned by IsInferencePreemptible. Use for testing purposes.

func (*Session) OverrideMaxNumberConsolidationPreemptees

func (ssn *Session) OverrideMaxNumberConsolidationPreemptees(maxPreemptees int)

func (*Session) OverrideSchedulerName

func (ssn *Session) OverrideSchedulerName(name string)

func (*Session) PrePredicateFn

func (ssn *Session) PrePredicateFn(task *pod_info.PodInfo, job *podgroup_info.PodGroupInfo) error

func (*Session) PredicateFn

func (ssn *Session) PredicateFn(task *pod_info.PodInfo, job *podgroup_info.PodGroupInfo, node *node_info.NodeInfo) error

func (*Session) QueueAllocatedResources

func (ssn *Session) QueueAllocatedResources(queue *queue_info.QueueInfo) *resource_info.ResourceRequirements

func (*Session) QueueDeservedResources

func (ssn *Session) QueueDeservedResources(queue *queue_info.QueueInfo) *resource_info.ResourceRequirements

func (*Session) QueueFairShare

func (ssn *Session) QueueFairShare(queue *queue_info.QueueInfo) *resource_info.ResourceRequirements

func (*Session) QueueOrderFn

func (ssn *Session) QueueOrderFn(lQ, rQ, lT, rT interface{}) bool

func (*Session) Reclaimable

func (ssn *Session) Reclaimable(
	reclaimer *reclaimer_info.ReclaimerInfo,
	reclaimeeResourcesByQueue map[common_info.QueueID][]*resource_info.Resource,
) bool

func (*Session) ScheduleCSIStorage

func (ssn *Session) ScheduleCSIStorage() bool

func (*Session) Statement

func (ssn *Session) Statement() *Statement

func (*Session) String

func (ssn *Session) String() string

func (*Session) TaskOrderFn

func (ssn *Session) TaskOrderFn(l, r interface{}) bool

func (*Session) UseSchedulingSignatures

func (ssn *Session) UseSchedulingSignatures() bool

type Statement

// Statement is obtained from Session.Statement. Its methods apply task
// operations (Allocate, Pipeline, Evict, Unevict,
// ConvertAllAllocatedToPipelined), take and roll back to a Checkpoint, and
// finish with Commit or Discard.
// NOTE(review): presumably operations are only finalized on Commit and are
// undone on Discard — confirm against the implementation.
type Statement struct {
	// contains filtered or unexported fields
}

func (*Statement) Allocate

func (s *Statement) Allocate(task *pod_info.PodInfo, hostname string) error

func (*Statement) Checkpoint

func (s *Statement) Checkpoint() Checkpoint

func (*Statement) Commit

func (s *Statement) Commit() error

func (*Statement) ConvertAllAllocatedToPipelined

func (s *Statement) ConvertAllAllocatedToPipelined(jobID common_info.PodGroupID) error

func (*Statement) Discard

func (s *Statement) Discard()

func (*Statement) Evict

func (s *Statement) Evict(reclaimeeTask *pod_info.PodInfo, message string,
	evictionMetadata eviction_info.EvictionMetadata) error

func (*Statement) Pipeline

func (s *Statement) Pipeline(task *pod_info.PodInfo, hostname string, updateTaskIfExistsOnNode bool) error

func (*Statement) Rollback

func (s *Statement) Rollback(cp Checkpoint) error

func (*Statement) Unevict

func (s *Statement) Unevict(taskToUnevict *pod_info.PodInfo) error

Jump to

Keyboard shortcuts

? : This menu
/ : Search site
f or F : Jump to
y or Y : Canonical URL