Documentation
¶
Index ¶
- func CloseSession(ssn *Session)
- func RegisterAction(act Action)
- func RegisterPluginBuilder(name string, pc func(map[string]string) Plugin)
- type Action
- type ActionType
- type Checkpoint
- type Event
- type EventHandler
- type Operation
- type Plugin
- type PluginBuilder
- type ReverseOperation
- type Session
- func (ssn *Session) AddCanReclaimResourcesFn(crf api.CanReclaimResourcesFn)
- func (ssn *Session) AddEventHandler(eh *EventHandler)
- func (ssn *Session) AddGPUOrderFn(gof api.GpuOrderFn)
- func (ssn *Session) AddGetQueueAllocatedResourcesFn(of api.QueueResource)
- func (ssn *Session) AddGetQueueDeservedResourcesFn(of api.QueueResource)
- func (ssn *Session) AddGetQueueFairShareFn(of api.QueueResource)
- func (ssn *Session) AddIsJobOverCapacityFn(of api.IsJobOverCapacityFn)
- func (ssn *Session) AddIsNonPreemptibleJobOverQueueQuotaFns(of api.IsJobOverCapacityFn)
- func (ssn *Session) AddIsTaskAllocationOnNodeOverCapacityFn(of api.IsTaskAllocationOverCapacityFn)
- func (ssn *Session) AddJobOrderFn(jof common_info.CompareFn)
- func (ssn *Session) AddNodeOrderFn(nof api.NodeOrderFn)
- func (ssn *Session) AddNodePreOrderFn(npof api.NodePreOrderFn)
- func (ssn *Session) AddOnJobSolutionStartFn(jssf api.OnJobSolutionStartFn)
- func (ssn *Session) AddPrePredicateFn(pf api.PrePredicateFn)
- func (ssn *Session) AddPredicateFn(pf api.PredicateFn)
- func (ssn *Session) AddQueueOrderFn(qof common_info.CompareTwoFactors)
- func (ssn *Session) AddReclaimableFn(rf api.EvictableFn)
- func (ssn *Session) AddTaskOrderFn(tof common_info.CompareFn)
- func (ssn *Session) AllowConsolidatingReclaim() bool
- func (ssn *Session) BindPod(pod *pod_info.PodInfo) error
- func (ssn *Session) CanReclaimResources(reclaimer *reclaimer_info.ReclaimerInfo) bool
- func (ssn *Session) CountLeafQueues() int
- func (ssn *Session) Evict(pod *pod_info.PodInfo, message string, ...) error
- func (ssn *Session) FittingGPUs(node *node_info.NodeInfo, pod *pod_info.PodInfo) []string
- func (ssn *Session) FittingNode(task *pod_info.PodInfo, node *node_info.NodeInfo, writeFittingDelta bool) bool
- func (s *Session) GetGlobalDefaultStalenessGracePeriod() time.Duration
- func (ssn *Session) GetJobsDepth(action ActionType) int
- func (ssn *Session) GetK8sStateForPod(uid types.UID) k8s_internal.SessionState
- func (ssn *Session) GetMaxNumberConsolidationPreemptees() int
- func (ssn *Session) GetSchedulerName() string
- func (ssn *Session) GpuOrderFn(task *pod_info.PodInfo, node *node_info.NodeInfo, gpuIdx string) (float64, error)
- func (ssn *Session) InternalK8sPlugins() *k8splugins.K8sPlugins
- func (ssn *Session) IsInferencePreemptible() bool
- func (ssn *Session) IsJobOverQueueCapacityFn(job *podgroup_info.PodGroupInfo, tasksToAllocate []*pod_info.PodInfo) *api.SchedulableResult
- func (ssn *Session) IsNonPreemptibleJobOverQueueQuotaFn(job *podgroup_info.PodGroupInfo, tasksToAllocate []*pod_info.PodInfo) *api.SchedulableResult
- func (ssn *Session) IsRestrictNodeSchedulingEnabled() bool
- func (ssn *Session) IsTaskAllocationOnNodeOverCapacityFn(task *pod_info.PodInfo, job *podgroup_info.PodGroupInfo, ...) *api.SchedulableResult
- func (ssn *Session) JobOrderFn(l, r interface{}) bool
- func (ssn *Session) NodeOrderFn(task *pod_info.PodInfo, node *node_info.NodeInfo) (float64, error)
- func (ssn *Session) NodePoolName() string
- func (ssn *Session) NodePreOrderFn(task *pod_info.PodInfo, fittingNodes []*node_info.NodeInfo)
- func (ssn *Session) OnJobSolutionStart()
- func (ssn *Session) OrderedNodesByTask(nodes []*node_info.NodeInfo, task *pod_info.PodInfo) []*node_info.NodeInfo
- func (ssn *Session) OverrideAllowConsolidatingReclaim(allowConsolidatingReclaim bool)
- func (s *Session) OverrideGlobalDefaultStalenessGracePeriod(t time.Duration)
- func (ssn *Session) OverrideInferencePreemptible(isInferencePreemptible bool)
- func (ssn *Session) OverrideMaxNumberConsolidationPreemptees(maxPreemptees int)
- func (ssn *Session) OverrideSchedulerName(name string)
- func (ssn *Session) PrePredicateFn(task *pod_info.PodInfo, job *podgroup_info.PodGroupInfo) error
- func (ssn *Session) PredicateFn(task *pod_info.PodInfo, job *podgroup_info.PodGroupInfo, ...) error
- func (ssn *Session) QueueAllocatedResources(queue *queue_info.QueueInfo) *resource_info.ResourceRequirements
- func (ssn *Session) QueueDeservedResources(queue *queue_info.QueueInfo) *resource_info.ResourceRequirements
- func (ssn *Session) QueueFairShare(queue *queue_info.QueueInfo) *resource_info.ResourceRequirements
- func (ssn *Session) QueueOrderFn(lQ, rQ, lT, rT interface{}) bool
- func (ssn *Session) Reclaimable(reclaimer *reclaimer_info.ReclaimerInfo, ...) bool
- func (ssn *Session) ScheduleCSIStorage() bool
- func (ssn *Session) Statement() *Statement
- func (ssn *Session) String() string
- func (ssn *Session) TaskOrderFn(l, r interface{}) bool
- func (ssn *Session) UseSchedulingSignatures() bool
- type Statement
- func (s *Statement) Allocate(task *pod_info.PodInfo, hostname string) error
- func (s *Statement) Checkpoint() Checkpoint
- func (s *Statement) Commit() error
- func (s *Statement) ConvertAllAllocatedToPipelined(jobID common_info.PodGroupID) error
- func (s *Statement) Discard()
- func (s *Statement) Evict(reclaimeeTask *pod_info.PodInfo, message string, ...) error
- func (s *Statement) Pipeline(task *pod_info.PodInfo, hostname string, updateTaskIfExistsOnNode bool) error
- func (s *Statement) Rollback(cp Checkpoint) error
- func (s *Statement) Unevict(taskToUnevict *pod_info.PodInfo) error
Constants ¶
This section is empty.
Variables ¶
This section is empty.
Functions ¶
func CloseSession ¶
func CloseSession(ssn *Session)
func RegisterAction ¶
func RegisterAction(act Action)
Types ¶
type Action ¶
type Action interface {
	// The unique name of Action.
	Name() ActionType

	// Execute allocates the cluster's resources into each queue.
	Execute(ssn *Session)
}
Action is the interface of scheduler action.
type ActionType ¶
type ActionType string
const (
	Reclaim           ActionType = "reclaim"
	Preempt           ActionType = "preempt"
	Allocate          ActionType = "allocate"
	Consolidation     ActionType = "consolidation"
	StaleGangEviction ActionType = "stalegangeviction"
)
type Checkpoint ¶
type Checkpoint int
type EventHandler ¶
type PluginBuilder ¶
func GetPluginBuilder ¶
func GetPluginBuilder(name string) (PluginBuilder, bool)
type ReverseOperation ¶
type ReverseOperation func() error
type Session ¶
type Session struct {
	UID   types.UID
	Cache cache.Cache

	PodGroupInfos map[common_info.PodGroupID]*podgroup_info.PodGroupInfo
	Nodes         map[string]*node_info.NodeInfo
	Queues        map[common_info.QueueID]*queue_info.QueueInfo
	ConfigMaps    map[common_info.ConfigMapID]*configmap_info.ConfigMapInfo

	GpuOrderFns                           []api.GpuOrderFn
	NodePreOrderFns                       []api.NodePreOrderFn
	NodeOrderFns                          []api.NodeOrderFn
	TaskOrderFns                          []common_info.CompareFn
	JobOrderFns                           []common_info.CompareFn
	QueueOrderFns                         []common_info.CompareTwoFactors
	CanReclaimResourcesFns                []api.CanReclaimResourcesFn
	ReclaimableFns                        []api.EvictableFn
	OnJobSolutionStartFns                 []api.OnJobSolutionStartFn
	GetQueueAllocatedResourcesFns         []api.QueueResource
	GetQueueDeservedResourcesFns          []api.QueueResource
	IsNonPreemptibleJobOverQueueQuotaFns  []api.IsJobOverCapacityFn
	IsJobOverCapacityFns                  []api.IsJobOverCapacityFn
	IsTaskAllocationOnNodeOverCapacityFns []api.IsTaskAllocationOverCapacityFn
	PrePredicateFns                       []api.PrePredicateFn
	PredicateFns                          []api.PredicateFn

	Config *conf.SchedulerConfiguration
	// contains filtered or unexported fields
}
func OpenSession ¶
func OpenSession(cache cache.Cache, config *conf.SchedulerConfiguration, schedulerParams *conf.SchedulerParams, sessionId types.UID) (*Session, error)
func (*Session) AddCanReclaimResourcesFn ¶
func (ssn *Session) AddCanReclaimResourcesFn(crf api.CanReclaimResourcesFn)
func (*Session) AddEventHandler ¶
func (ssn *Session) AddEventHandler(eh *EventHandler)
func (*Session) AddGPUOrderFn ¶
func (ssn *Session) AddGPUOrderFn(gof api.GpuOrderFn)
func (*Session) AddGetQueueAllocatedResourcesFn ¶
func (ssn *Session) AddGetQueueAllocatedResourcesFn(of api.QueueResource)
func (*Session) AddGetQueueDeservedResourcesFn ¶
func (ssn *Session) AddGetQueueDeservedResourcesFn(of api.QueueResource)
func (*Session) AddGetQueueFairShareFn ¶
func (ssn *Session) AddGetQueueFairShareFn(of api.QueueResource)
func (*Session) AddIsJobOverCapacityFn ¶
func (ssn *Session) AddIsJobOverCapacityFn(of api.IsJobOverCapacityFn)
func (*Session) AddIsNonPreemptibleJobOverQueueQuotaFns ¶
func (ssn *Session) AddIsNonPreemptibleJobOverQueueQuotaFns(of api.IsJobOverCapacityFn)
func (*Session) AddIsTaskAllocationOnNodeOverCapacityFn ¶
func (ssn *Session) AddIsTaskAllocationOnNodeOverCapacityFn(of api.IsTaskAllocationOverCapacityFn)
func (*Session) AddJobOrderFn ¶
func (ssn *Session) AddJobOrderFn(jof common_info.CompareFn)
func (*Session) AddNodeOrderFn ¶
func (ssn *Session) AddNodeOrderFn(nof api.NodeOrderFn)
func (*Session) AddNodePreOrderFn ¶
func (ssn *Session) AddNodePreOrderFn(npof api.NodePreOrderFn)
func (*Session) AddOnJobSolutionStartFn ¶
func (ssn *Session) AddOnJobSolutionStartFn(jssf api.OnJobSolutionStartFn)
func (*Session) AddPrePredicateFn ¶
func (ssn *Session) AddPrePredicateFn(pf api.PrePredicateFn)
func (*Session) AddPredicateFn ¶
func (ssn *Session) AddPredicateFn(pf api.PredicateFn)
func (*Session) AddQueueOrderFn ¶
func (ssn *Session) AddQueueOrderFn(qof common_info.CompareTwoFactors)
func (*Session) AddReclaimableFn ¶
func (ssn *Session) AddReclaimableFn(rf api.EvictableFn)
func (*Session) AddTaskOrderFn ¶
func (ssn *Session) AddTaskOrderFn(tof common_info.CompareFn)
func (*Session) AllowConsolidatingReclaim ¶
func (*Session) CanReclaimResources ¶
func (ssn *Session) CanReclaimResources(reclaimer *reclaimer_info.ReclaimerInfo) bool
func (*Session) CountLeafQueues ¶
func (*Session) Evict ¶
func (ssn *Session) Evict(pod *pod_info.PodInfo, message string, evictionMetadata eviction_info.EvictionMetadata) error
func (*Session) FittingGPUs ¶
FittingGPUs returns a list of GPUs that fit the pod, sorted by fit score (descending). The returned list will consist of:
1. Shared GPUs
2. api.WholeGpuIndicator (to indicate the fit order of whole GPUs compared to shared ones)
For example, [api.WholeGpuIndicator, 0, 1] means that a whole (non-shared) GPU fits the best, then GPU 0, then GPU 1.
func (*Session) FittingNode ¶
func (*Session) GetGlobalDefaultStalenessGracePeriod ¶
func (*Session) GetJobsDepth ¶
func (ssn *Session) GetJobsDepth(action ActionType) int
func (*Session) GetK8sStateForPod ¶
func (ssn *Session) GetK8sStateForPod(uid types.UID) k8s_internal.SessionState
func (*Session) GetMaxNumberConsolidationPreemptees ¶
func (*Session) GetSchedulerName ¶
func (*Session) GpuOrderFn ¶
func (*Session) InternalK8sPlugins ¶
func (ssn *Session) InternalK8sPlugins() *k8splugins.K8sPlugins
func (*Session) IsInferencePreemptible ¶
func (*Session) IsJobOverQueueCapacityFn ¶
func (ssn *Session) IsJobOverQueueCapacityFn(job *podgroup_info.PodGroupInfo, tasksToAllocate []*pod_info.PodInfo) *api.SchedulableResult
func (*Session) IsNonPreemptibleJobOverQueueQuotaFn ¶
func (ssn *Session) IsNonPreemptibleJobOverQueueQuotaFn(job *podgroup_info.PodGroupInfo, tasksToAllocate []*pod_info.PodInfo) *api.SchedulableResult
func (*Session) IsRestrictNodeSchedulingEnabled ¶
func (*Session) IsTaskAllocationOnNodeOverCapacityFn ¶
func (ssn *Session) IsTaskAllocationOnNodeOverCapacityFn(task *pod_info.PodInfo, job *podgroup_info.PodGroupInfo, node *node_info.NodeInfo) *api.SchedulableResult
func (*Session) JobOrderFn ¶
func (*Session) NodeOrderFn ¶
func (*Session) NodePoolName ¶
func (*Session) NodePreOrderFn ¶
func (*Session) OnJobSolutionStart ¶
func (ssn *Session) OnJobSolutionStart()
func (*Session) OrderedNodesByTask ¶
func (*Session) OverrideAllowConsolidatingReclaim ¶
OverrideAllowConsolidatingReclaim overrides the value returned by AllowConsolidatingReclaim. Use for testing purposes.
func (*Session) OverrideGlobalDefaultStalenessGracePeriod ¶
OverrideGlobalDefaultStalenessGracePeriod overrides the value returned by GetGlobalDefaultStalenessGracePeriod. Use for testing purposes.
func (*Session) OverrideInferencePreemptible ¶
OverrideInferencePreemptible overrides the value returned by IsInferencePreemptible. Use for testing purposes.
func (*Session) OverrideMaxNumberConsolidationPreemptees ¶
func (*Session) OverrideSchedulerName ¶
func (*Session) PrePredicateFn ¶
func (ssn *Session) PrePredicateFn(task *pod_info.PodInfo, job *podgroup_info.PodGroupInfo) error
func (*Session) PredicateFn ¶
func (ssn *Session) PredicateFn(task *pod_info.PodInfo, job *podgroup_info.PodGroupInfo, node *node_info.NodeInfo) error
func (*Session) QueueAllocatedResources ¶
func (ssn *Session) QueueAllocatedResources(queue *queue_info.QueueInfo) *resource_info.ResourceRequirements
func (*Session) QueueDeservedResources ¶
func (ssn *Session) QueueDeservedResources(queue *queue_info.QueueInfo) *resource_info.ResourceRequirements
func (*Session) QueueFairShare ¶
func (ssn *Session) QueueFairShare(queue *queue_info.QueueInfo) *resource_info.ResourceRequirements
func (*Session) QueueOrderFn ¶
func (*Session) Reclaimable ¶
func (ssn *Session) Reclaimable(
	reclaimer *reclaimer_info.ReclaimerInfo,
	reclaimeeResourcesByQueue map[common_info.QueueID][]*resource_info.Resource,
) bool
func (*Session) ScheduleCSIStorage ¶
func (*Session) TaskOrderFn ¶
func (*Session) UseSchedulingSignatures ¶
type Statement ¶
type Statement struct {
// contains filtered or unexported fields
}
func (*Statement) Checkpoint ¶
func (s *Statement) Checkpoint() Checkpoint
func (*Statement) ConvertAllAllocatedToPipelined ¶
func (s *Statement) ConvertAllAllocatedToPipelined(jobID common_info.PodGroupID) error
func (*Statement) Evict ¶
func (s *Statement) Evict(reclaimeeTask *pod_info.PodInfo, message string, evictionMetadata eviction_info.EvictionMetadata) error
func (*Statement) Rollback ¶
func (s *Statement) Rollback(cp Checkpoint) error