--- etc/slurm.conf.example.orig	2023-11-21 22:33:29 UTC
+++ etc/slurm.conf.example
@@ -8,8 +8,9 @@
 # Put this file on all nodes of your cluster.
 # See the slurm.conf man page for more information.
 #
-ClusterName=cluster
-SlurmctldHost=linux0
+ClusterName=Beastie
+# Short hostname of the head node
+SlurmctldHost=head
 #SlurmctldHost=
 #
 #DisableRootJobs=NO
@@ -41,7 +42,7 @@ ProctrackType=proctrack/cgroup
 #PrologFlags=
 #PrologSlurmctld=
 #PropagatePrioProcess=0
-#PropagateResourceLimits=
+PropagateResourceLimits=NONE
 #PropagateResourceLimitsExcept=
 #RebootProgram=
 ReturnToService=1
@@ -58,6 +59,8 @@ TaskPlugin=task/affinity
 SwitchType=switch/none
 #TaskEpilog=
 TaskPlugin=task/affinity
+TaskPluginParam=cores
+# For debugging: TaskPluginParam=cores,verbose
 #TaskProlog=
 #TopologyPlugin=topology/tree
 #TmpFS=/tmp
@@ -88,11 +91,12 @@ Waittime=0
 #
 #
 # SCHEDULING
-#DefMemPerCPU=0
+DefMemPerCPU=256
 #MaxMemPerCPU=0
 #SchedulerTimeSlice=30
 SchedulerType=sched/backfill
 SelectType=select/cons_tres
+SelectTypeParameters=CR_Core_Memory
 #
 #
 # JOB PRIORITY
@@ -115,9 +119,11 @@ SelectType=select/cons_tres
 #AccountingStorageHost=
 #AccountingStoragePass=
 #AccountingStoragePort=
-AccountingStorageType=accounting_storage/none
+#AccountingStorageType=accounting_storage/slurmdbd
+#AccountingStorageLoc=/home/slurm/Accounting
+#AccountingStoreJobComment=YES
 #AccountingStorageUser=
-#AccountingStoreFlags=
+AccountingStoreFlags=job_comment
 #JobCompHost=
 #JobCompLoc=
 #JobCompPass=
@@ -128,9 +134,9 @@ SlurmctldDebug=info
 JobAcctGatherFrequency=30
 JobAcctGatherType=jobacct_gather/none
 SlurmctldDebug=info
-SlurmctldLogFile=/var/log/slurmctld.log
+SlurmctldLogFile=/var/log/slurm/slurmctld
 SlurmdDebug=info
-SlurmdLogFile=/var/log/slurmd.log
+SlurmdLogFile=/var/log/slurm/slurmd
 #SlurmSchedLogFile=
 #SlurmSchedLogLevel=
 #DebugFlags=
@@ -148,6 +154,41 @@ SlurmdLogFile=/var/log/slurmd.log
 #SuspendTime=
 #
 #
+
+############################################################################
+# Enable power saving if remote IPMI power-on is available on compute nodes.
+# If unavailable on some nodes, list them in SuspendExcNodes.
+# SlurmUser must be a member of operator and wheel and have a valid
+# login shell in order to execute shutdown on compute nodes.
+# If you prefer to control power manually, see the following scripts
+# from the SPCM port:
+#
+# auto-ipmi-remote-power
+# cluster-power-saver
+# cluster-power-waster
+# cluster-ipmi-power-on
+############################################################################
+
+# SuspendProgram=/usr/local/etc/spcm/slurm-node-suspend
+# SuspendTime should be >= SuspendTimeout + ResumeTimeout.
+# SuspendTime=600
+# SuspendTimeout=60
+#
+# ResumeProgram=/usr/local/etc/spcm/slurm-node-resume
+# ResumeTimeout=300
+# BatchStartTimeout=300
+#
+# Exempt compute nodes that double as file servers or don't have IPMI
+# remote power-on enabled.
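+# SuspendExcNodes takes a Slurm hostlist expression, so a range such as
+# SuspendExcNodes=compute-[001-002] (matching the example nodes defined
+# below) would exempt several nodes at once.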
+#
+# SuspendExcNodes=compute-001
+
 #
 # COMPUTE NODES
-NodeName=linux[1-32] CPUs=1 State=UNKNOWN
-PartitionName=debug Nodes=ALL Default=YES MaxTime=INFINITE State=UP
+# Set RealMemory < avail memory reported in /var/run/dmesg.boot.
+# Note that it may change slightly following freebsd-update.
+NodeName=compute-[001-002] Sockets=2 CoresPerSocket=6 RealMemory=30000 State=UNKNOWN
+# NodeName=compute-256g-[001-002] Sockets=2 CoresPerSocket=6 RealMemory=250000 State=UNKNOWN
+# PartitionName=debug Nodes=ALL Default=NO MaxTime=INFINITE State=UP
+PartitionName=batch Nodes=compute-[001-002] Default=YES MaxTime=INFINITE State=UP
+# PartitionName=256g Nodes=compute-256g-[001-002] Default=NO MaxTime=INFINITE State=UP
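+#
+# Quick sanity check for the node definitions above (stock Slurm tools,
+# no SPCM needed): running `slurmd -C` on a compute node prints the
+# Sockets, CoresPerSocket and RealMemory it detects; the RealMemory
+# configured here should stay below the detected figure.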