「LSF 1-node 構成で LSF クラスタを構築」で作った lsf1 に、lsf2 を追加する。
# useradd -u 600 -g users lsfadmin
# id -a lsfadmin
uid=600(lsfadmin) gid=100(users) groups=100(users)
uid/gid はクラスタ全体で合わせる必要あり
# yum install avahi-tools
# systemctl status avahi-daemon
# firewall-cmd --add-service=mdns --zone=public --permanent
# systemctl restart firewalld
# avahi-resolve -n lsf1.local
lsf1.local fe80::a17f:213d:11b8:a1bd
# avahi-resolve -n lsf2.local
lsf2.local 192.168.122.1
# yum install nfs-utils
# mount -v -t nfs lsf1.local:/usr/share/lsf /mnt
マウントできることを確認したら、/etc/fstab に以下の行を追加して /usr/share/lsf にマウントされるようにする。
lsf1.local:/usr/share/lsf /usr/share/lsf nfs defaults 0 0
# yum install java-1.8.0-openjdk ed openssh-server openssh-clients
[root@lsf1 ~]# scp -r .ssh/ root@lsf2.local:/root/
[root@lsf1 ~]# ssh-keyscan lsf1.local lsf2.local > ~/.ssh/known_hosts
[root@lsf1 ~]# ssh lsf1.local
[root@lsf1 ~]# exit
[root@lsf1 ~]# ssh lsf2.local
[root@lsf2 ~]# exit
[root@lsf2 ~]# ssh-keyscan lsf1.local lsf2.local > ~/.ssh/known_hosts
[root@lsf2 ~]# ssh lsf1.local
[root@lsf1 ~]# exit
[root@lsf2 ~]# ssh lsf2.local
[root@lsf2 ~]# exit
Begin ClusterAdmins
Administrators = lsfadmin
End ClusterAdmins
Begin Host
HOSTNAME model type server RESOURCES #Keywords
#apple Sparc5S SUNSOL 1 (sparc bsd) #Example
#peach DEC3100 DigitalUNIX 1 (alpha osf1)
#banana HP9K778 HPPA 1 (hp68k hpux)
#mango HP735 HPPA 1 (hpux cs)
#grape SGI4D35 SGI5 1 (irix)
#lemon PC200 LINUX 1 (linux)
#pear IBM350 IBMAIX4 1 (aix cs)
#plum PENT_100 NTX86 1 (nt)
#berry DEC3100 ! 1 (ultrix fs bsd mips dec)
#orange ! SUNSOL 1 (sparc bsd) #Example
#prune ! ! 1 (convex)
lsf1.local ! ! 1 (mg)
lsf2.local ! ! 1 (mg)
End Host
Begin Parameters
# LSF_HOST_ADDR_RANGE=*.*.*.*
# FLOAT_CLIENTS_ADDR_RANGE=*.*.*.*
# FLOAT_CLIENTS=10
End Parameters
LSF_MASTER_LIST="lsf1.local"
LSF_SERVER_HOSTS="lsf2.local lsf1.local"
LSF_EGO_DAEMON_CONTROL=N
LSF_RSH="ssh"
$ cat /usr/share/lsf/conf/lsf.conf | grep PORT
LSF_LIM_PORT=7869
LSF_RES_PORT=6878
LSB_MBD_PORT=6881
LSB_SBD_PORT=6882
上記のポートを開放する。
# firewall-cmd --add-port=7869/tcp --zone=public --permanent
# firewall-cmd --add-port=7869/udp --zone=public --permanent
# firewall-cmd --add-port=6878/tcp --zone=public --permanent
# firewall-cmd --add-port=6881/tcp --zone=public --permanent
# firewall-cmd --add-port=6882/tcp --zone=public --permanent
# firewall-cmd --reload
mbatch が有効な場合には 6891/tcp も開放する必要がある。mbatch が有効なときには lsf.conf に LSB_QUERY_PORT=6891 が記載される。
# . /usr/share/lsf/conf/profile.lsf
# lsfstartup
Starting up all LIMs ...
Do you really want to start up LIM on all hosts ? [y/n]y
Start up LIM on <lsf1.local> ...... done
Start up LIM on <lsf2.local> ...... done
Waiting for Master LIM to start up ... Master LIM is ok
Starting up all RESes ...
Do you really want to start up RES on all hosts ? [y/n]y
Start up RES on <lsf1.local> ...... done
Start up RES on <lsf2.local> ...... done
Starting all slave daemons on LSBATCH hosts ...
Do you really want to start up slave batch daemon on all hosts ? [y/n] y
Start up slave batch daemon on <lsf1.local> ...... done
Start up slave batch daemon on <lsf2.local> ...... done
Done starting up LSF daemons on the local LSF cluster ...
# lsid
IBM Spectrum LSF Community Edition 10.1.0.0, Jun 15 2016
Copyright IBM Corp. 1992, 2016. All rights reserved.
US Government Users Restricted Rights - Use, duplication or disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
My cluster name is cluster1
My master name is lsf1
# lshosts
HOST_NAME   type    model   cpuf   ncpus  maxmem  maxswp  server  RESOURCES
lsf1        X86_64  PC6000  116.1  2      1023M   1.9G    Yes     (mg)
lsf2.local  X86_64  PC6000  116.1  1      1023M   1.9G    Yes     (mg)
# bhosts
HOST_NAME   STATUS  JL/U  MAX  NJOBS  RUN  SSUSP  USUSP  RSV
lsf1        ok      -     2    0      0    0      0      0
lsf2.local  ok      -     1    0      0    0      0      0