One thing I didn't mention: this code also is able to be killed on the
fly without killing the domains it monitors; on restart it will discover
and adopt them, as well as their swap VD's, and resume monitoring.
This feature was needed because the script dies after a few hours of
running -- I'm getting a SIGABRT from somewhere in the xc libraries
every few hours, I think. Note that I'm not only checking
xc.domain_getinfo(), but also updating the swap VD expirys on every trip
through the while loop; one of those is likely the culprit.
Steve
On Mon, Feb 09, 2004 at 09:25:36PM -0800, wrote:
> On Sun, Feb 08, 2004 at 09:19:56AM +0000, Ian Pratt wrote:
> > Of course, this will all be much neater in rev 3 of the domain
> > control tools that will use a db backend to maintain state about
> > currently running domains across a cluster...
>
> Ack! We might be doing duplicate work. How far have you gotten with
> this?
>
> Right now I'm running python code (distantly descended from
> createlinuxdom.py) that is able to:
>
> - monitor each domain and restart as needed
>
> - migrate domains from one host to another
>
> - dynamically create and assign swap vd's, and garbage collect them at
> shutdown or after crash
>
> ...and a few other things. Right now migration is via reboot, not
> suspend; haven't had a chance to troubleshoot resume further.
>
> The only thing I'm using VD's for at this point is swap. This code so
> far depends on NFS root partitions, all served from a central NFS
> server; control and state are communicated via NFS also. I just today
> started migrating the control/state comms to jabber instead, so that I
> could start using VD root filesystems after the COW stuff settles down.
> Haven't decided what to do for migrating filesystems between nodes in
> that case though.
>
> Right now I'm calling this 'txenmon' (TerraLuna Xen Monitor) but was
> already considering renaming it 'xenmon' and posting it after I got it
> cleaned up.
>
> This is all to support a production Xen cluster rollout that I plan to
> have running by the end of this month. I really don't want to go back
> to UML at this point, and if I don't have this cluster running by March
> I'm in deep doo-doo -- so I'm committed to working full-time on Xen
> tools now. ;-}
>
> So here's the current version, not cleaned up, way too verbose, crufty,
> but running:
>
> Steve
>
>
> #!/usr/bin/python2.2
>
> import Xc, XenoUtil, string, sys, os, time, socket, cPickle
>
> # initialize a few variables that might come in handy
> thishostname = socket.gethostname()
> if not len(sys.argv) >= 2:
> print "usage: %s /path/to/base/of/users/hosts" % sys.argv[0]
> sys.exit(1)
>
> nfsserv="10.27.2.50"
>
> base = sys.argv[1]
> if len(base) > 1 and base.endswith('/'):
> base=base[:-1]
>
> # Obtain an instance of the Xen control interface
> xc = Xc.new()
>
> # daemonize
> daemonize=0
> if daemonize:
> try:
> pid = os.fork()
> if pid > 0:
> # exit first parent
> sys.exit(0)
> except OSError, e:
> print >>sys.stderr, "fork #1 failed: %d (%s)" % (e.errno, e.strerror)
> sys.exit(1)
> # decouple from parent environment
> # os.chdir("/")
> os.setsid()
> os.umask(0)
> # XXX what about stdout etc?
> # do second fork
> try:
> pid = os.fork()
> if pid > 0:
> # exit from second parent, print eventual PID before
> # print "Daemon PID %d" % pid
> sys.exit(0)
> except OSError, e:
> print >>sys.stderr, "fork #2 failed: %d (%s)" % (e.errno, e.strerror)
> sys.exit(1)
>
> def main():
> while 1:
> guests=getGuests(base)
> # state machine
> for guest in guests:
> print guest.path,guest.activeHost,guest.isRunning()
> if guest.isMine():
> if guest.isRunningHere():
> guest.heartbeat()
> if guest.isRunnable():
> if guest.isRunningHere():
> pass
> else:
> if guest.isRunning():
> print "warning: %s is running on %s" % (
> guest.name, guest.activeHost
> )
> else:
> guest.start()
> else: # not guest.isRunnable()
> if guest.isRunningHere():
> guest.shutdown()
> if guest.isHung():
> guest.destroy()
> else: # not guest.isMine()
> if guest.isRunningHere():
> guest.shutdown()
> if guest.isRunning():
> pass
> else:
> print "warning: %s is not running on %s" % (
> guest.name,guest.ctl('host')
> )
> # end state machine
> # garbage collect vd's
> usedVds=[]
> for guest in guests:
> if guest.isRunningHere():
> usedVds+=guest.vds()
> guest.pickle()
> for vd in listvdids():
> print "usedVds =",usedVds
> if vd in usedVds:
> pass
> else:
> print "deleting vd %s" % vd
> XenoUtil.vd_delete(vd)
> # garbage collect domains
> # XXX
> time.sleep(10)
> # end while
>
> def getGuests(base):
> users=os.listdir(base)
> guests=[]
> for user in users:
> if not os.path.isdir("%s/%s" % (base,user)):
> continue
> guestnames=os.listdir("%s/%s" % (base,user))
> for name in guestnames:
> path="%s/%s/%s" % (base,user,name)
> try:
> try:
> file=open("%s/log/pickle" % path,"r")
> guest=cPickle.load(file)
> file.close()
> except:
> print "creating",path
> guest=Guest(path)
> except:
> print "exception creating guest %s/%s: %s" % (
> user,
> name,
> sys.exc_info()[1].__dict__
> )
> continue
> guests.append(guest)
> return guests
>
> def listvdids():
> vdids=[]
> for vbd in XenoUtil.vd_list():
> vdids.append(vbd['vdisk_id'])
> print "listvdids =", vdids
> return vdids
>
>
> class Guest(object):
>
> def __init__(self,path):
> self.reload(path)
>
> def reload(self,path):
> pathparts=path.split('/')
> name=pathparts.pop()
> user=pathparts.pop()
> base='/'.join(pathparts)
> self.path=path
> self.base=base
> self.user=user
> self.name=name
> self.domain_name="%s" % name
> self.ctlcache={}
> # requested domain id number
> self.domid=self.ctl("domid")
> # kernel
> self.image=self.ctl("kernel")
> # memory
> self.memory_megabytes=int(self.ctl("mem"))
> swap=self.ctl("swap")
> (swap_dev,swap_megabytes) = swap.split(",")
> self.swap_dev=swap_dev
> self.swap_megabytes=int(swap_megabytes)
> # ip
> self.ipaddr = [self.ctl("ip")]
> self.netmask = XenoUtil.get_current_ipmask()
> self.gateway = self.ctl("gw")
> # vbd's
> vbds = []
> vbdfile = open("%s/ctl/vbds" % self.path,"r")
> for line in vbdfile.readlines():
> print line
> ( uname, virt_name, rw ) = line.split(',')
> uname = uname.strip()
> virt_name = virt_name.strip()
> rw = rw.strip()
> vbds.append(( uname, virt_name, rw ))
> self.vbds=vbds
> self.vbd_expert = 0
> # build kernel command line
> ipbit = "ip="+self.ipaddr[0]
> ipbit += ":"+nfsserv
> ipbit += ":"+self.gateway+":"+self.netmask+"::eth0:off"
> rootbit = "root=/dev/nfs nfsroot=/export/%s/root" % path
> extrabit = "4 DOMID=%s " % self.domid
> self.cmdline = ipbit +" "+ rootbit +" "+ extrabit
> self.curid=None
> self.swapvdid=None
> self.shutdownTime=None
> self.activeHost=None
>
>
> def ctl(self,var):
> filename="%s/ctl/%s" % (self.path,var)
> # if not hasattr(self,'ctlcache'):
> # print dir(self)
> # print self.path
> # self.ctlcache={}
> if not self.ctlcache.has_key('filename'):
> self.ctlcache[filename]={'mtime': 0, 'val': None}
> val=None
> mtime=os.path.getmtime(filename)
> if self.ctlcache[filename]['mtime'] < mtime:
> val = open(filename,"r").readline().strip()
> self.ctlcache[filename]={'mtime': mtime, 'val': val}
> else:
> val = self.ctlcache[filename]['val']
> return val
>
> def destroy(self):
> print "destroying %s" % self.domain_name
> # print "now curid =",self.curid
> if self.curid == 0:
> raise "attempt to kill dom0"
> xc.domain_destroy(dom=self.curid,force=True)
>
> def heartbeat(self):
> assert self.isRunningHere()
> # update swap expiry to one day
> try:
> XenoUtil.vd_refresh(self.swapvdid, 86400)
> except:
> print "%s missed swap expiry update: %s" % (
> self.domain_name,
> sys.exc_info()[1].__dict__
> )
> self.activeHost=thishostname
> self.pickle()
>
> def isHung(self):
> if not self.isRunningHere():
> return False
> if self.shutdownTime and time.time() - self.shutdownTime > 300:
> return True
> return False
>
> def isMine(self):
> if self.ctl("host") == thishostname:
> return True
> return False
>
> def isRunnable(self):
> run=int(self.ctl("run"))
> if run > 0:
> return True
> return False
>
> def isRunning(self):
> if self.isRunningHere():
> return True
> else:
> host=self.activeHost
> if host == None:
> return None
> if host == thishostname:
> return False
> filename="%s/log/%s" % (self.path,"pickle")
> mtime=None
> try:
> mtime=os.path.getmtime(filename)
> except:
> return False
> now=time.time()
> if now - mtime < 60:
> return True
> return False
>
> def isRunningHere(self):
> if not self.curid or self.curid == 0:
> return False
> domains=xc.domain_getinfo()
> domids = [ d['dom'] for d in domains ]
> if self.curid in domids:
> # print self.curid
> return True
> self.curid=None
> return False
>
> def XXXlog(self,var,val=None,append=False):
> filename="%s/log/%s" % (self.path,var)
> if val==None:
> out=None
> try:
> out=open(filename,"r").readlines()
> except:
> return None
> out=[l.strip() for l in out]
> return out
> mode="w"
> if append:
> mode="a"
> file=open(filename,mode)
> file.write("%s\n" % str(val))
> file.close()
>
> def mkswap(self):
> # create swap, 1 minute expiry
> vdid=XenoUtil.vd_create(self.swap_megabytes,60)
> # print "vdid =",vdid
> self.swapvdid=vdid
> uname="vd:%s" % vdid
> # format it
> segments = XenoUtil.lookup_disk_uname(uname)
> if XenoUtil.vd_extents_validate(segments,1) < 0:
> print "segment conflict on %s" % uname
> sys.exit(1)
> tmpdev="/dev/xenswap%s" % vdid
> cmd="mknod %s b 125 %s" % (tmpdev,vdid)
> os.system(cmd)
> virt_dev = XenoUtil.blkdev_name_to_number(tmpdev)
> xc.vbd_create(0,virt_dev,1)
> xc.vbd_setextents(0,virt_dev,segments)
> cmd="mkswap %s" % tmpdev
> os.system(cmd)
> xc.vbd_destroy(0,virt_dev)
> self.vbds.append(( uname, self.swap_dev, "w" ))
> print "mkswap:",uname, self.swap_dev, "w"
> print self.vbds
>
> def pickle(self):
> assert self.isRunningHere()
> # write then rename so others see an atomic operation...
> file=open("%s/log/pickle.new" % self.path,"w")
> cPickle.dump(self,file)
> file.close()
> os.rename(
> "%s/log/pickle.new" % self.path,
> "%s/log/pickle" % self.path
> )
>
> def shutdown(self):
> print "shutting down %s" % self.name
> # reduce swap expiry to 10 minutes (to give it time to shut down)
> if self.swapvdid:
> XenoUtil.vd_refresh(self.swapvdid, 600)
> xc.domain_destroy(dom=self.curid)
> if not self.shutdownTime:
> self.shutdownTime=time.time()
>
> def start(self):
> """Create, build and start the domain for this guest."""
> self.reload(self.path)
> image=self.image
> memory_megabytes=self.memory_megabytes
> domain_name=self.domain_name
> ipaddr=self.ipaddr
> netmask=self.netmask
> vbds=self.vbds
> cmdline=self.cmdline
> vbd_expert=self.vbd_expert
>
> print "Domain image : ", self.image
> print "Domain memory : ", self.memory_megabytes
> print "Domain IP address(es) : ", self.ipaddr
> print "Domain block devices : ", self.vbds
> print 'Domain cmdline : "%s"' % self.cmdline
>
> if self.isRunning():
> raise "%s already running on %s" % (self.name,self.activeHost)
>
> if not os.path.isfile( image ):
> print "Image file '" + image + "' does not exist"
> return None
>
> id = xc.domain_create( mem_kb=memory_megabytes*1024, name=domain_name
> )
> print "Created new domain with id = " + str(id)
> if id <= 0:
> print "Error creating domain"
> return None
>
> ret = xc.linux_build( dom=id, image=image, cmdline=cmdline )
> if ret < 0:
> print "Error building Linux guest OS: "
> print "Return code from linux_build = " + str(ret)
> xc.domain_destroy ( dom=id )
> return None
>
> # setup the virtual block devices
> # set the expertise level appropriately
> XenoUtil.VBD_EXPERT_MODE = vbd_expert
>
> self.mkswap()
>
> self.datavds=[]
> for ( uname, virt_name, rw ) in vbds:
> virt_dev = XenoUtil.blkdev_name_to_number( virt_name )
> segments = XenoUtil.lookup_disk_uname( uname )
> if not segments or segments < 0:
> print "Error looking up %s\n" % uname
> xc.domain_destroy ( dom=id )
> return None
>
> # check that setting up this VBD won't violate the sharing
> # allowed by the current VBD expertise level
> # print uname, virt_name, rw, segments
> if XenoUtil.vd_extents_validate(segments, rw=='w' or rw=='rw') <
> 0:
> xc.domain_destroy( dom = id )
> return None
>
> if xc.vbd_create( dom=id, vbd=virt_dev, writeable= rw=='w' or
> rw=='rw' ):
> print "Error creating VBD vbd=%d writeable=%d\n" %
> (virt_dev,rw)
> xc.domain_destroy ( dom=id )
> return None
>
> if xc.vbd_setextents(
> dom=id,
> vbd=virt_dev,
> extents=segments):
> print "Error populating VBD vbd=%d\n" % virt_dev
> xc.domain_destroy ( dom=id )
> return None
> self.datavds.append(virt_dev)
>
>
> # setup virtual firewall rules for all aliases
> for ip in ipaddr:
> XenoUtil.setup_vfr_rules_for_vif( id, 0, ip )
>
> if xc.domain_start( dom=id ) < 0:
> print "Error starting domain"
> xc.domain_destroy ( dom=id )
> sys.exit()
>
> self.curid=id
> print "domain (re)started: %s (%d)" % (domain_name,id)
> self.heartbeat()
> return id
>
>
> def vds(self):
> vds=[]
> # XXX add data vbds
> vds.append(self.swapvdid)
> return vds
>
>
>
>
> main()
>
>
> --
> Stephen G. Traugott (KG6HDQ)
> UNIX/Linux Infrastructure Architect, TerraLuna LLC
> stevegt@xxxxxxxxxxxxx
> http://www.stevegt.com -- http://Infrastructures.Org
--
Stephen G. Traugott (KG6HDQ)
UNIX/Linux Infrastructure Architect, TerraLuna LLC
stevegt@xxxxxxxxxxxxx
http://www.stevegt.com -- http://Infrastructures.Org
-------------------------------------------------------
The SF.Net email is sponsored by EclipseCon 2004
Premiere Conference on Open Tools Development and Integration
See the breadth of Eclipse activity. February 3-5 in Anaheim, CA.
http://www.eclipsecon.org/osdn
_______________________________________________
Xen-devel mailing list
Xen-devel@xxxxxxxxxxxxxxxxxxxxx
https://lists.sourceforge.net/lists/listinfo/xen-devel
|