CoreOS and Spot Instances just for funzies

Over the last year I’ve grown quite fond of the idea of spot instances on EC2. The idea that you can spin up a relatively large cluster for almost no money to play around with new technology and tools is amazing.

I’ve been playing with CoreOS and the various cloud_config options for the last few hours, and I was getting sick of having to click through the EC2 console every time I wanted to spin up a new cluster based on my new cloud_config. So I made a quick (read: hacky/janky) script to spawn CoreOS clusters on EC2 as spot instances.

spawn_coreos_cluster.py
#!/usr/bin/env python

import argparse
import os
import sys
import time

from boto import ec2
from boto.ec2.blockdevicemapping import BlockDeviceMapping, BlockDeviceType



# Fail fast unless BOTH credentials are exported: spawn_cluster() hands
# AWS_ACCESS_KEY and AWS_SECRET_KEY to boto, so checking only the secret key
# would let a half-configured environment reach the EC2 calls.
if not (os.environ.get('AWS_ACCESS_KEY') and
        os.environ.get('AWS_SECRET_KEY')):
    err = 'No AWS credentials present in the environment, try again...'
    raise SystemExit(err)


# Instance type requested for every node in the cluster.
INSTANCE_TYPE = 'c3.xlarge'
# Spot bid handed to request_spot_instances() as its price argument
# (presumably USD per instance-hour -- confirm against the boto docs).
INSTANCE_BID = '0.05'

# AMI the spot instances boot from.
# NOTE(review): AMI ids are normally region-specific, so this probably only
# matches the default region -- confirm before passing a different --region.
COREOS_AMI = 'ami-31222974'

# Credentials read from the environment and passed to ec2.connect_to_region().
AWS_ACCESS_KEY = os.environ.get('AWS_ACCESS_KEY')
AWS_SECRET_KEY = os.environ.get('AWS_SECRET_KEY')
# Sent as the 'key_name' parameter of the spot request.
EC2_KEY_NAME = 'jake'
# Sent as 'security_group_ids'; 'sg-1234' looks like a placeholder value.
SECURITY_GROUPS = ['sg-1234']


def _positive_int(value):
    """Convert a CLI value to an int, rejecting anything below 1."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError('invalid int value: %r' % value)
    if number < 1:
        raise argparse.ArgumentTypeError('must be at least 1, got %r' % value)
    return number


def parse_args(argv=None):
    """Parse the command-line options for the cluster spawner.

    Args:
        argv: Optional list of argument strings. When None, argparse reads
            ``sys.argv[1:]``, which matches the original no-argument call.

    Returns:
        argparse.Namespace with ``region`` (str) and ``node_count``
        (int, at least 1).

    Raises:
        SystemExit: Raised by argparse on invalid options, including a node
            count below 1.
    """
    ap = argparse.ArgumentParser(description='Spawn a CoreOS cluster')
    ap.add_argument('-r', '--region', default='us-west-1',
                    help='EC2 region to launch the cluster in')
    ap.add_argument('-n', '--node-count',
                    # Reject 0/negative counts here instead of sending a
                    # request EC2 cannot fulfil.
                    type=_positive_int,
                    default=3,
                    help='How many nodes should be in the cluster?')
    return ap.parse_args(argv)


def _get_cloudconfig():
    """Return the text of ``cloud_config.yml`` located next to this script.

    Returns:
        The file's contents as a string (spawn_cluster sends it as the
        instances' user data).

    Raises:
        IOError: If the file is missing or unreadable.
    """
    # realpath() resolves symlinks, so the config is looked up beside the
    # real script file rather than beside a link to it.
    base_path = os.path.dirname(os.path.realpath(__file__))
    # Close the handle deterministically instead of leaving it to the GC.
    with open(os.path.join(base_path, 'cloud_config.yml')) as cloud_config:
        return cloud_config.read()

def spawn_cluster(count, region, max_polls=50, poll_interval=10):
    """Request ``count`` CoreOS spot instances in ``region`` and wait for them.

    Submits a spot request built from the module-level AMI, bid, instance
    type, key pair and security groups plus the cloud-config returned by
    ``_get_cloudconfig()``, tags every request, then polls until each request
    has an instance id and prints a summary of the resulting instances.

    Args:
        count: Number of instances to request.
        region: EC2 region name to connect to.
        max_polls: Maximum number of status checks before giving up.
        poll_interval: Seconds to sleep between status checks.

    Returns:
        None. Progress and results are printed to stdout.

    Raises:
        SystemExit: If ``region`` is not a region name boto knows about.
    """
    conn = ec2.connect_to_region(region,
                                 aws_access_key_id=AWS_ACCESS_KEY,
                                 aws_secret_access_key=AWS_SECRET_KEY)
    # boto returns None rather than raising when the region name is unknown.
    if conn is None:
        raise SystemExit('Unknown EC2 region: %s' % region)

    # Expose both instance-store disks under the device names that
    # cloud_config.yml refers to.
    mapping = BlockDeviceMapping()
    mapping['/dev/xvdb'] = BlockDeviceType(ephemeral_name='ephemeral0')
    mapping['/dev/xvdc'] = BlockDeviceType(ephemeral_name='ephemeral1')

    instance_params = {
        'count': count,
        'key_name': EC2_KEY_NAME,
        'user_data': _get_cloudconfig(),
        'instance_type': INSTANCE_TYPE,
        'block_device_map': mapping,
        'security_group_ids': SECURITY_GROUPS,
    }

    spot_reqs = conn.request_spot_instances(INSTANCE_BID, COREOS_AMI,
                                            **instance_params)
    for req in spot_reqs:
        req.add_tags({'Name': 'coreos-cluster', 'coreos': True})
    spot_ids = [s.id for s in spot_reqs]

    # Single-argument print(...) and range() behave the same on Python 2
    # and Python 3, so the function parses under either interpreter.
    for _ in range(max_polls):
        print('Waiting for instances to spawn...')
        spot_reqs = conn.get_all_spot_instance_requests(request_ids=spot_ids)
        # A request gains an instance_id once it has been fulfilled.
        instance_ids = [s.instance_id for s in spot_reqs
                        if s.instance_id is not None]
        if len(instance_ids) == len(spot_reqs):
            print('Instances all spawned')
            print('=====================')
            for i in conn.get_only_instances(instance_ids=instance_ids):
                print('CoreOS Node:')
                print('    - spot req id: %s' % i.spot_instance_request_id)
                print('    - instance id: %s' % i.id)
                print('    - Public IP: %s' % i.ip_address)
                print('    - Public DNS: %s' % i.public_dns_name)
            break

        time.sleep(poll_interval)
    else:
        # Only reached when the loop never hit ``break``: report the timeout
        # instead of returning silently.
        print('Gave up waiting: not every spot request was fulfilled')


# Script entry point: read the CLI options, then launch the cluster.
if __name__ == '__main__':
    cli_args = parse_args()
    spawn_cluster(count=cli_args.node_count, region=cli_args.region)
cloud_config.yml
#cloud-config
# CoreOS cloud-config that spawn_coreos_cluster.py sends to every instance
# as EC2 user data.
coreos:
  etcd:
    # NOTE(review): "fancy" looks like a placeholder token -- presumably each
    # new cluster needs its own discovery URL; confirm before launching.
    discovery: https://discovery.etcd.io/fancy
    addr: $public_ipv4:4001
    peer-addr: $private_ipv4:7001
  fleet:
      # NOTE(review): indented deeper than the keys under "etcd"; still valid
      # YAML, just inconsistent.
      public-ip: $public_ipv4
  units:
    # Start etcd and fleet.
    - name: etcd.service
      command: start
    - name: fleet.service
      command: start

    # One-shot unit that runs wipefs on /dev/xvdb and /dev/xvdc and then
    # formats both as a single btrfs raid0 volume. These are the device names
    # the spawn script maps to ephemeral0 and ephemeral1.
    - name: format-ephemeral.service
      command: start
      content: |
        [Unit]
        Description=Stripes the ephemeral instance disks to one btrfs volume
        [Service]
        Type=oneshot
        RemainAfterExit=yes
        ExecStart=/usr/sbin/wipefs -f /dev/xvdb /dev/xvdc
        ExecStart=/usr/sbin/mkfs.btrfs -f -d raid0 /dev/xvdb /dev/xvdc

    # Mount unit that places the btrfs volume at /var/lib/docker; it requires
    # and runs after format-ephemeral.service and is ordered before
    # docker.service.
    - name: var-lib-docker.mount
      command: start
      content: |
        [Unit]
        Description=Mount ephemeral to /var/lib/docker
        Requires=format-ephemeral.service
        After=format-ephemeral.service
        Before=docker.service
        [Mount]
        What=/dev/xvdb
        Where=/var/lib/docker
        Type=btrfs
STDOUT
➜  core  ./spawn_coreos_cluster.py -n 3
Waiting for instances to spawn...
Waiting for instances to spawn...

Instances all spawned
=====================
CoreOS Node:
    - spot req id: sir-03rt1m
    - instance id: i-ead754
    - Public IP: 54.183.220.1
    - Public DNS: ec2-54-183-220-1.us-west-1.compute.amazonaws.com
CoreOS Node:
    - spot req id: sir-03rw5q
    - instance id: i-cfd053
    - Public IP: 54.183.178.2
    - Public DNS: ec2-54-183-178-2.us-west-1.compute.amazonaws.com
CoreOS Node:
    - spot req id: sir-03rwp8
    - instance id: i-45d053
    - Public IP: 54.183.218.3
    - Public DNS: ec2-54-183-218-3.us-west-1.compute.amazonaws.com

I’ve posted everything as a gist here: https://gist.github.com/jakedahn/374e2e54fdcef711bf2a