ccmlib/repository.py (405 lines of code) (raw):
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# downloaded sources handling
from __future__ import absolute_import, division, with_statement
import json
import logging
from logging import handlers
import os
import re
import shutil
import stat
import subprocess
import sys
import tarfile
import tempfile
import time
from distutils.version import LooseVersion # pylint: disable=import-error, no-name-in-module
from six import next, print_
try:
import ConfigParser
except ImportError:
import configparser as ConfigParser
from ccmlib import common
from ccmlib.common import (ArgumentError, CCMError,
update_java_version, get_default_path, get_jdk_version_int,
platform_binary, rmdirs, validate_install_dir)
from six.moves import urllib
# Default base URL from which download_version() fetches release tarballs.
ARCHIVE = "http://archive.apache.org/dist/cassandra"
# Repository cloned for 'git:' version slugs and for the build-from-git fallback in setup().
GIT_REPO = "https://github.com/apache/cassandra.git"
# Repository cloned for 'github:apache/...' version slugs.
GITHUB_REPO = "https://github.com/apache/cassandra"
# GitHub API endpoint listing tag refs; read by get_tagged_version_numbers().
GITHUB_TAGS = "https://api.github.com/repos/apache/cassandra/git/refs/tags"

# User-level ccm configuration, loaded once at import time from ~/.ccm/config.
# The 'repositories' and 'aliases' sections are consulted by this module.
CCM_CONFIG = ConfigParser.RawConfigParser()
CCM_CONFIG.read(os.path.join(os.path.expanduser("~"), ".ccm", "config"))
def setup(version, verbose=False):
    """Make the requested Cassandra version available locally.

    ``version`` may be a prefixed slug handled by ``__setup`` (git:, local:,
    github:, ...), one of the series names 'stable' / 'oldstable' /
    'testing', or a plain version number.

    Returns a ``(directory, version)`` tuple; ``version`` is None when the
    install was produced from a source checkout.
    """
    install_dir, version, fallback = __setup(version, verbose)
    if install_dir:
        return (install_dir, version)

    # Series names resolve to the newest matching tagged release.
    if version in ('stable', 'oldstable', 'testing'):
        version = get_tagged_version_numbers(version)[0]

    install_dir = version_directory(version)
    if install_dir is not None:
        return (install_dir, version)

    try:
        download_version(version, verbose=verbose, binary=True)
        install_dir = version_directory(version)
    except Exception as e:
        # If we failed to download from ARCHIVE,
        # then we build from source from the git repo,
        # as it is more reliable.
        # We don't do this if binary: or source: were
        # explicitly specified.
        if not fallback:
            raise e
        common.warning("Downloading {} failed, trying to build from git instead.\n"
                       "The error was: {}".format(version, e))
        version = 'git:cassandra-{}'.format(version)
        clone_development(GIT_REPO, version, verbose=verbose)
        return (version_directory(version), None)

    return (install_dir, version)
def __setup(version, verbose=False):
    """Handle prefixed version slugs.

    Returns ``(directory, version, fallback)``:

    * ``directory`` is set when the slug was fully resolved here (a source
      checkout or a local clone), otherwise None;
    * ``version`` is the slug with its prefix stripped when the caller still
      has to locate or download it, otherwise None;
    * ``fallback`` tells the caller whether building from git is allowed
      when a download fails (False only for 'binary:' slugs).
    """
    if version.startswith('git:'):
        clone_development(GIT_REPO, version, verbose=verbose)
        return (version_directory(version), None, True)

    if version.startswith('local:'):
        # local: slugs take the form of: "local:/some/path/:somebranch"
        try:
            _, path, branch = version.split(':')
        except ValueError:
            raise CCMError("local version ({}) appears to be invalid. Please format as local:/some/path/:somebranch".format(version))

        clone_development(path, version, verbose=verbose)
        checkout_dir = version_directory(version)
        if checkout_dir is None:
            raise CCMError("Path provided in local slug appears invalid ({})".format(path))
        return (checkout_dir, None, True)

    if version.startswith('binary:'):
        # Explicit binary request: never fall back to building from git.
        return (None, version.replace('binary:', ''), False)

    if version.startswith('github:'):
        user_name, _ = github_username_and_branch_name(version)
        # make sure to use http for cloning read-only repos such as 'github:apache/cassandra-2.1'
        if user_name == "apache":
            repo_url = GITHUB_REPO
        else:
            repo_url = github_repo_for_user(user_name)
        clone_development(repo_url, version, verbose=verbose)
        return (directory_name(version), None, True)

    if version.startswith('source:'):
        return (None, version.replace('source:', ''), True)

    if version.startswith('clone:'):
        # locally present C* source tree
        return (version.replace('clone:', ''), None, True)

    if version.startswith('alias:'):
        alias = version.split(":")[1].split("/")[0]
        try:
            git_repo = CCM_CONFIG.get("aliases", alias)
            clone_development(git_repo, version, verbose=verbose, alias=True)
            return (directory_name(version), None, True)
        except ConfigParser.NoOptionError as e:
            common.warning("Unable to find alias {} in configuration file.".format(alias))
            raise e

    # Unprefixed version: leave it to the caller.
    return (None, version, True)
def validate(path):
    """Re-run setup() for ``path`` when it lives inside the ccm repository directory.

    The last component of the normalised path is used as the version.
    Paths outside the repository directory are ignored.
    """
    if not path.startswith(__get_dir()):
        return
    version = os.path.basename(os.path.normpath(path))
    setup(version)
def _run_logged(cmd, cwd, logger):
    """Run ``cmd`` in ``cwd`` with captured output; return log_info()'s (returncode, stdout, stderr)."""
    process = subprocess.Popen(cmd, cwd=cwd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    return log_info(process, logger)


def _require_success(returncode, message):
    """Raise AssertionError(message) unless ``returncode`` is 0.

    An explicit raise is used instead of ``assert`` so the check is not
    stripped under ``python -O``; the exception type is kept as
    AssertionError so existing callers observe the same failure.
    """
    if returncode != 0:
        raise AssertionError(message)


def clone_development(git_repo, version, verbose=False, alias=False):
    """Check out and compile a Cassandra source tree for ``version``.

    A bare clone of ``git_repo`` is kept as a local cache inside the ccm
    repository directory; the working tree for ``version`` is cloned from
    that cache, the requested branch / tag / SHA is checked out and
    compile_version() is run. If the working tree already exists it is
    fetched and only rebuilt when ``git status`` reports it is behind.

    git_repo -- URL or path of the repository to clone.
    version  -- full version slug ('git:...', 'github:user/branch',
                'local:/path:branch', or 'alias:name/branch' with alias=True).
    verbose  -- forwarded to compile_version().
    alias    -- True when ``version`` is an 'alias:' slug.

    On any failure the working tree for ``version`` is deleted (so a broken
    tree is not mistaken for a compiled one later) and the error re-raised.
    """
    print_(git_repo, version)
    target_dir = directory_name(version)
    assert target_dir

    # Dispatch on the slug *prefix*: a substring test would misroute e.g. a
    # local path or alias that merely contains the word "github".
    if version.startswith('github:'):
        git_repo_name, git_branch = github_username_and_branch_name(version)
    elif version.startswith('local:'):
        git_repo_name = 'local_{}'.format(git_repo)  # add git repo location to distinguish cache location for differing repos
        git_branch = version.split(':')[-1]  # last token on 'local:...' slugs should always be branch name
    elif alias:
        git_repo_name = 'alias_{}'.format(version.split('/')[0].split(':')[-1])
        git_branch = version.split('/')[-1]
    else:
        git_repo_name = 'apache'
        git_branch = version.split(':', 1)[1]
    local_git_cache = os.path.join(__get_dir(), '_git_cache_' + git_repo_name)

    logfile = lastlogfilename()
    logger = get_logger(logfile)

    try:
        # Checkout/fetch a local repository cache to reduce the number of
        # remote fetches we need to perform:
        if not os.path.exists(local_git_cache):
            common.info("Cloning Cassandra...")
            rc, _, _ = _run_logged(
                ['git', 'clone', '--bare', git_repo, local_git_cache],
                __get_dir(), logger)
            _require_success(rc, "Could not do a git clone")
        else:
            common.info("Fetching Cassandra updates...")
            rc, _, _ = _run_logged(
                ['git', 'fetch', '-fup', 'origin',
                 '+refs/heads/*:refs/heads/*', '+refs/tags/*:refs/tags/*'],
                local_git_cache, logger)
            _require_success(rc, "Could not update git")

        # Checkout the version we want from the local cache:
        if not os.path.exists(target_dir):
            # development branch doesn't exist. Check it out.
            common.info("Cloning Cassandra (from local cache)")

            # git on cygwin appears to be adding `cwd` to the commands which is breaking clone
            if sys.platform == "cygwin":
                clone_cmd = ['git', 'clone',
                             local_git_cache.split(os.sep)[-1],
                             target_dir.split(os.sep)[-1]]
            else:
                clone_cmd = ['git', 'clone', local_git_cache, target_dir]
            rc, _, _ = _run_logged(clone_cmd, __get_dir(), logger)
            _require_success(rc, "Could not do a git clone")

            # determine if the request is for a branch
            is_branch = False
            try:
                branch_listing = subprocess.check_output(['git', 'branch', '--all'], cwd=target_dir).decode('utf-8')
                branches = [b.strip() for b in branch_listing.replace('remotes/origin/', '').split()]
                is_branch = git_branch in branches
            except subprocess.CalledProcessError as cpe:
                common.error("Error Running Branch Filter: {}\nAssumming request is not for a branch".format(cpe.output))

            # now check out the right version
            branch_or_sha_tag = 'branch' if is_branch else 'SHA/tag'
            common.info("Checking out requested {} ({})".format(branch_or_sha_tag, git_branch))
            if is_branch:
                # we use checkout -B with --track so we can specify that we want to track a specific branch
                # otherwise, you get errors on branch names that are also valid SHAs or SHA shortcuts, like 10360
                # we use -B instead of -b so we reset branches that already exist and create a new one otherwise
                checkout_cmd = ['git', 'checkout', '-B', git_branch,
                                '--track', 'origin/{git_branch}'.format(git_branch=git_branch)]
            else:
                checkout_cmd = ['git', 'checkout', git_branch]
            rc, _, _ = _run_logged(checkout_cmd, target_dir, logger)
            if rc != 0:
                raise CCMError('Could not check out git branch {branch}. '
                               'Is this a valid branch name? (see {lastlog} or run '
                               '"ccm showlastlog" for details)'.format(
                                   branch=git_branch, lastlog=logfile
                               ))
            # now compile
            compile_version(git_branch, target_dir, verbose)
        else:  # branch is already checked out. See if it is behind and recompile if needed.
            rc, _, _ = _run_logged(['git', 'fetch', 'origin'], target_dir, logger)
            _require_success(rc, "Could not do a git fetch")

            _, status, _ = _run_logged(['git', 'status', '-sb'], target_dir, logger)
            # Decode once so both checks below look at the same text.
            status_text = status.decode("utf-8", "replace")
            if status_text.find('[behind') > -1:  # If `status` looks like '## cassandra-2.2...origin/cassandra-2.2 [behind 9]\n'
                common.info("Branch is behind, recompiling")
                rc, _, _ = _run_logged(['git', 'pull'], target_dir, logger)
                _require_success(rc, "Could not do a git pull")
                rc, _, _ = _run_logged([platform_binary('ant'), 'realclean'], target_dir, logger)
                _require_success(rc, "Could not run 'ant realclean'")

                # now compile
                compile_version(git_branch, target_dir, verbose)
            elif re.search(r'\[.*?(ahead|behind).*?\]', status_text) is not None:  # status looks like '## trunk...origin/trunk [ahead 1, behind 29]\n'
                # If we have diverged in a way that fast-forward merging cannot solve, raise an exception so the cache is wiped
                message = "Could not ascertain branch status, please resolve manually."
                common.error(message)
                raise CCMError(message)
            else:  # status looks like '## cassandra-2.2...origin/cassandra-2.2\n'
                common.debug("Branch up to date, not pulling.")
    except Exception as e:
        # wipe out the directory if anything goes wrong. Otherwise we will assume it has been compiled the next time it runs.
        try:
            rmdirs(target_dir)
            common.error("Deleted {} due to error".format(target_dir))
        except Exception:
            print_('Building C* version {version} failed. Attempted to delete {target_dir} '
                   'but failed. This will need to be manually deleted'.format(
                       version=version,
                       target_dir=target_dir
                   ))
        # Always surface the original failure, whether or not cleanup worked.
        raise e
def download_version(version, url=None, verbose=False, binary=False):
    """Download, extract, and build Cassandra tarball.

    if binary == True, download precompiled tarball, otherwise build from source tarball.

    version -- release number; also the name of the install directory
               created inside the ccm repository directory.
    url     -- explicit tarball URL; when None the URL is derived from the
               archive base (ARCHIVE, or the 'cassandra' option of the
               'repositories' config section).
    verbose -- show download progress and forward to compile_version().

    Raises ArgumentError when the URL cannot be fetched or the file is not
    a readable tarball, and CCMError when the download or build fails.
    """
    archive_url = ARCHIVE
    if CCM_CONFIG.has_option('repositories', 'cassandra'):
        archive_url = CCM_CONFIG.get('repositories', 'cassandra')

    if url is not None:
        archive_url = url
    elif binary:
        archive_url = "%s/%s/apache-cassandra-%s-bin.tar.gz" % (archive_url, version, version)
    else:
        archive_url = "%s/%s/apache-cassandra-%s-src.tar.gz" % (archive_url, version, version)

    # mkstemp returns an open descriptor; close it right away since the
    # download re-opens the file by name.
    fd, target = tempfile.mkstemp(suffix=".tar.gz", prefix="ccm-")
    os.close(fd)

    # Stays None until the install directory is actually being populated, so
    # the error handler below never touches a directory it did not create.
    target_dir = None
    try:
        __download(archive_url, target, show_progress=verbose)
        common.info("Extracting {} as version {} ...".format(target, version))
        tar = tarfile.open(target)
        try:
            first_member = tar.next()
            if first_member is None:
                raise tarfile.ReadError("archive is empty")
            # The tarball's top-level directory is renamed to the version below.
            top_level_dir = first_member.name.split("/")[0]
            # NOTE(review): extractall() trusts member paths from the archive;
            # only point this at trusted mirrors.
            tar.extractall(path=__get_dir())
        finally:
            tar.close()

        target_dir = os.path.join(__get_dir(), version)
        if os.path.exists(target_dir):
            rmdirs(target_dir)
        shutil.move(os.path.join(__get_dir(), top_level_dir), target_dir)

        if binary:
            # Binary installs don't have a build.xml that is needed
            # for pulling the version from. Write the version number
            # into a file to read later in common.get_version_from_build()
            with open(os.path.join(target_dir, '0.version.txt'), 'w') as f:
                f.write(version)
        else:
            compile_version(version, target_dir, verbose=verbose)

    except urllib.error.URLError as e:
        msg = "Invalid version {}".format(version) if url is None else "Invalid url {}".format(url)
        msg = msg + " (underlying error is: {})".format(str(e))
        raise ArgumentError(msg)
    except tarfile.ReadError as e:
        raise ArgumentError("Unable to uncompress downloaded file: {}".format(str(e)))
    except CCMError as e:
        # wipe out the directory if anything goes wrong. Otherwise we will assume it has been compiled the next time it runs.
        if target_dir is not None:
            try:
                rmdirs(target_dir)
                common.error("Deleted {} due to error".format(target_dir))
            except Exception:
                raise CCMError("Building C* version {} failed. Attempted to delete {} but failed. This will need to be manually deleted".format(version, target_dir))
        raise e
    finally:
        # Best-effort removal of the temporary tarball; a leftover temp file
        # must not mask the real outcome.
        try:
            os.remove(target)
        except OSError:
            pass
def compile_version(version, target_dir, verbose=False):
    """Compile Cassandra in ``target_dir`` and, when present, its stress tool.

    The build tool is picked by what the tree contains: a ``gradlew``
    wrapper, else an ``mvnw`` wrapper, else ``ant jar``. The main build is
    attempted up to three times. All build output goes to the repository
    log file.

    Raises CCMError when the build (or the separate stress build) fails.
    """
    logfile = lastlogfilename()
    logger = get_logger(logfile)

    common.info("Compiling Cassandra {} ...".format(version))
    logger.info("--- Cassandra Build -------------------\n")

    build_label = 'Cassandra {} build'.format(version)
    env = update_java_version(install_dir=target_dir, for_build=True, info_message=build_label)

    # Seed the tree with the user's default build.properties, if one exists.
    default_build_properties = os.path.join(common.get_default_path(), 'build.properties.default')
    if os.path.exists(default_build_properties):
        target_build_properties = os.path.join(target_dir, 'build.properties')
        logger.info("Copying %s to %s\n" % (default_build_properties, target_build_properties))
        shutil.copyfile(default_build_properties, target_build_properties)

    try:
        gradlew = os.path.join(target_dir, platform_binary('gradlew'))
        if os.path.exists(gradlew):
            # todo: move to dse/
            cmd = [gradlew, 'jar']
        else:
            mvnw = os.path.join(target_dir, platform_binary('mvnw'))
            if os.path.exists(mvnw):
                # todo: move to hcd/
                cmd = [mvnw, 'verify', '-DskipTest', '-DskipDocker','-DskipDeb','-DskipRPM','-DskipCqlsh', '-Pdatastax-artifactory']
            else:
                # No gradle, use ant
                cmd = [platform_binary('ant'), 'jar']
                if get_jdk_version_int(env=env) >= 11:
                    cmd.append('-Duse.jdk11=true')

        # Patch for pending Cassandra issue: https://issues.apache.org/jira/browse/CASSANDRA-5543
        # Similar patch seen with buildbot
        ret_val = 1
        for attempt in range(3):
            if attempt > 0:
                logger.info("\n\n`{}` failed. Retry #{}...\n\n".format(' '.join(cmd), attempt))
            process = subprocess.Popen(cmd, cwd=target_dir, env=env,
                                       stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            ret_val, stdout, stderr = log_info(process, logger)
            if ret_val == 0:
                break

        if ret_val != 0:
            raise CCMError('Error compiling Cassandra. See {logfile} or run '
                           '"ccm showlastlog" for details, stdout=\'{stdout}\' stderr=\'{stderr}\''.format(
                               logfile=logfile, stdout=stdout.decode(), stderr=stderr.decode()))
    except OSError:
        raise CCMError("Error compiling Cassandra. Is ant installed? See %s for details" % logfile)

    if version >= "0.8.0":
        stress_dir = os.path.join(target_dir, "tools", "stress")
    else:
        stress_dir = os.path.join(target_dir, "contrib", "stress")

    build_xml = os.path.join(stress_dir, 'build.xml')
    if not os.path.exists(build_xml):
        # building stress separately is only necessary pre-1.1
        return

    logger.info("\n\n--- cassandra/stress build ------------\n")
    try:
        # set permissions correctly, seems to not always be the case
        stress_bin_dir = os.path.join(stress_dir, 'bin')
        rwx_rx_rx = (stat.S_IRUSR | stat.S_IWUSR | stat.S_IXUSR |
                     stat.S_IRGRP | stat.S_IXGRP |
                     stat.S_IROTH | stat.S_IXOTH)
        for entry in os.listdir(stress_bin_dir):
            os.chmod(os.path.join(stress_bin_dir, entry), rwx_rx_rx)

        process = subprocess.Popen([platform_binary('ant'), 'build'], cwd=stress_dir, env=env,
                                   stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        ret_val, _, _ = log_info(process, logger)
        if ret_val != 0:
            # Second chance: the 'stress-build' target of the main build file.
            process = subprocess.Popen([platform_binary('ant'), 'stress-build'], cwd=target_dir, env=env,
                                       stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            ret_val, _, _ = log_info(process, logger)
            if ret_val != 0:
                raise CCMError("Error compiling Cassandra stress tool.  "
                               "See %s for details (you will still be able to use ccm "
                               "but not the stress related commands)" % logfile)
    except IOError as e:
        raise CCMError("Error compiling Cassandra stress tool: %s (you will "
                       "still be able to use ccm but not the stress related commands)" % str(e))
def directory_name(version):
    """Return the path inside the repository directory used for ``version``.

    ':' and '/' are spelled out so that slugs such as 'git:trunk' or
    'github:mambocab/trunk' map to a single directory name.
    """
    safe_name = version.replace(':', 'COLON').replace('/', 'SLASH')
    return os.path.join(__get_dir(), safe_name)
def github_username_and_branch_name(version):
    """Split a 'github:<user>/<branch>' slug into ``[user, branch]``.

    Only the first '/' separates user from branch, so branch names may
    themselves contain slashes.
    """
    assert version.startswith('github')
    pieces = version.split(':', 1)
    user_and_branch = pieces[1]
    return user_and_branch.split('/', 1)
def github_repo_for_user(username):
    """Return the HTTPS clone URL of ``username``'s cassandra repository on GitHub."""
    url_template = 'https://github.com/{username}/cassandra.git'
    return url_template.format(username=username)
def version_directory(version):
    """Return the install directory for ``version``, or None if unusable.

    A directory that exists but fails validate_install_dir() is deleted and
    None is returned, so the caller can fetch the version again.
    """
    candidate = directory_name(version)
    if not os.path.exists(candidate):
        return None
    try:
        validate_install_dir(candidate)
    except ArgumentError:
        rmdirs(candidate)
        return None
    return candidate
def clean_all():
    """Delete the whole ccm repository directory and everything in it."""
    repository_dir = __get_dir()
    rmdirs(repository_dir)
def get_tagged_version_numbers(series='stable'):
    """Retrieve git tags and find version numbers for a release series

    series - 'stable', 'oldstable', or 'testing'

    Returns the matching version strings, newest first:

    * 'testing'   -- every tag with a hyphenated suffix after x.y.z;
    * 'stable'    -- plain x.y.z tags in the newest major.minor line;
    * 'oldstable' -- plain x.y.z tags in the next older major.minor line.

    An empty list is returned when no tag matches. Raises AssertionError
    for an unknown series name.
    """
    # Reject bad input before spending a network round-trip on it.
    if series not in ('stable', 'oldstable', 'testing'):
        raise AssertionError("unknown release series: {series}".format(series=series))

    if series == 'testing':
        # Testing releases always have a hyphen after the version number:
        tag_regex = re.compile(r'^refs/tags/cassandra-([0-9]+\.[0-9]+\.[0-9]+-.*$)')
    else:
        # Stable and oldstable releases are just a number:
        tag_regex = re.compile(r'^refs/tags/cassandra-([0-9]+\.[0-9]+\.[0-9]+$)')

    tag_url = urllib.request.urlopen(GITHUB_TAGS)
    try:
        tag_refs = json.loads(tag_url.read())
    finally:
        tag_url.close()

    releases = []
    for ref in (i.get('ref', '') for i in tag_refs):
        m = tag_regex.match(ref)
        if m:
            releases.append(LooseVersion(m.groups()[0]))

    # Sort by semver:
    releases.sort(reverse=True)

    if series == 'testing':
        return [r.vstring for r in releases]
    if not releases:
        return []

    newest = releases[0]
    stable_major_version = LooseVersion(str(newest.version[0]) + "." + str(newest.version[1]))
    stable_releases = [r for r in releases if r >= stable_major_version]
    if series == 'stable':
        return [r.vstring for r in stable_releases]

    # 'oldstable': only computed when asked for, so a repository with a
    # single release line no longer breaks the other series.
    oldstable_releases = [r for r in releases if r not in stable_releases]
    if not oldstable_releases:
        return []
    newest_old = oldstable_releases[0]
    oldstable_major_version = LooseVersion(str(newest_old.version[0]) + "." + str(newest_old.version[1]))
    return [r.vstring for r in oldstable_releases if r >= oldstable_major_version]
def __download(url, target, username=None, password=None, show_progress=False):
    """Download ``url`` into the file ``target``.

    username / password -- optional HTTP basic-auth credentials; when a
        username is given an opener carrying them is installed globally.
    show_progress -- print a running byte count (and percentage when the
        size is known) while downloading.

    When the server reports a Content-Length the download must deliver that
    many bytes: five consecutive empty reads raise CCMError. Without a
    Content-Length the body is read until end of stream.
    """
    if username is not None:
        password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm()
        password_mgr.add_password(None, url, username, password)
        handler = urllib.request.HTTPBasicAuthHandler(password_mgr)
        opener = urllib.request.build_opener(handler)
        urllib.request.install_opener(opener)  # pylint: disable=E1121

    u = urllib.request.urlopen(url)
    try:
        content_length = u.info().get("Content-Length")
        # None means the server did not announce a size (e.g. chunked transfer).
        file_size = int(content_length) if content_length is not None else None
        if file_size is None:
            common.info("Downloading {} to {} (unknown size)".format(url, target))
        else:
            common.info("Downloading {} to {} ({:.3f}MB)".format(url, target, float(file_size) / (1024 * 1024)))

        file_size_dl = 0
        block_sz = 8192
        attempts = 0
        # The context manager closes the file even when the retry limit raises.
        with open(target, 'wb') as f:
            while file_size is None or file_size_dl < file_size:
                buffer = u.read(block_sz)
                if not buffer:
                    if file_size is None:
                        # No announced size: an empty read is the end of the body.
                        break
                    attempts = attempts + 1
                    if attempts >= 5:
                        raise CCMError("Error downloading file (nothing read after {} attempts, downloded only {} of {} bytes)".format(attempts, file_size_dl, file_size))
                    time.sleep(0.5 * attempts)
                    continue

                attempts = 0
                file_size_dl += len(buffer)
                f.write(buffer)
                if show_progress:
                    if file_size is None:
                        status = r"%10d" % file_size_dl
                    else:
                        status = r"%10d  [%3.2f%%]" % (file_size_dl, file_size_dl * 100. / file_size)
                    # Backspaces rewind the cursor so the counter updates in place.
                    status = chr(8) * (len(status) + 1) + status
                    print_(status, end='')
    finally:
        u.close()
def __get_dir():
    """Return the 'repository' directory under the ccm default path, creating it if missing."""
    repo = os.path.join(get_default_path(), 'repository')
    if os.path.exists(repo):
        return repo
    os.mkdir(repo)
    return repo
def lastlogfilename():
    """Return the path of the log file kept inside the repository directory."""
    log_name = "ccm-repository.log"
    return os.path.join(__get_dir(), log_name)
def get_logger(log_file):
    """Return the shared 'repository' logger, writing to ``log_file``.

    The logger is process-wide, so a rotating file handler (5 MB per file,
    5 backups) is attached only if one for this file is not already
    present. Attaching a new handler on every call would write each
    message several times and leak an open file per call.
    """
    logger = logging.getLogger('repository')
    log_path = os.path.abspath(log_file)
    already_attached = any(
        isinstance(h, handlers.RotatingFileHandler) and h.baseFilename == log_path
        for h in logger.handlers
    )
    if not already_attached:
        logger.addHandler(handlers.RotatingFileHandler(log_file, maxBytes=1024 * 1024 * 5, backupCount=5))
    return logger
def log_info(process, logger):
    """Wait for ``process`` to finish and log its captured output.

    Returns ``(returncode, stdout_bytes, stderr_bytes)``. The process is
    expected to have been started with piped stdout and stderr.
    """
    captured_out, captured_err = process.communicate()
    exit_code = process.returncode
    for stream in (captured_out, captured_err):
        logger.info(stream.decode())
    return exit_code, captured_out, captured_err