harbor中镜像清理脚本


harbor中的镜像清理

背景

现在harbor中的镜像越来越多,目前harbor的硬盘空间不足,需要定期为harbor进行清理

手动清理过程: 1,选择镜像,然后点击删除
2,点击垃圾清理,手动清理垃圾
3,等待垃圾清理任务完成

脚本

import sys
import requests
import argparse
import logging
import json

# Send timestamped INFO-and-above records to the root handler, and expose a
# module-level logger for every function in this script.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(name)s : %(message)s',
)
logger = logging.getLogger(__name__)


def return_json(method, url, user, password):
    """Send an authenticated GET or DELETE request to ``url``.

    ``user`` and ``password`` are passed to requests as basic-auth
    credentials.

    Returns the decoded JSON body for ``GET`` and ``None`` for ``DELETE``.

    Any other ``method``, or any ``requests`` failure (including a non-2xx
    status), is logged and terminates the process with exit status 1.
    """
    if method not in ("GET", "DELETE"):
        logger.error("Invalid request method!!")
        sys.exit(1)

    wants_body = method == "GET"
    try:
        send = requests.get if wants_body else requests.delete
        reply = send(url, auth=(user, password))
        reply.raise_for_status()
        # Only GET callers consume a payload; DELETE just has to succeed.
        return reply.json() if wants_body else None
    except requests.exceptions.RequestException as err:
        logger.error('Error: {}'.format(err))
        sys.exit(1)


def get_project_id(project_name, url, user, password):
    """Look up the numeric id of the Harbor project called ``project_name``.

    Queries ``<url>/api/projects?name=<project_name>`` and returns the
    ``project_id`` of the entry whose ``name`` equals ``project_name``
    exactly, converted to ``str`` so it can be concatenated into later URLs.

    If the response is empty/null or contains no exact match, an error is
    logged and the process exits with status 1.
    """
    logger.info("get the project id for %s" % project_name)
    projects = return_json("GET", url+'/api/projects?name='+project_name, user, password)

    # The query may return nothing at all, or several projects (Harbor appears
    # to treat ``name`` as a substring filter -- confirm for your version), so
    # scan every hit for the exact name instead of trusting the first entry.
    for project in projects or []:
        if project.get('name') == project_name:
            project_id = project['project_id']
            logger.info("The %s's id: %s" % (project_name, project_id))
            return str(project_id)

    logger.error('No such project %s found' % project_name)
    sys.exit(1)


def get_images_in_project(project_id, url, user, password):
    """Return every repository (image) record in the project ``project_id``.

    ``project_id`` must be a string; it is concatenated into the query URL.

    The listing is requested page by page so that projects with more
    repositories than fit on one page are cleaned completely.
    NOTE(review): Harbor 1.x is understood to default to 10 items per page
    and cap ``page_size`` at 100 -- confirm against the deployed version.

    If the project has no repositories, this logs a message and exits the
    process with status 0.
    """
    logger.info('get images in %s' % project_id)

    page_size = 100
    images = []
    seen = set()
    page = 1
    while True:
        batch = return_json(
            'GET',
            url + '/api/repositories?project_id=' + project_id
            + '&page=%d&page_size=%d' % (page, page_size),
            user,
            password,
        ) or []
        # Keep only repositories not seen before: if the server ignores the
        # paging parameters and keeps returning the same list, this ends the
        # loop instead of spinning forever.
        fresh = [repo for repo in batch if repo['name'] not in seen]
        if not fresh:
            break
        seen.update(repo['name'] for repo in fresh)
        images.extend(fresh)
        if len(batch) < page_size:
            # A short page is the last page.
            break
        page += 1

    if not images:
        logger.info("No images found. Nothing to do.")
        sys.exit(0)
    return images

def get_tags_from_image(project_name, image, url, user, password):
    """Fetch the tags of repository ``image``, ordered by ``created``.

    ``project_name`` is used only in the log message. The tag records come
    from ``<url>/api/repositories/<image>/tags`` and are returned as a new
    list sorted ascending on each record's ``'created'`` value.
    """
    logger.info('get tags  in %s%s' % (project_name, image))
    tags_endpoint = "".join([url, "/api/repositories/", image, "/tags"])
    ordered = list(return_json('GET', tags_endpoint, user, password))
    ordered.sort(key=lambda record: record['created'])
    return ordered


def delete_tags(project_name, image, tag, url, user, password):
    """Delete a single ``tag`` of repository ``image`` and log the removal.

    ``project_name`` is used only in the log message. The DELETE request goes
    to ``<url>/api/repositories/<image>/tags/<tag>``; failures are handled by
    ``return_json``.
    """
    tag_endpoint = "".join([url, "/api/repositories/", image, "/tags/", tag])
    return_json("DELETE", tag_endpoint, user, password)
    logger.info("Delete the tags %s:%s:%s" % (project_name, image, tag))

def create_gc_schedule(url, user, password):
    """POST a ``Manual`` schedule to ``<url>/api/system/gc/schedule``.

    The request body is the JSON document ``{"schedule": {"type": "Manual"}}``
    sent with basic-auth credentials. Success is logged; any ``requests``
    failure (including a non-2xx status) is logged and exits the process
    with status 1.
    """
    payload = json.dumps({"schedule": {"type": "Manual"}})
    try:
        reply = requests.post(
            url + "/api/system/gc/schedule",
            data=payload,
            auth=(user, password),
            headers={"Content-Type": "application/json"},
        )
        reply.raise_for_status()
    except requests.exceptions.RequestException as err:
        logger.error('Error: {}'.format(err))
        sys.exit(1)
    else:
        logger.info("create the gc schedule successful")


def main():
    """Command-line entry point: prune old tags from a Harbor project.

    Parses the command line, resolves the project id, then for each selected
    repository keeps the ``--keep-counts`` most recently created tags and
    deletes the rest (or only logs them when ``--dry-run`` is given). When
    ``--gc`` is set, a manual garbage-collection run is requested afterwards.

    Exits with status 0 after printing help when called without arguments,
    and with status 2 (argparse usage error) when ``--keep-counts`` or
    ``--image`` is missing or invalid.
    """
    parser = argparse.ArgumentParser(prog='harbor_clean', description="clean up images in the harbor")
    rqs_pars = parser.add_argument_group(title="required arguments")
    parser.add_argument('-t', '--dry-run', action='store_true', help='only print the images, do not delete the images')
    parser.add_argument('-g', '--gc', action='store_true', help="if create the gc scheduler or not")
    rqs_pars.add_argument('-i', '--url', required=True, type=str, help='harbor url')
    rqs_pars.add_argument('-u', '--user', required=True, type=str, help="the user of harbor")
    rqs_pars.add_argument('-p', '--password', required=True, type=str, help="the password of user")
    parser.add_argument('-c', '--keep-counts', type=int, help="how much tags need to keep")
    parser.add_argument('-m', '--image', type=str, help="the image name")
    parser.add_argument('project', type=str, help="the project which need to clean in harbor")

    # A bare invocation shows the full help instead of a terse usage error.
    # (Comparing the argument list with 0 could never be true.)
    if not sys.argv[1:]:
        parser.print_help()
        parser.exit()

    args = parser.parse_args()

    # Validate before touching the server: a missing count would crash the
    # comparison below, and a missing image name would crash URL building.
    if args.keep_counts is None or args.keep_counts < 0:
        parser.error("-c/--keep-counts is required and must be a non-negative integer")
    if not args.image:
        parser.error("-m/--image is required: pass a repository name or 'all'")

    harbor_url = args.url
    project_name = args.project
    harbor_user = args.user
    harbor_pass = args.password
    tags_counts = args.keep_counts
    image_name = args.image
    dry_run = args.dry_run
    gc_schedule = args.gc

    project_id = get_project_id(project_name, harbor_url, harbor_user, harbor_pass)

    if image_name.lower() == "all":
        images = get_images_in_project(project_id, harbor_url, harbor_user, harbor_pass)
    else:
        images = [{"name": image_name}]

    for image in images:
        tags = get_tags_from_image(project_name, image['name'], harbor_url, harbor_user, harbor_pass)
        # Tags are sorted oldest first, so the surplus sits at the front.
        # Slicing by an explicit count (not ``[:-n]``) keeps ``-c 0`` correct.
        surplus = len(tags) - tags_counts
        if surplus <= 0:
            logger.info("Do not need to clean up")
            continue
        for tag in tags[:surplus]:
            if dry_run:
                logger.info('we will delte %s:%s:%s' % (project_name, image['name'], tag['name']))
            else:
                delete_tags(project_name, image['name'], tag['name'], harbor_url, harbor_user, harbor_pass)

    if gc_schedule:
        create_gc_schedule(harbor_url, harbor_user, harbor_pass)


# Run the cleanup only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

使用

  • 按创建时间排序,仅打印test项目中每个镜像超出最新15个tag的部分(-t为dry-run,只打印不删除)
python harbor-cleanup.py -i http://harbor-ceshi.sreblog.com -u admin -p harborxxxx  -c 15 test -m all -t
清理
python harbor-cleanup.py -i http://harbor-ceshi.sreblog.com -u admin -p harborxxxx  -c 15 test -m all
  • 清理具体镜像下面的tag
python harbor-cleanup.py -i http://harbor-ceshi.sreblog.com -u admin -p harborxxxx  -c 15 test -m test/test
  • 清理后做垃圾回收
python harbor-cleanup.py -i http://harbor-ceshi.sreblog.com -u admin -p xxxx  -c 15 stjr -m test/test-test -g