GitHub 项目 commit / issue / PR 实时监控工具

前言:这是一篇 GitHub 项目 commit / issue / PR 监控功能的实现笔记,主要价值是方便及时跟进漏洞相关的更新。

GitHub地址:https://github.com/chibd2000/vue-myscan

监控效果如下所示,支持微信和 QQ 邮箱提醒,这里用微信做演示。

数据库设计


class GithubTask(db.Model):
    """ORM model for the ``github_task`` table: one row per monitoring task."""
    __tablename__ = 'github_task'

    # Primary key identifying the monitoring task.
    monitor_id = Column(String(255, 'utf8mb3_unicode_ci'), primary_key=True, index=True)
    # Target repository (per the column's info text).
    target = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, info='目标仓库')
    # Remark / display name of the task.
    name = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, info='备注名称')
    # Number of entries still waiting to be viewed.
    not_visited = Column(Integer, nullable=False, info='等待查看的条目')
    # Running state; the meaning of the individual integer values is not defined in this class.
    status = Column(Integer, nullable=False, info='运行状态')
    # Time of the previous scan.  NOTE(review): presumably Unix epoch seconds - confirm against the writer.
    last_update_time = Column(Integer, nullable=True, info='上次扫描时间')
    # Time of the next scan.  NOTE(review): presumably Unix epoch seconds - confirm against the writer.
    next_update_time = Column(Integer, nullable=True, info='下次扫描时间')

class GithubCommit(db.Model):
    """ORM model for the ``github_commit`` table: one row per recorded commit."""
    __tablename__ = 'github_commit'

    # Commit SHA, used as the primary key.
    commit_sha = Column(String(255, 'utf8mb3_unicode_ci'), primary_key=True, index=True)
    # Identifier of the monitoring task this commit belongs to (indexed, no foreign key declared).
    monitor_id = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, index=True)
    # URL of the commit.
    commit_url = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, info='url地址')
    # Commit message, up to 2048 characters.
    commit_message = Column(String(2048, 'utf8mb3_unicode_ci'), nullable=False, info='commit信息')
    # Author name.  NOTE(review): the info text repeats the one of commit_message - looks copy-pasted.
    commit_author_name = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, info='commit信息')
    # Author time stored as an integer.  NOTE(review): presumably Unix epoch seconds - confirm.
    commit_author_date = Column(Integer, nullable=False, info='时间')
    # Committer name.  NOTE(review): the info text repeats the one of commit_message - looks copy-pasted.
    commit_committer_name = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, info='commit信息')
    # Committer time stored as an integer.  NOTE(review): presumably Unix epoch seconds - confirm.
    commit_committer_date = Column(Integer, nullable=False, info='时间')
    # commit_file = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, info='标题')
    # commit_status = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, info='状态')
    # Branch name.
    commit_branch = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, info='分支')
    # Severity level as an integer.
    commit_level = Column(Integer, nullable=False, info='程度')
    # Read flag; update_visited() treats 0 as unread and 1 as read.
    is_visited = Column(Integer, nullable=False, info='是否已经阅读')

    @staticmethod
    def update_visited(monitor_id):
        """Mark every unread commit of ``monitor_id`` as read.

        Rows with ``is_visited == 0`` for the given monitor are set to 1 in a
        single bulk UPDATE, the session is committed, and ``True`` is returned.
        """
        db.session.query(GithubCommit).filter_by(monitor_id=monitor_id, is_visited=0).update({'is_visited': 1})
        db.session.commit()
        return True


class GithubIssue(db.Model):
    """ORM model for the ``github_issue`` table: one row per recorded issue or pull request."""
    __tablename__ = 'github_issue'

    # Issue id, used as the primary key.
    # NOTE(review): if this holds GitHub's global issue id, values may exceed a signed
    # 32-bit INT - confirm whether a wider integer type is needed.
    issue_id = Column(Integer, primary_key=True)
    # Identifier of the monitoring task this entry belongs to (indexed, no foreign key declared).
    monitor_id = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, index=True)
    # URL of the issue / pull request.
    issue_url = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, info='地址')
    # User who created the issue.
    issue_create_user = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, info='问题创建用户')
    # Creation time stored as an integer.  NOTE(review): presumably Unix epoch seconds - confirm.
    issue_create_date = Column(Integer, nullable=False, info='问题创建时间')
    # State of the issue as a string.
    issue_status = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, info='状态')
    # Severity level as an integer.
    issue_level = Column(Integer, nullable=False, info='程度')
    # Title, limited to 255 characters by the column type.
    issue_title = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, info='标题')
    # Body text, limited to 255 characters by the column type.
    issue_body = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, info='内容')
    # Distinguishes pull requests from issues (per the info text).
    type = Column(String(255, 'utf8mb3_unicode_ci'), nullable=False, info='is pr or issue')
    # Read flag.  NOTE(review): presumably 0 = unread / 1 = read like github_commit - confirm.
    is_visited = Column(Integer, nullable=False, info='是否已经阅读')

代码设计

import datetime
import random

from module.asynchttp import AsyncFetcher
from module.attributedict import AttributeDict
from module.database import GithubDB
import asyncio
import aiohttp
import time

from utils.conf_reader import get_database_conf, get_github_conf, get_celery_conf

"""
实现:github monitor 主要实现了对于想要跟踪漏洞的项目的commit记录监控展示以及收发信息提醒的实现
为什么要写这个?
- 主要的原因是之后的工作更多的需要追漏洞的细节,所以这边实现了这个功能然后给自己使用

不太喜欢看别人的代码再去写,因为看别人的代码比较累,自己还是更愿意先用自己的思路去实现一次,然后还有什么可以改正的再去参考别人代码
"""


class GithubError(Exception):
    """Root of the exception hierarchy raised by the GitHub monitor."""


class GithubTokenError(GithubError):
    """Raised when the GitHub API rejects the access token."""

    def __init__(self, message):
        # Keep the text on a dedicated attribute for callers that read it.
        self.message = message


class GithubLimitError(GithubError):
    """Raised when the GitHub API refuses a request because of rate limiting."""

    def __init__(self, message):
        # Keep the text on a dedicated attribute for callers that read it.
        self.message = message


class GithubGrammarError(GithubError):
    """Raised when the GitHub API cannot process the search query."""

    def __init__(self, message):
        # Keep the text on a dedicated attribute for callers that read it.
        self.message = message


class GithubMonitor:
    """Base class shared by the GitHub monitors.

    Holds the REST endpoints, the configured API tokens and the database
    handle, and provides helpers to validate a token and to fetch JSON.
    """

    # Days subtracted from "now" when time_format() builds the default lower
    # bound of a search query.  One day of overlap keeps recent items visible
    # when the local date is ahead of the date the query is evaluated in
    # (NOTE(review): presumably UTC on GitHub's side - confirm); the monitors
    # skip items that are already stored, so the overlap causes no duplicates.
    lookback_days = 1

    def __init__(self):
        self.source = 'Base'
        self._verify_addr = 'https://api.github.com/user'
        # Comma-separated list of API tokens taken from the configuration.
        self._apis = get_github_conf()['api']
        self.branch_addr = 'https://api.github.com/repos/{}/branches'
        self.commit_addr = 'https://api.github.com/search/commits?q={}'
        self.issue_addr = 'https://api.github.com/search/issues?q={}'
        # Maximum number of new items a monitor handles per run.
        self.request_limit = 1

        self._init_db()

    def _init_db(self):
        """Create the database handle from the database configuration."""
        db_conf = get_database_conf()
        self.db_conn = GithubDB(db_conf)

    def time_format(self, timestamp=None):
        """Return the ``YYYY-MM-DD`` date used as lower bound of search queries.

        The previous implementation always returned the date of a frozen
        debugging timestamp, so the search window never moved forward.

        :param timestamp: Unix time to format (local time); when omitted the
            current time minus ``lookback_days`` days is used.
        :return: the formatted date string.
        """
        if timestamp is None:
            timestamp = time.time() - self.lookback_days * 86400
        return time.strftime("%Y-%m-%d", time.localtime(timestamp))

    async def get_fetch_json(self, session, url, headers=None):
        """Fetch ``url`` and return the decoded JSON body.

        :param session: the aiohttp client session to use.
        :param url: the address to request.
        :param headers: optional request headers.
        :raises GithubGrammarError: when the response status is 422.
        :raises GithubLimitError: when the response status is 403 or 429,
            the statuses GitHub uses for rate limiting.
        :return: the decoded JSON payload.
        """
        response = await AsyncFetcher.fetch_response(session=session, url=url, json=True, headers=headers)
        # The body is read before the status check, exactly as before.
        ret_json = await response.json(encoding='utf-8')
        if response.status == 422:
            raise GithubGrammarError('commit grammar error')
        if response.status in (403, 429):
            raise GithubLimitError('request limit error')
        return ret_json

    async def verify_api_available(self):
        """Pick one configured token at random and check it against the API.

        :return: ``(True, headers)`` when the token is accepted (status 200),
            otherwise ``(False, None)``.  A two-item tuple is returned on
            every path, because the callers unpack the result.
        """
        try:
            # Tolerate blanks around the commas of the configured token list.
            api_list = [token.strip() for token in self._apis.split(',') if token.strip()]
            headers = {'Accept': 'application/vnd.github+json',
                       'Authorization': 'Bearer {}'.format(random.choice(api_list)),
                       'X-GitHub-Api-Version': '2022-11-28'}
            async with aiohttp.ClientSession() as session:
                response = await AsyncFetcher.fetch_response(session=session, url=self._verify_addr, headers=headers)
                if response.status == 200:
                    return True, headers
                if response.status == 401:
                    raise GithubTokenError('token error')
                # Any other status used to fall through and return None.
                print('unexpected status {} while verifying github token'.format(response.status))
                return False, None
        except Exception as e:
            # GithubTokenError is handled here as well; the failure is
            # reported and signalled through the return value.
            print(e)
            return False, None


class GithubCommitStatus:
    """String constants naming the change states of a file in a commit."""

    # A file that was added.
    ADD = 'added'
    # A file that was modified.
    MODIFY = 'modified'
    # A file that was removed.
    REMOVE = 'removed'


class GithubCommit(object):
    """Plain record describing one commit of a monitored repository.

    Two records are equal when their SHAs are equal; a record also compares
    equal to the bare SHA value itself.
    """

    def __init__(self, monitor_id, commit_sha):
        """
        :param monitor_id: identifier of the monitoring task.
        :param commit_sha: SHA of the commit; it is the identity of the record.
        """
        self.monitor_id = monitor_id
        self.commit_sha = commit_sha
        # The remaining fields start empty and are filled in by the builder.
        self.commit_url = ''
        self.commit_message = ''
        self.commit_author_name = ''
        self.commit_author_date = ''
        self.commit_committer_name = ''
        self.commit_committer_date = ''
        # commit_file / commit_status are not collected.
        self.commit_branch = ''
        self.commit_level = ''

    def __eq__(self, other):
        if isinstance(other, GithubCommit):
            return self.commit_sha == other.commit_sha
        # Comparing against a bare SHA keeps working as before.
        return other == self.commit_sha

    def __hash__(self):
        # __hash__ must return an int; returning the SHA string itself made
        # hash(), set() and dict keys raise TypeError.
        return hash(self.commit_sha)

class GithubIssue(object):
    """Plain record describing one issue or pull request of a monitored repository.

    Two records are equal when their ids are equal; a record also compares
    equal to the bare id value itself.
    """

    def __init__(self, monitor_id, id):
        """
        :param monitor_id: identifier of the monitoring task.
        :param id: id of the issue; it is the identity of the record.
        """
        self.issue_id = id
        self.monitor_id = monitor_id
        # The remaining fields start empty and are filled in by the builder.
        self.issue_url = ''
        self.issue_create_user = ''
        self.issue_create_date = ''
        self.issue_status = ''
        self.issue_level = ''
        self.issue_title = ''
        self.issue_body = ''
        self.type = ''
        self.is_visited = ''

    def __eq__(self, other):
        if isinstance(other, GithubIssue):
            return self.issue_id == other.issue_id
        # Comparing against a bare id keeps working as before.
        return other == self.issue_id

    def __hash__(self):
        # hash() always yields an int, so ids that are not ints (for example
        # numeric strings) no longer make hashing raise TypeError.
        return hash(self.issue_id)


class GithubKeywordMatcherLevel:
    """Risk levels produced by GithubKeywordMatcher."""

    HIGH = 1
    LOW = 0


class GithubKeywordMatcher:
    """Rates a text as HIGH when it mentions a security related keyword."""

    name = 'keyword matcher'

    def __init__(self):
        self.keyword = ['bug', 'rce', 'command', 'exec', 'fix', 'inject', 'xxe', 'sql', 'overstep', 'upload',
                        'security', 'vulnerability', 'bypass', '修复', '越权', '漏洞', '命令执行', '权限', '绕过', '注入',
                        '后门', '安全', 'CVE-']

    @staticmethod
    def _contains(text, keyword):
        """Tell whether the lower-cased ``keyword`` occurs in the lower-cased ``text``.

        Non-ASCII keywords match anywhere, because such text is not separated
        by spaces.  ASCII keywords must start a word, so 'fix' matches
        'fixed' and 'fix:' but not 'prefix', and 'rce' does not match 'source'.
        """
        if not keyword.isascii():
            return keyword in text
        position = text.find(keyword)
        while position != -1:
            previous = text[position - 1] if position else ''
            # Start of text, punctuation, whitespace or a non-ASCII character
            # all count as a word boundary.
            if not (previous.isascii() and previous.isalnum()):
                return True
            position = text.find(keyword, position + 1)
        return False

    def check_keyword(self, text: str):
        """Return HIGH when ``text`` mentions any keyword, otherwise LOW.

        The comparison is case-insensitive on both sides; the previous
        implementation lower-cased only the text, so 'CVE-' could never match.
        It also split on single spaces only, which missed keywords followed by
        punctuation or newlines and every keyword inside unspaced text.

        :param text: the text to inspect; ``None`` and ``''`` are rated LOW.
        """
        if not text:
            return GithubKeywordMatcherLevel.LOW
        lowered = text.lower()
        if any(self._contains(lowered, word.lower()) for word in self.keyword):
            return GithubKeywordMatcherLevel.HIGH
        return GithubKeywordMatcherLevel.LOW


class GithubBuiler:
    """Common base class of the commit and issue builders."""


class GithubCommitBuiler(GithubBuiler):
    """Fills a GithubCommit record from API payloads and renders it for storage and mail."""

    name = 'commit builder'

    # (build_info key, mail label) pairs in the order they appear in the mail.
    # NOTE(review): '风险程序' looks like a typo of '风险程度'; the text is kept
    # unchanged because it is user-facing output.
    _MAIL_LABELS = (
        ('commit_url', '地址:'),
        ('commit_message', '信息:'),
        ('commit_author_name', '作者名称:'),
        ('commit_author_date', '作者时间:'),
        ('commit_committer_name', '提交者名称:'),
        ('commit_committer_date', '提交时间:'),
        ('commit_branch', '分支:'),
        ('commit_level', '风险程序:'),
    )

    def __init__(self, monitor_id, commit_sha):
        """
        :param monitor_id: identifier of the monitoring task.
        :param commit_sha: SHA of the commit being described.
        """
        super().__init__()
        self.github_commit = GithubCommit(monitor_id, commit_sha)
        self.keyword_matcher = GithubKeywordMatcher()

    def _date_format(self, date):
        """Convert an ISO-8601 timestamp with UTC offset into Unix epoch seconds.

        The parsed datetime is timezone-aware, so ``timestamp()`` is exact on
        any machine and for any offset.  The earlier version added a fixed
        eight hours and went through local time, which was only right for
        'Z' timestamps on a UTC+8 host.

        :param date: for example ``'2023-01-03T07:16:00Z'``.
        :return: epoch seconds as ``int``.
        """
        return int(datetime.datetime.strptime(date, "%Y-%m-%dT%H:%M:%S%z").timestamp())

    def build_branch(self, branch_json):
        """Set ``commit_branch`` to the branch whose head is this commit.

        ``'-'`` is stored when the payload is empty, is not a list, or no
        branch head matches.  Previously the field stayed ``''`` when the
        list was non-empty but nothing matched.

        :param branch_json: list of branch objects carrying ``name`` and
            ``commit.sha``.
        """
        self.github_commit.commit_branch = '-'
        if not isinstance(branch_json, list):
            return
        for branch in branch_json:
            # No early exit: as before, the last matching branch wins.
            if branch['commit']['sha'] == self.github_commit.commit_sha:
                self.github_commit.commit_branch = branch['name']

    def build_level(self):
        """Rate the commit message with the keyword matcher and store the level."""
        self.github_commit.commit_level = self.keyword_matcher.check_keyword(self.github_commit.commit_message)

    def build_commit(self, ret_json: dict):
        """Copy url, message, author and committer data from a commit payload.

        :param ret_json: commit object providing ``html_url`` and
            ``commit.message`` / ``commit.author`` / ``commit.committer``.
        """
        commit = self.github_commit
        details = ret_json['commit']
        commit.commit_url = ret_json['html_url']
        commit.commit_message = details['message']
        commit.commit_author_name = details['author']['name']
        commit.commit_author_date = self._date_format(details['author']['date'])
        commit.commit_committer_name = details['committer']['name']
        commit.commit_committer_date = self._date_format(details['committer']['date'])

    @property
    def build_info(self):
        """The record as a dict of column values; ``is_visited`` is always 0."""
        commit = self.github_commit
        return {'commit_sha': commit.commit_sha, 'monitor_id': commit.monitor_id,
                'commit_url': commit.commit_url, 'commit_message': commit.commit_message,
                'commit_author_name': commit.commit_author_name,
                'commit_author_date': commit.commit_author_date,
                'commit_committer_name': commit.commit_committer_name,
                'commit_committer_date': commit.commit_committer_date,
                'commit_branch': commit.commit_branch, 'commit_level': commit.commit_level,
                'is_visited': 0}

    def build_mail_content(self, target):
        """Render the commit as the text block used in notifications.

        :param target: the repository the commit belongs to.
        :return: one ``label + value`` line per field, each ending in a newline.
        """
        info = self.build_info
        lines = ['------------------------', '仓库:' + str(target)]
        for key, label in self._MAIL_LABELS:
            value = info[key]
            if key in ('commit_author_date', 'commit_committer_date'):
                # Dates are stored as epoch seconds and shown in local time.
                text = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(value))
            elif key == 'commit_level':
                text = '高' if value == 1 else '低'
            else:
                text = str(value)
            lines.append(label + text)
        return '\n'.join(lines) + '\n'


class GithubIssueBuiler(GithubBuiler):
    """Fills a GithubIssue record from an API payload and renders it for storage and mail."""

    name = 'issue builder'

    # (build_info key, mail label) pairs in the order they appear in the mail.
    # NOTE(review): '风险程序' looks like a typo of '风险程度'; the text is kept
    # unchanged because it is user-facing output.
    _MAIL_LABELS = (
        ('issue_url', 'issue地址:'),
        ('issue_create_user', '提交用户:'),
        ('issue_create_date', '提交时间:'),
        ('issue_status', '状态:'),
        ('issue_level', '风险程序:'),
        ('issue_title', '标题:'),
        ('issue_body', '内容:'),
        ('type', '类型:'),
    )

    def __init__(self, monitor_id, issue_id):
        """
        :param monitor_id: identifier of the monitoring task.
        :param issue_id: id of the issue being described.
        """
        super().__init__()
        self.github_issue = GithubIssue(monitor_id, issue_id)
        self.keyword_matcher = GithubKeywordMatcher()

    def _date_format(self, date):
        """Convert an ISO-8601 timestamp with UTC offset into Unix epoch seconds.

        The parsed datetime is timezone-aware, so ``timestamp()`` is exact on
        any machine and for any offset.  The earlier version added a fixed
        eight hours and went through local time, which was only right for
        'Z' timestamps on a UTC+8 host.

        :param date: for example ``'2023-01-03T07:16:00Z'``.
        :return: epoch seconds as ``int``.
        """
        return int(datetime.datetime.strptime(date, "%Y-%m-%dT%H:%M:%S%z").timestamp())

    def build_level(self):
        """Store the higher of the levels found in the issue body and title."""
        issue = self.github_issue
        # An empty string is passed instead of None so the matcher never
        # receives a missing body or title.
        body_level = self.keyword_matcher.check_keyword(issue.issue_body or '')
        title_level = self.keyword_matcher.check_keyword(issue.issue_title or '')
        issue.issue_level = max(body_level, title_level)

    def build_issue(self, ret_json: dict):
        """Copy url, author, date, state, title, body and type from an issue payload.

        :param ret_json: one item of the issue search result.
        """
        issue = self.github_issue
        issue.issue_url = ret_json['html_url']
        issue.issue_create_user = ret_json['user']['login']
        issue.issue_create_date = self._date_format(ret_json['created_at'])
        issue.issue_status = ret_json['state']
        issue.issue_title = ret_json['title']
        # The API delivers null for an issue without description; storing ''
        # keeps build_level() and build_info from failing on None.
        issue.issue_body = ret_json.get('body') or ''
        # Pull requests carry a 'pull_request' key in issue payloads; the
        # node_id prefix is kept as a fallback.
        is_pull_request = 'pull_request' in ret_json or str(ret_json.get('node_id') or '').startswith('PR')
        issue.type = 'pr' if is_pull_request else 'issue'

    @property
    def build_info(self):
        """The record as a dict of column values; the body is cut to 20 characters and ``is_visited`` is always 0."""
        issue = self.github_issue
        return {'issue_id': issue.issue_id, 'monitor_id': issue.monitor_id,
                'issue_url': issue.issue_url, 'issue_create_user': issue.issue_create_user,
                'issue_create_date': issue.issue_create_date,
                'issue_status': issue.issue_status,
                'issue_level': issue.issue_level,
                'issue_title': issue.issue_title,
                'issue_body': (issue.issue_body or '')[:20],
                'type': issue.type,
                'is_visited': 0}

    def build_mail_content(self, target):
        """Render the issue as the text block used in notifications.

        :param target: the repository the issue belongs to.
        :return: one ``label + value`` line per field, each ending in a newline.
        """
        info = self.build_info
        lines = ['------------------------', '仓库:' + str(target)]
        for key, label in self._MAIL_LABELS:
            value = info[key]
            if key == 'issue_create_date':
                # The date is stored as epoch seconds and shown in local time.
                text = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime(value))
            elif key == 'issue_level':
                text = '高' if value == 1 else '低'
            else:
                text = str(value)
            lines.append(label + text)
        return '\n'.join(lines) + '\n'


class GithubCommitMonitor(GithubMonitor):
    """Monitor that stores and reports new commits of the tracked repositories."""

    def __init__(self):
        super().__init__()
        # Search qualifiers: lower bound of the committer date, then the repository.
        self.commit_query = 'committer-date:>{}+repo:{}'

    def check_repeat(self, exists_commit_result, current_sha):
        """Return True when ``current_sha`` is NOT among the stored rows.

        Despite the name, a True result means the commit is new.

        :param exists_commit_result: rows exposing a ``'commit_sha'`` key.
        :param current_sha: SHA to look for.
        """
        return not any(commit['commit_sha'] == current_sha for commit in exists_commit_result)

    async def _collect_commits(self, monitor_id, target, headers, commit_list):
        """Search ``target`` for commits, store the new ones and append their mail text to ``commit_list``.

        Results are appended to the caller's list so that entries handled
        before a failure are still reported.
        """
        commit_query = self.commit_query.format(self.time_format(), target)
        exists_commit_list = self.db_conn.get_commit_by_monitorid(monitor_id, 'commit_sha')
        async with aiohttp.ClientSession() as session:
            ret_json = await self.get_fetch_json(session=session, url=self.commit_addr.format(commit_query),
                                                 headers=headers)
            attr_dict = AttributeDict(ret_json)
            if not attr_dict.total_count > 0:
                return
            branch_json = None
            current_request_count = 0
            for item in attr_dict.get('items'):
                if current_request_count >= self.request_limit:
                    break
                item_dict = AttributeDict(item)
                if not self.check_repeat(exists_commit_list, item_dict.sha):
                    continue
                commit_json = await self.get_fetch_json(session=session, url=item_dict.url, headers=headers)
                gmb = GithubCommitBuiler(monitor_id, commit_json['sha'])
                gmb.build_commit(commit_json)
                if branch_json is None:
                    # The branch list does not depend on the commit, so it
                    # is fetched once per run instead of once per commit.
                    branch_json = await self.get_fetch_json(session=session,
                                                            url=self.branch_addr.format(target),
                                                            headers=headers)
                gmb.build_branch(branch_json)
                gmb.build_level()
                self.db_conn.insert_commit(gmb.build_info)
                current_request_count += 1
                commit_list.append(gmb.build_mail_content(target))

    async def get_commits(self, monitor_id):
        """Run one scan for ``monitor_id`` and return the mail texts of the new commits.

        Errors are printed and end the scan early; commits handled before
        the error are still returned.  The next scan time is always written,
        using the same configured schedule as the issue monitor.

        :param monitor_id: identifier of the monitoring task.
        :return: list of mail text blocks, possibly empty.
        """
        commit_list = []
        try:
            self.db_conn.update_last_update_time(int(time.time()), monitor_id)
            token_ok, headers = await self.verify_api_available()
            if not token_ok:
                # The scan still runs with the returned headers, as before;
                # the message only makes the degraded mode visible.
                print('github token verification failed for monitor {}'.format(monitor_id))
            monitor_task = self.db_conn.get_monitor_task_by_monitorid(monitor_id)
            if monitor_task:
                await self._collect_commits(monitor_id, monitor_task['target'], headers, commit_list)
        except Exception as e:
            # Covers GithubGrammarError and GithubLimitError too: every
            # failure is reported the same way.
            print(e)
        finally:
            self.db_conn.update_next_update_time(int(time.time()) + get_celery_conf()['schedule'], monitor_id)
        # Returning after the try statement, not inside ``finally``, lets
        # cancellation and other BaseException subclasses propagate.
        return commit_list

    async def monitor(self, monitor_id):
        """Scan a single monitoring task; see get_commits()."""
        return await self.get_commits(monitor_id)

    async def all_monitor(self):
        """Scan all tasks returned by the database concurrently.

        :return: one result list per task, in task order.
        """
        monitor_tasks = self.db_conn.get_monitor_task_by_monitorid()
        return await asyncio.gather(*(self.get_commits(task['monitor_id']) for task in monitor_tasks))


class GithubIssuePrMonitor(GithubMonitor):
    """Monitor that stores and reports new issues and pull requests of the tracked repositories."""

    def __init__(self):
        super().__init__()
        # Search qualifiers: lower bound of the creation date, then the repository.
        self.issue_query = 'created:>{}+repo:{}'

    def check_repeat(self, exists_issue_result, current_id):
        """Return True when ``current_id`` is NOT among the stored rows.

        Despite the name, a True result means the issue is new.

        :param exists_issue_result: rows exposing an ``'issue_id'`` key.
        :param current_id: issue id to look for.
        """
        return not any(issue['issue_id'] == current_id for issue in exists_issue_result)

    async def _collect_issues(self, monitor_id, target, headers, issue_list):
        """Search ``target`` for issues, store the new ones and append their mail text to ``issue_list``.

        Results are appended to the caller's list so that entries handled
        before a failure are still reported.
        """
        issue_query = self.issue_query.format(self.time_format(), target)
        exists_issue_list = self.db_conn.get_issue_by_monitorid(monitor_id, 'issue_id')
        async with aiohttp.ClientSession() as session:
            ret_json = await self.get_fetch_json(session=session, url=self.issue_addr.format(issue_query),
                                                 headers=headers)
            attr_dict = AttributeDict(ret_json)
            if not attr_dict.total_count > 0:
                return
            current_request_count = 0
            for item in attr_dict.get('items'):
                if current_request_count >= self.request_limit:
                    break
                item_dict = AttributeDict(item)
                if not self.check_repeat(exists_issue_list, item_dict.id):
                    continue
                gib = GithubIssueBuiler(monitor_id, item_dict.id)
                gib.build_issue(item)
                gib.build_level()
                self.db_conn.insert_issue(gib.build_info)
                current_request_count += 1
                issue_list.append(gib.build_mail_content(target))

    async def get_issues(self, monitor_id):
        """Run one scan for ``monitor_id`` and return the mail texts of the new issues.

        Errors are printed and end the scan early; issues handled before the
        error are still returned.  The next scan time is always written.

        :param monitor_id: identifier of the monitoring task.
        :return: list of mail text blocks, possibly empty.
        """
        issue_list = []
        try:
            self.db_conn.update_last_update_time(int(time.time()), monitor_id)
            token_ok, headers = await self.verify_api_available()
            if not token_ok:
                # The scan still runs with the returned headers, as before;
                # the message only makes the degraded mode visible.
                print('github token verification failed for monitor {}'.format(monitor_id))
            monitor_task = self.db_conn.get_monitor_task_by_monitorid(monitor_id)
            if monitor_task:
                await self._collect_issues(monitor_id, monitor_task['target'], headers, issue_list)
        except Exception as e:
            # Covers GithubGrammarError and GithubLimitError too: every
            # failure is reported the same way.
            print(e)
        finally:
            self.db_conn.update_next_update_time(int(time.time()) + get_celery_conf()['schedule'], monitor_id)
        # Returning after the try statement, not inside ``finally``, lets
        # cancellation and other BaseException subclasses propagate.
        return issue_list

    async def monitor(self, monitor_id):
        """Scan a single monitoring task; see get_issues()."""
        return await self.get_issues(monitor_id)

    async def all_monitor(self):
        """Scan all tasks returned by the database concurrently.

        :return: one result list per task, in task order.
        """
        monitor_tasks = self.db_conn.get_monitor_task_by_monitorid()
        return await asyncio.gather(*(self.get_issues(task['monitor_id']) for task in monitor_tasks))


if __name__ == '__main__':
    # Manual run of a single monitoring task.  asyncio.run() creates and
    # closes its own event loop; asyncio.get_event_loop() is deprecated
    # when no loop is running.
    g = GithubIssuePrMonitor()
    asyncio.run(g.monitor('b77d935349fa074c50ec98673858d6a'))

posted @ 2023-01-03 15:16  zpchcbd  阅读(302)  评论(0编辑  收藏  举报