diff options
author | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2023-03-30 16:33:30 -0400 |
---|---|---|
committer | Konstantin Ryabitsev <konstantin@linuxfoundation.org> | 2023-03-30 16:33:30 -0400 |
commit | 5b903b6d1469d211a19966cf893972111fec280d (patch) | |
tree | 4039232cc32168e205d207031b0a0f528f2fef86 | |
download | bugspray-5b903b6d1469d211a19966cf893972111fec280d.tar.gz |
Initial commit
This is the initial commit of a "minimum viable product," as startups like
to say. Documentation is forthcoming -- for now we're
running initial burn-in tests and finding the largest bugs.
Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r-- | .gitignore | 14 | ||||
-rw-r--r-- | default.config.toml | 80 | ||||
-rwxr-xr-x | peebz.sh | 9 | ||||
-rw-r--r-- | peebz/__init__.py | 904 | ||||
-rw-r--r-- | peebz/bz2pi.py | 148 | ||||
-rw-r--r-- | peebz/command.py | 157 | ||||
-rw-r--r-- | peebz/parse.py | 198 | ||||
-rw-r--r-- | peebz/pi2bz.py | 189 | ||||
-rw-r--r-- | requirements.txt | 5 |
9 files changed, 1704 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..5e5f255 --- /dev/null +++ b/.gitignore @@ -0,0 +1,14 @@ +*.swp +*.pyc +*.pyo +*.json +*.pdf +test.log +build/* +dist/* +MANIFEST +.idea +__pycache__ +*.egg-info +*.sqlite3 +.venv diff --git a/default.config.toml b/default.config.toml new file mode 100644 index 0000000..a490f19 --- /dev/null +++ b/default.config.toml @@ -0,0 +1,80 @@ +[db] +dburl = 'sqlite:////home/user/work/temp/peebz.sqlite3' + +[notify] +neverto = ['*@kernel-bugs.*', 'bugbot@kernel.org'] +never_if_creator = ['bugbot@kernel.org'] +never_if_text_matches = ['*has been marked as a duplicate of*'] +fromaddr = 'Kernel.org Bugbot <bugbot@kernel.org>' +smtpserver = 'localhost' +smtpport = 25 + +[bugzilla] +apikey = '<omitted>' +name = 'Kernel.org Bugzilla' +url = 'https://bugzilla.kernel.org' +resturl = 'https://bugzilla.kernel.org/rest' +bugmask = 'https://bugzilla.kernel.org/show_bug.cgi?id={bug_id}' +commentmask = 'https://bugzilla.kernel.org/show_bug.cgi?id={bug_id}#c{comment_count}' +attachmask = 'https://bugzilla.kernel.org/attachment.cgi?id={attachment_id}' +linkmask = 'https://msgid.link/{msgid}' +subsystem_cf = 'cf_subsystem' +maintainers_url = 'https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/MAINTAINERS' + +[mimetypes] +deny = [ 'text/html', 'application/*-signature' ] + +[logging] +logfile = 'peebz.log' +loglevel = 'info' + +[components.'Linux'.'Kernel'] +new_bug_send_notification = true +pi_query = '(nq:"bugbot on" OR nq:"bugbot assign")' +pi_must_bz_groups = ['editbugs'] +pi_url = 'https://lore.kernel.org/all/' +pi_assign_regex = '^bugbot assign to (\S+)' +bz_new_bugs_quicksearch = 'OPEN flag:bugbot+' +alwayscc = ['bugs@lists.linux.dev'] + +[templates] +parse_bug_intro = '${author} writes:' +parse_bug_intro_with_count = '${author} replies to comment #${comment_count}:' +parse_bug_outro = '(via ${msgid_link})' + +new_bug_notify = ''' +Hello: + +This conversation is now tracked by ${bzname}: 
+${bug_url} + +There is no need to do anything else, just keep talking. +''' + +new_comment_notify = ''' +${comment_author} writes via ${bzname}: + +${comment_text} + +View: ${comment_url} +You can reply to this message to join the discussion. +''' + +new_attachment_notify = ''' +${comment_author} added a new attachment via ${bzname}. +You can download it by following the link below. + +File: ${file_name} (${content_type}) +Size: ${human_size} +Link: ${attachment_url} +--- +${summary} + +You can reply to this message to join the discussion. +''' + +botsig = ''' +-- +Deet-doot-dot, I am a bot. +${myname} (${appname} ${appver}) +''' diff --git a/peebz.sh b/peebz.sh new file mode 100755 index 0000000..f3809dc --- /dev/null +++ b/peebz.sh @@ -0,0 +1,9 @@ +#!/usr/bin/env bash +# +# Run from a git checkout. +# + +REAL_SCRIPT=$(realpath -e ${BASH_SOURCE[0]}) +SCRIPT_TOP="${SCRIPT_TOP:-$(dirname ${REAL_SCRIPT})}" + +exec env PYTHONPATH="${SCRIPT_TOP}" python3 "${SCRIPT_TOP}/peebz/command.py" "${@}" diff --git a/peebz/__init__.py b/peebz/__init__.py new file mode 100644 index 0000000..f2d2024 --- /dev/null +++ b/peebz/__init__.py @@ -0,0 +1,904 @@ +#!/usr/bin/env python3 +import email +import email.message +import email.utils +import email.parser +import requests +import logging +import base64 +import b4 +import sys +import uuid +import datetime + +import urllib.parse + +import sqlalchemy as sa + +from typing import Dict, Tuple, List, Set, Optional +from fnmatch import fnmatch +from string import Template + +from sqlalchemy.exc import NoSuchTableError + +# Policy we use for saving mail locally +emlpolicy = email.policy.EmailPolicy(utf8=True, cte_type='8bit', max_line_length=None, + message_factory=email.message.EmailMessage) + +REQSESSION = None +CONFIG = dict() + +__APPNAME__ = 'peebz' +__VERSION__ = '0.1' + +__DBSCHEMA__ = 1 + +logger = logging.getLogger(__APPNAME__) +b4.logger = logger + +SACONN = None +SAENGINE = None +REST_CACHE = dict() +MAINT_CACHE = dict() + + +def 
get_msg_from_stdin() -> Optional[email.message.EmailMessage]: + if not sys.stdin.isatty(): + msg = email.parser.BytesParser(policy=emlpolicy).parsebytes(sys.stdin.buffer.read()) + return msg # noqa + return None + + +def get_requests_session(): + global REQSESSION + if REQSESSION is None: + REQSESSION = requests.session() + REQSESSION.headers.update({'User-Agent': f'{__APPNAME__}/{__VERSION__}'}) + return REQSESSION + + +def get_config() -> Dict: + return CONFIG + + +def get_component_config(product: str, component: str) -> Dict: + config = get_config() + return config['components'][product][component] + + +def get_template_by_bid(tptname: str, bid: int) -> Template: + product, component = bz_get_product_component_by_bid(bid) + return get_template_by_product_component(tptname, product, component) + + +def get_template_by_product_component(tptname: str, product: str, component: str) -> Template: + config = get_config() + try: + return Template(config['components'][product][component]['templates'][tptname]) + except KeyError: + pass + return Template(config['templates'][tptname]) + + +def get_msgid_link(msgid: str) -> str: + config = get_config() + linkmask = config['bugzilla'].get('linkmask', 'https://lore.kernel.org/{msgid}') + return linkmask.format(msgid=msgid) + + +def bz_rest(path: str, payload: Dict = None, params: Dict = None, method: str = 'GET') -> Dict: + global REST_CACHE + # We only cache GETs without any params + if method == 'GET' and not params and path in REST_CACHE: + logger.debug('Using cached data') + return REST_CACHE[path] + + config = get_config() + url = '{BZURL}/{path}'.format(BZURL=config['bugzilla'].get('resturl').rstrip('/'), path=path) + logger.debug('Querying url=%s', url) + myparams = dict() if params is None else dict(params) + myparams['api_key'] = config['bugzilla'].get('apikey', '') + ses = get_requests_session() + if method == 'GET': + res = ses.get(url, params=myparams) + elif method == 'POST': + res = ses.post(url, 
params=myparams, json=payload) + elif method == 'PUT': + res = ses.put(url, params=myparams, json=payload) + else: + logger.critical('CRITICAL: Unknown method=%s', method) + raise RuntimeError('Unknown method %s' % method) + + # logger.debug('res=%s', res.text) + if res.status_code == 404: + raise LookupError('Bugzilla returned 404: %s' % res.text) + + # for every other error, we just BT for now + res.raise_for_status() + rdata = res.json() + if method == 'GET' and not params: + # Cache it + REST_CACHE[path] = rdata + elif method != 'GET': + # We changed something, so nuke our cache + logger.debug('Clearing cache') + REST_CACHE = dict() + + return rdata + + +def bz_add_atts_to_bug(bid: int, atts: List[Dict]) -> List[int]: + aids = list() + for att in atts: + att['ids'] = [bid] + rdata = bz_rest(f'bug/{bid}/attachment', att, method='POST') + logger.debug('Created new attachment %s', ', '.join(rdata['ids'])) + aids += rdata['ids'] + return aids + + +def bz_add_new_bug(payload: Dict) -> Tuple[int, int]: + if 'version' not in payload: + payload['version'] = 'unspecified' + rdata = bz_rest('bug', payload, method='POST') + bid = rdata['id'] + logger.debug('Created new bug %s', bid) + # Apparently, we don't get comment-id info from new bug creation + cid = bz_get_cid_by_bid_count(bid) + return bid, cid + + +def bz_add_new_comment(bid: int, comment: str) -> int: + payload = { + 'id': bid, + 'comment': comment + } + rdata = bz_rest(f'bug/{bid}/comment', payload, method='POST') + cid = rdata['id'] + logger.debug('Created new comment %s', cid) + return cid + + +def bz_get_bug(bid: int, resolve_dupes=False) -> Dict: + if resolve_dupes: + bid = bz_dedupe_bid(bid) + + path = f'bug/{bid}' + rdata = bz_rest(path) + for bdata in rdata['bugs']: + if bdata['id'] == bid: + return bdata + raise RuntimeError('Could not get bug info for %s' % bid) + + +def bz_get_user(username: str) -> Dict: + path = f'user/{username}' + try: + rdata = bz_rest(path) + for udata in rdata['users']: + if 
udata['name'] == username: + return udata + except LookupError: + pass + + raise LookupError('Could not get user info for %s' % username) + + +def bz_get_user_groups(username: str) -> Set: + udata = bz_get_user(username) + return set([x['name'] for x in udata['groups']]) + + +def bz_get_cid_by_bid_count(bid: int, count: int = 0) -> int: + bdata = bz_rest(f'bug/{bid}/comment') + for rbid, rbdata in bdata['bugs'].items(): + if int(rbid) != bid: + continue + for comment in rbdata['comments']: + if comment['count'] == count: + logger.debug('cid for %s/c%s is %s', bid, count, comment['id']) + return comment['id'] + raise LookupError('No cid matching bid=%s count=%s' % (bid, count)) + + +def bz_get_count_by_bid_cid(bid: int, cid: int) -> int: + bdata = bz_rest(f'bug/{bid}/comment') + for rbid, rbdata in bdata['bugs'].items(): + if int(rbid) != bid: + continue + for comment in rbdata['comments']: + if comment['id'] == cid: + logger.debug('count for %s/%s is c%s', bid, cid, comment['count']) + return comment['count'] + raise LookupError('No match for bid=%s cid=%s', bid, cid) + + +def bz_dedupe_bid(bid: int) -> int: + bdata = bz_get_bug(bid) + if bdata.get('dupe_of'): + # Nothing wrong with recursion + return bz_dedupe_bid(bdata['dupe_of']) + return bid + + +def bz_check_user_allowed(uid: str, product: str, component: str) -> bool: + cconf = get_component_config(product, component) + mustgroups = cconf.get('pi_must_bz_groups') + if mustgroups is None: + # No restrictions, anyone can do anything they like + return True + try: + udata = bz_get_user(uid) + except LookupError: + logger.debug('Could not find user %s in bugzilla', uid) + return False + + for mustgroup in mustgroups: + for ugroup in udata.get('groups', list()): + if ugroup['name'] == mustgroup: + logger.debug('%s mustgroup matches %s', uid, mustgroup) + return True + + logger.debug('%s not member of %s', uid, mustgroups) + return False + + +def bz_assign_bug(bid: int, uid: str) -> None: + logger.info('Assigning 
bug %s to %s', bid, uid) + path = f'bug/{bid}' + payload = { + 'assigned_to': uid, + } + bz_rest(path, payload=payload, method='PUT') + + +def db_get_query_last_check(product: str, component: str) -> str: + key = f'query_{product}_{component}' + return db_get_meta_value(key) + + +def db_store_query_last_check(product: str, component: str, last_check: str) -> None: + key = f'query_{product}_{component}' + return db_store_meta_value(key, last_check) + + +def db_get_msgid_by_bid_cid(bid: int, cid: Optional[int]) -> str: + engine, dbconn = db_get_sa() + md = sa.MetaData() + t_bmap = sa.Table('msgid_bug_mapping', md, autoload=True, autoload_with=engine) + if cid: + q = sa.select([t_bmap.c.message_id]).where(t_bmap.c.bug_id == bid, t_bmap.c.comment_id == cid) + else: + # If cid is not defined, we get all mappings and use the lowest cid + q = sa.select([t_bmap.c.message_id]).where(t_bmap.c.bug_id == bid).order_by(t_bmap.c.comment_id) + + rp = dbconn.execute(q) + fa = rp.fetchall() + if len(fa): + logger.debug('query results for bid=%s, cid=%s: %s', bid, cid, fa) + return fa[0][0] + + raise LookupError('No message-id matching bid=%s, cid=%s' % (bid, cid)) + + +def db_get_bid_cid_by_msgid(msgid: str) -> Tuple[int, Optional[int]]: + engine, dbconn = db_get_sa() + md = sa.MetaData() + t_bmap = sa.Table('msgid_bug_mapping', md, autoload=True, autoload_with=engine) + logger.debug('Querying db for msgid=%s', msgid) + q = sa.select([t_bmap.c.bug_id, t_bmap.c.comment_id]).where(t_bmap.c.message_id == msgid) + rp = dbconn.execute(q) + fa = rp.fetchall() + if not len(fa): + raise LookupError('msgid %s not known' % msgid) + bid, cid = fa[0] + dbid = bz_dedupe_bid(bid) + if dbid != bid: + cid = None + logger.debug(' matching bid=%s, cid=%s', bid, cid) + return bid, cid + + +def db_store_msgid_bid_cid(msgid: str, bid: int, cid: int) -> None: + msgid = msgid.strip('<>') + engine, dbconn = db_get_sa() + md = sa.MetaData() + t_bmap = sa.Table('msgid_bug_mapping', md, autoload=True, 
autoload_with=engine) + q = sa.insert(t_bmap).values(message_id=msgid, bug_id=bid, comment_id=cid) + dbconn.execute(q) + logger.info('Created new mapping for %s: %s/%s', msgid, bid, cid) + + +def db_get_recipients(bid: int) -> Set[str]: + engine, dbconn = db_get_sa() + md = sa.MetaData() + t_recip = sa.Table('recipients', md, autoload=True, autoload_with=engine) + logger.debug('Querying recipients for bid=%s', bid) + q = sa.select([t_recip.c.email]).where(t_recip.c.bug_id == bid) + rp = dbconn.execute(q) + fa = rp.fetchall() + if not len(fa): + raise LookupError('bid %s not known' % bid) + return set(x[0] for x in fa) + + +def db_store_recipients(bid: int, recipients: Set[str]) -> None: + # TODO: add ability to unsubscribe? + try: + stored = db_get_recipients(bid) + except LookupError: + stored = set() + # Any new ones to store? + extras = recipients - stored + if not extras: + return + + engine, dbconn = db_get_sa() + md = sa.MetaData() + t_recip = sa.Table('recipients', md, autoload=True, autoload_with=engine) + logger.debug('Storing new recipients for bid=%s', bid) + for addr in extras: + q = sa.insert(t_recip).values(bug_id=bid, email=addr) + dbconn.execute(q) + logger.debug(' Added %s', addr) + return + + +def db_get_meta_value(key: str) -> str: + engine, dbconn = db_get_sa() + md = sa.MetaData() + t_meta = sa.Table('meta', md, autoload=True, autoload_with=engine) + q = sa.select([t_meta.c.var_value]).where(t_meta.c.var_key == key) + rp = dbconn.execute(q) + fa = rp.fetchall() + if not len(fa): + raise LookupError('meta key %s not known' % key) + return fa[0][0] + + +def db_store_meta_value(key: str, value: str) -> None: + engine, dbconn = db_get_sa() + md = sa.MetaData() + t_meta = sa.Table('meta', md, autoload=True, autoload_with=engine) + with engine.begin() as dbconn: + q = sa.delete(t_meta).where(t_meta.c.var_key == key) + dbconn.execute(q) + q = sa.insert(t_meta).values(var_key=key, var_value=value) + dbconn.execute(q) + + +def 
db_store_notify_last_check(bid: int, when: str): + key = f'notify_bug_{bid}' + return db_store_meta_value(key, when) + + +def db_get_notify_last_check(bid: int) -> str: + key = f'notify_bug_{bid}' + return db_get_meta_value(key) + + +def bz_get_changed_bugs(since: str, include_untracked: bool = False) -> List: + logger.debug('Querying for changed bugs since %s', since) + params = { + 'chfieldfrom': since, + 'include_fields': 'id,summary', + } + rdata = bz_rest('bug', params=params) + if include_untracked: + return rdata['bugs'] + bids = tuple([x['id'] for x in rdata['bugs']]) + engine, dbconn = db_get_sa() + md = sa.MetaData() + t_bmap = sa.Table('msgid_bug_mapping', md, autoload=True, autoload_with=engine) + q = sa.select([t_bmap.c.bug_id]).where(t_bmap.c.bug_id.in_(bids)).distinct() + rp = dbconn.execute(q) + fa = rp.fetchall() + if not fa: + return list() + tracked = set([x[0] for x in fa]) + bugs = list() + for bdata in rdata['bugs']: + if bdata['id'] in tracked: + bugs.append(bdata) + + return bugs + + +def bz_quicksearch_bugs(query: str) -> Dict: + params = { + 'include_fields': 'id,summary', + 'quicksearch': query, + } + return bz_rest('bug', params=params) + + +def bz_get_query_bugs(params: Dict, exclude: Set[int]) -> List[int]: + if 'include_fields' not in params: + params['include_fields'] = 'id,summary' + rdata = bz_rest('bug', params=params) + bids = list() + for bdata in rdata['bugs']: + if bdata['id'] in exclude: + continue + bids.append(bdata['id']) + return bids + + +def get_human_size(size: int, decimals: int = 2) -> str: + units = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB'] + while True: + unit = units.pop(0) + if size < 1024.0 or not len(units): + break + size /= 1024.0 + + return f'{size:.{decimals}f} {unit}' + + +def bz_get_attachment_by_aid(aid: int, include_fields: Optional[str] = None) -> Dict: + if include_fields is None: + rdata = bz_rest(f'bug/attachment/{aid}') + else: + params = { + 'include_fields': include_fields, + } + rdata = 
bz_rest(f'bug/attachment/{aid}', params=params) + + for raid, radata in rdata['attachments'].items(): + if int(raid) == aid: + if 'size' in radata: + radata['human_size'] = get_human_size(radata['size']) + return radata + + raise LookupError('No matching attachment_id found: %s', aid) + + +def bz_get_all_comments_by_bid(bid: int) -> List[Dict]: + bdata = bz_rest(f'bug/{bid}/comment') + for rbid, rbdata in bdata['bugs'].items(): + if int(rbid) != bid: + continue + return rbdata['comments'] + + +def bz_get_comments_for_bid_since(bid: int, since: str) -> List[Dict]: + params = { + 'new_since': since, + 'id': bid, + } + bdata = bz_rest(f'bug/{bid}/comment', params=params) + for rbid, rbdata in bdata['bugs'].items(): + if int(rbid) != bid: + continue + return rbdata['comments'] + + +def bz_get_newest_comments_for_bid(bid: int, include_private: bool = False) -> List[Dict]: + try: + when = db_get_notify_last_check(bid) + except LookupError: + # grab the highest cid we know about + engine, dbconn = db_get_sa() + md = sa.MetaData() + t_bmap = sa.Table('msgid_bug_mapping', md, autoload=True, autoload_with=engine) + q = sa.select(sa.func.max(t_bmap.c.comment_id)).where(t_bmap.c.bug_id == bid) + rp = dbconn.execute(q) + mfa = rp.fetchall() + c_max = None + if mfa: + c_max = mfa[0][0] + cdatas = bz_get_all_comments_by_bid(bid) + comments = list() + for cdata in cdatas: + if c_max and cdata['id'] <= c_max: + continue + if not include_private and cdata['is_private']: + continue + comments.append(cdata) + return comments + + logger.debug('Getting newest comments since %s', when) + cdatas = bz_get_comments_for_bid_since(bid, when) + comments = list() + for cdata in cdatas: + if not include_private and cdata['is_private']: + continue + comments.append(cdata) + return comments + + +def msg_get_inre_msgids(msg: email.message.EmailMessage) -> List[str]: + pairs = list() + if msg.get('In-Reply-To'): + pairs += email.utils.getaddresses([str(x) for x in msg.get_all('in-reply-to', [])]) + 
if msg.get('References'): + pairs += email.utils.getaddresses([str(x) for x in msg.get_all('references', [])]) + + msgids = list() + for pair in pairs: + if pair[1] not in msgids: + msgids.append(pair[1]) + + return msgids + + +def sort_msgs_by_received(msgs: List[email.message.EmailMessage]) -> List[email.message.EmailMessage]: + tosort = list() + for msg in msgs: + latest_rdt = None + for rhdr in msg.get_all('Received'): + # The received headers are pretty free-form, but generally will end with ; datetimeinfo + chunks = rhdr.rsplit(';', maxsplit=1) + if len(chunks) < 2: + continue + rdate = chunks[1].strip() + rdt = email.utils.parsedate_tz(rdate) + if rdt is None: + continue + # use the latest rdt + if not latest_rdt or latest_rdt < rdt: + latest_rdt = rdt + latest_rdt = None + if not latest_rdt and msg.get('Date'): + # Use the Date header as fallback + latest_rdt = email.utils.parsedate_to_datetime(msg.get('Date')) + if not latest_rdt: + logger.debug('Message without a date: %s!', msg.get('Message-ID')) + continue + tosort.append((latest_rdt, msg)) + + sortedmsgs = list() + for rdt, msg in sorted(tosort): + sortedmsgs.append(msg) + + return sortedmsgs + + +def msg_get_inre_bid_cid(msg: email.message.EmailMessage) -> Tuple[int, int]: + msgids = msg_get_inre_msgids(msg) + if not msgids: + logger.debug('No references in msg') + raise LookupError('No references found') + + res = list() + for msgid in msgids: + try: + res.append(db_get_bid_cid_by_msgid(msgid)) + except LookupError: + # Try the next one until we run out + pass + if not res: + logger.debug('No reference matched database records') + raise LookupError('Nothing matching in the db') + + if len(res) > 1: + # Use the highest cid, which is going to be the latest matching comment + res.sort(key=lambda x: x[1], reverse=True) + return res[0] + + +def msg_get_recipients(msg: email.message.EmailMessage) -> Set[str]: + pairs = list() + if msg.get('To'): + pairs += email.utils.getaddresses([str(x) for x in 
msg.get_all('to', [])]) + if msg.get('Cc'): + pairs += email.utils.getaddresses([str(x) for x in msg.get_all('cc', [])]) + if msg.get('From'): + pairs += email.utils.getaddresses([str(x) for x in msg.get_all('from', [])]) + + return set([x[1].lower() for x in pairs]) + + +def bz_get_product_component_by_bid(bid: int) -> Tuple[str, str]: + bdata = bz_get_bug(bid, resolve_dupes=True) + return bdata['product'], bdata['component'] + + +def get_product_component_by_recipients(recipients: Set[str]) -> Tuple[str, str]: + config = get_config() + if not config.get('components'): + logger.debug('No components found in config') + raise LookupError('No components defined in config') + for recipient in recipients: + logger.debug('Matching %s', recipient) + for bz_product, bz_components in config['components'].items(): + for bz_component, c_config in bz_components.items(): + if not c_config.get('recipients'): + continue + for addr in c_config['recipients']: + if fnmatch(recipient, addr): + logger.debug('Matched %s with product=%s, component=%s', recipient, bz_product, bz_component) + return bz_product, bz_component + + raise LookupError('No matches for any recipients') + + +def get_newbug_payload_by_product_component(product: str, component: str) -> Dict: + config = get_config() + try: + payload = config['components'][product][component]['payload'] + except KeyError: + payload = dict() + payload['product'] = product + payload['component'] = component + return payload + + +def msg_get_author(msg: email.message.EmailMessage) -> Tuple[str, str]: + author = ('', 'missing@address.local') + fh = msg.get('from') + if fh: + author = email.utils.getaddresses([fh])[0] + + if not author[0]: + return 'Zorro Boogs', author[1] + + return author + + +def msg_parse_for_bug(msg: email.message.EmailMessage) -> Tuple[str, Tuple[str, str], str, str, List[Dict]]: + msgid = b4.LoreMessage.get_clean_msgid(msg) + mp = msg.get_body(preferencelist=('plain',)) + bbody = mp.get_payload(decode=True) + cs = 
mp.get_content_charset() + if not cs: + cs = 'utf-8' + cpay = bbody.decode(cs, errors='replace') + # Strip signature if we find it + chunks = cpay.rsplit('-- ', maxsplit=1) + cbody = chunks[0] + lsub = b4.LoreSubject(msg.get('Subject', '')) + subject = lsub.subject + atts = msg_get_valid_attachments(msg) + author = msg_get_author(msg) + + return msgid, author, subject, cbody, atts + + +def msg_get_valid_attachments(msg: email.message.EmailMessage) -> List[Dict]: + # Get all good attachments + config = get_config() + atts = list() + for part in msg.walk(): + if part.get_content_disposition() != 'attachment': + continue + ct = part.get_content_type() + mts = config.get('mimetypes') + allowed = True + if mts and 'deny' in mts: + for dmt in mts['deny']: + if fnmatch(ct, dmt): + logger.debug('Skipping denied mime-type attachement: %s', ct) + allowed = False + break + if allowed: + databytes = part.get_payload(decode=True) + data = base64.b64encode(databytes).decode() + filename = part.get_filename() + if not filename: + filename = 'unnamed.txt' + ct = 'text/plain' + summary = filename + payload = { + 'file_name': filename, + 'content_type': ct, + 'summary': summary, + 'data': data, + } + + atts.append(payload) + + return atts + + +def get_recipients_by_product_component(product: str, component: str) -> Set[str]: + recip = set() + config = get_config() + try: + recip.update(config['notify']['alwayscc']) + logger.debug('added global alwayscc: %s', config['notify']['alwayscc']) + except KeyError: + pass + try: + recip.update(config['components'][product][component]['alwayscc']) + logger.debug('added %s/%s alwayscc: %s', product, component, + config['components'][product][component]['alwayscc']) + except KeyError: + pass + return recip + + +def get_recipients_by_subsystem(subsystem: str) -> Set[str]: + global MAINT_CACHE + if not len(MAINT_CACHE): + config = get_config() + ses = get_requests_session() + murl = config['bugzilla']['maintainers_url'] + if 
murl.startswith('file://'): + with open(murl.replace('file://', ''), 'r') as fh: + mdata = fh.read() + else: + res = ses.get(config['bugzilla']['maintainers_url']) + res.raise_for_status() + mdata = res.text + lookfor = ['M:', 'L:'] + prevline = None + cur_sub = None + for line in mdata.splitlines(): + if len(line) < 2 or not len(line[0].strip()): + cur_sub = None + continue + if line[:2] not in lookfor: + prevline = line + continue + if not cur_sub: + cur_sub = prevline + addr = email.utils.parseaddr(line[2:]) + if cur_sub: + if cur_sub not in MAINT_CACHE: + MAINT_CACHE[cur_sub] = set() + MAINT_CACHE[cur_sub].add(addr[1]) + + try: + return MAINT_CACHE[subsystem] + except KeyError: + return set() + + +def get_bug_recipients(bid: int) -> Set[str]: + # Get all db-stored recipients + # TODO: implement "onlyto" + allrecip = set() + try: + allrecip.update(db_get_recipients(bid)) + except LookupError: + logger.debug('No in-database recipients for bid=%s', bid) + # Now get all bug cc recipients + bdata = bz_get_bug(bid, resolve_dupes=True) + bugr = set(bdata['cc']) + bugr.add(bdata['assigned_to']) + bugr.add(bdata['creator']) + bugr.update(get_recipients_by_product_component(bdata['product'], bdata['component'])) + allrecip.update(bugr) + config = get_config() + subsystem_cf = config['bugzilla'].get('subsystem_cf') + if subsystem_cf: + subr = get_recipients_by_subsystem(bdata[subsystem_cf]) + allrecip.update(subr) + + # Remove "neverto" addresses + for mask in config['notify'].get('neverto', list()): + for addr in set(allrecip): + if fnmatch(addr, mask): + logger.debug('Removed %s because it matched neverto=%s', addr, mask) + allrecip.remove(addr) + return allrecip + + +def make_msgid(bid: int, cid: Optional[int]) -> str: + config = get_config() + bzurl = config['bugzilla']['url'] + bzloc = urllib.parse.urlparse(bzurl) + slug = f'b{bid}' + if bid and cid: + count = bz_get_count_by_bid_cid(bid, cid) + slug += f'c{count}' + + msgid = '<%s-%s-%s@%s>' % 
(datetime.date.today().strftime('%Y%m%d'), slug, uuid.uuid4().hex[:12], bzloc.netloc) + return msgid + + +def notify_bug(bid: int, cid: Optional[int], msg: email.message.EmailMessage, inre_cid: Optional[int] = None, + dry_run: bool = False) -> str: + bdata = bz_get_bug(bid, resolve_dupes=True) + config = get_config() + if not msg.get('From'): + msg['From'] = config['notify'].get('fromaddr') + if not msg.get('To'): + recipients = get_bug_recipients(bid) + msg['To'] = b4.format_addrs([('', x) for x in recipients]) + if not msg.get('Message-ID'): + msg['Message-ID'] = make_msgid(bid, cid) + if not msg.get('In-Reply-To'): + inre_msgid = None + try: + inre_msgid = db_get_msgid_by_bid_cid(bid, inre_cid) + except LookupError: + logger.debug('Could not find msgid matching bid=%s, cid=%s', bid, inre_cid) + # Find anything + try: + inre_msgid = db_get_msgid_by_bid_cid(bid, None) + except LookupError: + pass + if inre_msgid: + msg['In-Reply-To'] = f'<{inre_msgid}>' + msg['References'] = f'<{inre_msgid}>' + if not msg.get('Subject'): + msg['Subject'] = 'Re: %s' % bdata['summary'] + else: + msg['Subject'] = bdata['summary'] + + msg['X-Bugzilla-Product'] = bdata['product'] + msg['X-Bugzilla-Component'] = bdata['component'] + msg['X-Mailer'] = f'{__APPNAME__} {__VERSION__}' + # Should we add other X-B headers? 
+ # If we have notify.smtpserver and notify.smtpport defined, use that, + # otherwise use b4's get_smtp method + smtpserver = config['notify'].get('smtpserver') + smtpport = int(config['notify'].get('smtpport', '25')) + if smtpserver and smtpport: + import smtplib + smtp = smtplib.SMTP(smtpserver, smtpport) + else: + smtp, fromaddr = b4.get_smtp(dryrun=dry_run) + b4.send_mail(smtp, [msg], fromaddr=config['notify'].get('fromaddr'), dryrun=dry_run) + + return msg.get('Message-ID') + + +def add_bot_signature(body: str) -> str: + config = get_config() + sigtpt = Template(config['templates'].get('botsig')) + sigvals = { + 'myname': config['bugzilla'].get('name'), + 'appname': __APPNAME__, + 'appver': __VERSION__, + } + body += sigtpt.safe_substitute(sigvals) + return body + + +def db_get_sa() -> Tuple[sa.engine.Engine, sa.engine.Connection]: + global SACONN, SAENGINE + if SACONN is None: + config = get_config() + try: + dburl = config['db']['dburl'] + except KeyError: + raise LookupError('CRITICAL: main.dburl not set in config file') + + db_pool_recycle = int(config['db'].get('dbpoolrecycle', '300')) + SAENGINE = sa.create_engine(dburl, pool_recycle=db_pool_recycle) + SACONN = SAENGINE.connect() + if SAENGINE.driver == 'pysqlite': + md = sa.MetaData() + try: + t_meta = sa.Table('meta', md, autoload=True, autoload_with=SAENGINE) + q = sa.select([t_meta.c.var_value]).where(t_meta.c.var_key == 'schema') + rp = SACONN.execute(q) + dbver = rp.fetchone()[0] + # Future logic to upgrade database here + logger.debug('dbver=%s', dbver) + except NoSuchTableError: + db_init_sa_sqlite_db(SAENGINE, SACONN) + + return SAENGINE, SACONN + + +def db_init_sa_sqlite_db(engine: sa.engine.Engine, dbconn: sa.engine.Connection): + logger.info('Setting up SQLite database') + md = sa.MetaData() + meta = sa.Table('meta', md, + sa.Column('var_key', sa.Text()), + sa.Column('var_value', sa.Text()), + ) + sa.Index('idx_meta_key_value', meta.c.var_key, meta.c.var_value, unique=True) + bmap = 
def _first_matching_mask(value: str, masks):
    """Return the first fnmatch-style mask in ``masks`` that matches ``value``, or None."""
    for mask in masks:
        if fnmatch(value, mask):
            return mask
    return None


def process_new_comments(bid: int, dry_run: bool = False):
    """Send a mail notification for each not-yet-notified comment on bug ``bid``.

    Comments are skipped when a message-id is already recorded for them,
    when their creator matches ``notify.never_if_creator``, when their text
    matches ``notify.never_if_text_matches``, or when they carry an attachment
    that is marked private. Unless ``dry_run`` is set, the resulting
    message-id, the comment creator and the comment's creation time are
    stored in the database after a successful notification.
    """
    config = peebz.get_config()
    notify_conf = config['notify']
    for cdata in peebz.bz_get_newest_comments_for_bid(bid):
        cid = cdata['id']
        # Check if we've already notified about this comment
        try:
            peebz.db_get_msgid_by_bid_cid(bid, cid)
        except LookupError:
            pass
        else:
            logger.debug('Skipping, msgid match for bid=%s, cid=%s', bid, cid)
            continue

        # Check if the creator is in never_if_creator
        mask = _first_matching_mask(cdata['creator'], notify_conf.get('never_if_creator', list()))
        if mask is not None:
            logger.debug('Skipping cid=%s because it matched never_if_creator=%s', cid, mask)
            continue
        # Check if text is in never_if_text_matches
        mask = _first_matching_mask(cdata['text'], notify_conf.get('never_if_text_matches', list()))
        if mask is not None:
            logger.debug('Skipping cid=%s because it matched never_if_text_matches=%s', cid, mask)
            continue

        clines = cdata['text'].strip().splitlines()
        inre_cid = None
        bodyvals = {
            'bzname': config['bugzilla'].get('name'),
            'bug_url': config['bugzilla'].get('bugmask', '').format(bug_id=bid),
            'comment_url': config['bugzilla'].get('bugmask', '').format(bug_id=bid) + f"#c{cdata['count']}",
            'comment_author': cdata['creator'],
        }
        if cdata['attachment_id']:
            logger.info('Processing new attachment for bug_id=%s, comment_id=%s', bid, cid)
            adata = peebz.bz_get_attachment_by_aid(
                cdata['attachment_id'],
                include_fields='file_name,size,content_type,summary,is_patch,is_private',
            )
            if adata['is_private']:
                logger.debug('Skipping attachment marked private')
                continue
            bodytpt = peebz.get_template_by_bid('new_attachment_notify', bid)
            bodyvals.update(adata)
            bodyvals['attachment_url'] = config['bugzilla'].get('attachmask', '').format(
                attachment_id=cdata['attachment_id'])
        else:
            logger.info('Processing new comment for bug_id=%s, comment_id=%s', bid, cid)
            # An empty (or whitespace-only) comment has no first line to
            # inspect; treat it as "not a reply" instead of raising IndexError
            fline = clines[0] if clines else ''
            matches = re.search(r'\(In reply to.*from comment #(\d+)', fline, flags=re.I)
            if matches:
                inre_count = int(matches.groups()[0])
                try:
                    inre_cid = peebz.bz_get_cid_by_bid_count(bid, inre_count)
                except LookupError:
                    pass
            bodyvals['comment_text'] = '\n'.join(clines)
            bodytpt = peebz.get_template_by_bid('new_comment_notify', bid)

        msg = email.message.EmailMessage()
        msg['Reply-To'] = b4.format_addrs([('', cdata['creator'])])
        body = bodytpt.safe_substitute(bodyvals)
        body = peebz.add_bot_signature(body)
        msg.set_payload(body, charset='utf-8')
        msgid = peebz.notify_bug(bid, cid, msg, inre_cid=inre_cid, dry_run=dry_run)
        if msgid and not dry_run:
            peebz.db_store_msgid_bid_cid(msgid, bid, cid)
            peebz.db_store_recipients(bid, {cdata['creator']})
            # TODO: This assumes that comments are always in incremental order
            lastcheck = cdata['creation_time'].replace('T', ' ').rstrip('Z')
            peebz.db_store_notify_last_check(bid, lastcheck)
def setup_parser() -> argparse.ArgumentParser:
    """Build the ``peebz`` command-line parser, including all sub-commands.

    Each sub-command stores its handler in the ``func`` default, and the
    chosen sub-command name is stored in ``subcmd``.
    """
    # noinspection PyTypeChecker
    top = argparse.ArgumentParser(
        prog='peebz',
        description='A tool to bridge public-inbox collections with bugzilla',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    top.add_argument('--version', action='version', version=peebz.__VERSION__)
    # Global on/off switches
    for flags, helptext in (
            (('-d', '--debug'), 'Add more debugging info to the output'),
            (('-q', '--quiet'), 'Output critical information only'),
    ):
        top.add_argument(*flags, action='store_true', default=False, help=helptext)
    top.add_argument('-c', '--config', required=True, help='config.toml to use')
    top.add_argument('--dry-run', action='store_true', default=False,
                     help='Dry run, do not make any changes')

    commands = top.add_subparsers(help='sub-command help', dest='subcmd')

    # parse: process RFC2822 messages passed on stdin (testing mode)
    parse_cmd = commands.add_parser('parse', help='Parse messages passed via stdin')
    parse_cmd.add_argument('--product', help='Force this product instead of guessing by recipient')
    parse_cmd.add_argument('--component', help='Force this component instead of guessing by recipient')
    parse_cmd.set_defaults(func=cmd_parse)

    # pi2bz: query public-inbox to find the latest threads that interest us
    pi2bz_cmd = commands.add_parser('pi2bz', help='Query public-inbox sources for updates')
    pi2bz_cmd.add_argument('--product', help='Only run queries for this product')
    pi2bz_cmd.add_argument('--component', help='Only run queries for this component')
    pi2bz_cmd.set_defaults(func=cmd_pi2bz)

    # bz2pi: query bugzilla and send out any mail updates
    bz2pi_cmd = commands.add_parser('bz2pi', help='Send emails about bugzilla-originated changes')
    bz2pi_cmd.set_defaults(func=cmd_bz2pi)

    # bzdump: show raw REST output from the bugzilla API
    bzdump_cmd = commands.add_parser('bzdump', help='Show colorized raw REST output from bugzilla API')
    bzdump_cmd.add_argument('-b', '--bug-id', type=int, help='Bug to show')
    for flags, helptext in (
            (('-c', '--comment-id'), 'Comment to show'),
            (('-a', '--attachment-id'), 'Attachment to show'),
            (('-u', '--username'), 'User to show'),
            (('-q', '--quicksearch'), 'Quicksearch query to run'),
    ):
        bzdump_cmd.add_argument(*flags, help=helptext)
    bzdump_cmd.add_argument('--resolve-dupes', action='store_true', help='Resolve dupes')
    bzdump_cmd.set_defaults(func=cmd_bzdump)

    return top
def make_bug_desc_from_body(product: str, component: str, body: str, vals: Dict) -> str:
    """Wrap a message body in the component's intro/outro templates.

    The intro template is ``parse_bug_intro_with_count`` when ``vals`` has a
    truthy ``comment_count``, otherwise ``parse_bug_intro``; the outro is
    always ``parse_bug_outro``. Non-empty parts are separated by a blank
    line and the result ends with a single newline.
    """
    intro_name = 'parse_bug_intro_with_count' if vals.get('comment_count') else 'parse_bug_intro'
    intro_tpt = peebz.get_template_by_product_component(intro_name, product, component)
    outro_tpt = peebz.get_template_by_product_component('parse_bug_outro', product, component)

    parts = list()
    intro = intro_tpt.safe_substitute(vals)
    if intro:
        parts.append(intro)
    # The stripped body is always present, even when it is empty
    parts.append(body.strip())
    outro = outro_tpt.safe_substitute(vals)
    if outro:
        parts.append(outro)

    return '\n\n'.join(parts) + '\n'
def process_rfc2822(msg: email.message.EmailMessage, product: str, component: str,
                    dry_run: bool = False) -> None:
    """Record a single mail message in bugzilla as a new bug or a new comment.

    Messages that carry an ``X-Bugzilla-Product`` header, or whose message-id
    is already recorded, are ignored. If the message refers to a known
    bug/comment it is added as a comment to that bug (unless the bug is
    closed); otherwise a new bug is created in ``product``/``component``.
    Finally, if the component defines ``pi_assign_regex`` and it matches the
    message, the bug is assigned to the captured address, provided the sender
    passes ``peebz.bz_check_user_allowed``.

    With ``dry_run`` set nothing is stored in the database and no bugzilla
    changes are made.
    """
    # Ignore any messages that have an X-Bugzilla-Product header,
    # so we don't get into any loops
    if msg.get('x-bugzilla-product'):
        logger.debug('Skipping bugzilla-originating message')
        return

    cconf = peebz.get_component_config(product, component)
    # Get the message-id
    msgid = b4.LoreMessage.get_clean_msgid(msg)
    try:
        # If we have this exact msgid, then it's a dupe
        bid, cid = peebz.db_get_bid_cid_by_msgid(msgid)
    except LookupError:
        pass
    else:
        logger.info('Already recorded as bid=%s, cid=%s', bid, cid)
        return

    # Walk through references and in-reply-to and see if we know any of them
    bid = cid = None
    try:
        bid, cid = peebz.msg_get_inre_bid_cid(msg)
    except LookupError:
        pass

    if bid:
        bdata = peebz.bz_get_bug(bid)
        if not bdata['is_open']:
            logger.info('Bug %s is closed, not adding comments', bid)
            # Return instead of exiting the interpreter: this function is also
            # called in a loop over many messages (pi2bz), and exiting here
            # would silently drop every message queued after this one.
            return

        cid = new_comment_from_msg(bid, cid, msg, dry_run=dry_run)
        if not dry_run:
            peebz.db_store_msgid_bid_cid(msgid, bid, cid)
    else:
        bid, cid = new_bug_from_msg(msg, product, component, dry_run=dry_run)
        if not dry_run:
            peebz.db_store_msgid_bid_cid(msgid, bid, cid)
            if cconf.get('new_bug_send_notification'):
                new_bug_notification(bid, cid, dry_run=dry_run)

    # Do we have any assign triggers?
    assign_re = cconf.get('pi_assign_regex')
    if not assign_re:
        return
    matches = re.search(assign_re, msg.as_string(), flags=re.I | re.M)
    if not matches:
        return

    author = peebz.msg_get_author(msg)
    fromaddr = author[1]
    if not peebz.bz_check_user_allowed(fromaddr, product, component):
        return

    assign_to = matches.groups()[0]
    if assign_to == 'me':
        logger.debug('me=%s', fromaddr)
        assign_to = fromaddr
    # Does this user exist?
    try:
        peebz.bz_get_user(assign_to)
        if not dry_run:
            peebz.bz_assign_bug(bid, assign_to)
        else:
            logger.debug('---DRY RUN---')
            logger.debug('Would have assigned bid=%s to %s', bid, assign_to)
    except LookupError:
        logger.info('Unable to assign %s to %s: no such user', bid, assign_to)
def update_component(product: str, component: str, dry_run: bool = False):
    """Pull new public-inbox messages for one product/component into bugzilla.

    Two sources are checked: threads that are already tracked as bugs (any
    message in them that has no recorded bug/comment is queued), and the
    component's ``pi_query`` search, restricted to messages newer than the
    stored last-check time when one exists. Query hits from senders that pass
    ``peebz.bz_check_user_allowed`` have their whole thread queued. Every
    queued message is then handed to ``peebz.parse.process_rfc2822``.

    With ``dry_run`` set the last-check time is not stored.
    """
    logger.info('Running pi2bz for %s/%s, dry_run=%s', product, component, dry_run)
    cconf = peebz.get_component_config(product, component)
    pi_url = cconf.get('pi_url')
    if not pi_url:
        # A component may be configured for bz2pi only; without a
        # public-inbox URL there is nothing to query here.
        logger.info('No pi_url defined for %s/%s, skipping', product, component)
        return
    url = pi_url.rstrip('/')
    tracked = get_tracked_bug_msgids(product, component)
    now = datetime.datetime.utcnow()

    seen_msgids = set()
    updates = list()
    if tracked:
        logger.info('Checking for updates in %s tracked threads', len(tracked))
        for msgid in tracked:
            try:
                tmsgs = get_sorted_thread(url, msgid)
            except LookupError:
                logger.debug('No results returned for msgid=%s', msgid)
                continue

            for tmsg in tmsgs:
                tmsgid = b4.LoreMessage.get_clean_msgid(tmsg)
                if tmsgid in seen_msgids:
                    logger.debug('Already seen %s', tmsgid)
                    continue
                seen_msgids.add(tmsgid)
                try:
                    peebz.db_get_bid_cid_by_msgid(tmsgid)
                    logger.debug('%s has already been processed', tmsgid)
                    continue
                except LookupError:
                    logger.debug('New message in tracked thread: %s', tmsgid)
                updates.append(tmsg)

    # Now grab the latest query matches
    query = cconf.get('pi_query')
    if query:
        logger.info('Running query for %s/%s', product, component)
        try:
            last_check = peebz.db_get_query_last_check(product, component)
            # Group the configured query so the date filter applies to all of
            # it, even when it contains a top-level OR
            query = f'({query}) AND dt:{last_check}..'
        except LookupError:
            pass
        qquery = urllib.parse.quote_plus(query)
        query_url = f'{url}/?x=m&q={qquery}'
        lastdt = now.strftime('%Y%m%d%H%M%S')
        try:
            msgs = get_query_results(query_url)
            for msg in msgs:
                msgid = b4.LoreMessage.get_clean_msgid(msg)
                if msgid in seen_msgids:
                    logger.debug('Already seen %s', msgid)
                    continue

                # New thing to track!
                seen_msgids.add(msgid)
                author = peebz.msg_get_author(msg)
                fromaddr = author[1]
                if not peebz.bz_check_user_allowed(fromaddr, product, component):
                    logger.debug('skipping msg %s', msg.get('Subject'))
                    continue
                # Retrieve and queue up the entire thread
                try:
                    tmsgs = get_sorted_thread(url, msgid)
                except LookupError:
                    logger.debug('No results returned for msgid=%s', msgid)
                    continue
                for tmsg in tmsgs:
                    tmsgid = b4.LoreMessage.get_clean_msgid(tmsg)
                    seen_msgids.add(tmsgid)
                    updates.append(tmsg)

        except LookupError:
            logger.info('No new results for product=%s, component=%s', product, component)

        if not dry_run:
            peebz.db_store_query_last_check(product, component, lastdt)

    if not updates:
        logger.info('No new messages to add to bugzilla for %s/%s', product, component)
        return

    for msg in updates:
        logger.debug('Recording %s', msg.get('Subject'))
        peebz.parse.process_rfc2822(msg, product, component, dry_run=dry_run)