Initial commit

This is the initial commit of "minimal viable product" as startups like to say. Documentation is going to be forthcoming -- for now we're running initial burn-in tests and finding the largest bugs. Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
author: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2023-03-30 16:33:30 -0400
committer: Konstantin Ryabitsev <konstantin@linuxfoundation.org> 2023-03-30 16:33:30 -0400
commit: 5b903b6d1469d211a19966cf893972111fec280d (patch)
tree: 4039232cc32168e205d207031b0a0f528f2fef86
download: bugspray-5b903b6d1469d211a19966cf893972111fec280d.tar.gz
9 files changed, 1704 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5e5f255
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,14 @@
+*.swp
+*.pyc
+*.pyo
+*.json
+*.pdf
+test.log
+build/*
+dist/*
+MANIFEST
+.idea
+__pycache__
+*.egg-info
+*.sqlite3
+.venv
diff --git a/default.config.toml b/default.config.toml
new file mode 100644
index 0000000..a490f19
--- /dev/null
+++ b/default.config.toml
@@ -0,0 +1,80 @@
+[db]
+dburl = 'sqlite:////home/user/work/temp/peebz.sqlite3'
+
+[notify]
+neverto = ['*@kernel-bugs.*', 'bugbot@kernel.org']
+never_if_creator = ['bugbot@kernel.org']
+never_if_text_matches = ['*has been marked as a duplicate of*']
+fromaddr = 'Kernel.org Bugbot <bugbot@kernel.org>'
+smtpserver = 'localhost'
+smtpport = 25
+
+[bugzilla]
+apikey = '<omitted>'
+name = 'Kernel.org Bugzilla'
+url = 'https://bugzilla.kernel.org'
+resturl = 'https://bugzilla.kernel.org/rest'
+bugmask = 'https://bugzilla.kernel.org/show_bug.cgi?id={bug_id}'
+commentmask = 'https://bugzilla.kernel.org/show_bug.cgi?id={bug_id}#c{comment_count}'
+attachmask = 'https://bugzilla.kernel.org/attachment.cgi?id={attachment_id}'
+linkmask = 'https://msgid.link/{msgid}'
+subsystem_cf = 'cf_subsystem'
+maintainers_url = 'https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/MAINTAINERS'
+
+[mimetypes]
+deny = [ 'text/html', 'application/*-signature' ]
+
+[logging]
+logfile = 'peebz.log'
+loglevel = 'info'
+
+[components.'Linux'.'Kernel']
+new_bug_send_notification = true
+pi_query = '(nq:"bugbot on" OR nq:"bugbot assign")'
+pi_must_bz_groups = ['editbugs']
+pi_url = 'https://lore.kernel.org/all/'
+pi_assign_regex = '^bugbot assign to (\S+)'
+bz_new_bugs_quicksearch = 'OPEN flag:bugbot+'
+alwayscc = ['bugs@lists.linux.dev']
+
+[templates]
+parse_bug_intro = '${author} writes:'
+parse_bug_intro_with_count = '${author} replies to comment #${comment_count}:'
+parse_bug_outro = '(via ${msgid_link})'
+
+new_bug_notify = '''
+Hello:
+
+This conversation is now tracked by ${bzname}:
+${bug_url}
+
+There is no need to do anything else, just keep talking.
+'''
+
+new_comment_notify = '''
+${comment_author} writes via ${bzname}:
+
+${comment_text}
+
+View: ${comment_url}
+You can reply to this message to join the discussion.
+'''
+
+new_attachment_notify = '''
+${comment_author} added a new attachment via ${bzname}.
+You can download it by following the link below.
+
+File: ${file_name} (${content_type})
+Size: ${human_size}
+Link: ${attachment_url}
+---
+${summary}
+
+You can reply to this message to join the discussion.
+'''
+
+botsig = '''
+-- 
+Deet-doot-dot, I am a bot.
+${myname} (${appname} ${appver})
+'''
diff --git a/peebz.sh b/peebz.sh
new file mode 100755
index 0000000..f3809dc
--- /dev/null
+++ b/peebz.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+#
+# Run from a git checkout.
+#
+
+REAL_SCRIPT=$(realpath -e ${BASH_SOURCE[0]})
+SCRIPT_TOP="${SCRIPT_TOP:-$(dirname ${REAL_SCRIPT})}"
+
+exec env PYTHONPATH="${SCRIPT_TOP}" python3 "${SCRIPT_TOP}/peebz/command.py" "${@}"
diff --git a/peebz/__init__.py b/peebz/__init__.py
new file mode 100644
index 0000000..f2d2024
--- /dev/null
+++ b/peebz/__init__.py
@@ -0,0 +1,904 @@
+#!/usr/bin/env python3
+import email
+import email.message
+import email.utils
+import email.parser
+import requests
+import logging
+import base64
+import b4
+import sys
+import uuid
+import datetime
+
+import urllib.parse
+
+import sqlalchemy as sa
+
+from typing import Dict, Tuple, List, Set, Optional
+from fnmatch import fnmatch
+from string import Template
+
+from sqlalchemy.exc import NoSuchTableError
+
+# Policy we use for saving mail locally
+emlpolicy = email.policy.EmailPolicy(utf8=True, cte_type='8bit', max_line_length=None,
+                                     message_factory=email.message.EmailMessage)
+
+REQSESSION = None
+CONFIG = dict()
+
+__APPNAME__ = 'peebz'
+__VERSION__ = '0.1'
+
+__DBSCHEMA__ = 1
+
+logger = logging.getLogger(__APPNAME__)
+b4.logger = logger
+
+SACONN = None
+SAENGINE = None
+REST_CACHE = dict()
+MAINT_CACHE = dict()
+
+
+def get_msg_from_stdin() -> Optional[email.message.EmailMessage]:
+    if not sys.stdin.isatty():
+        msg = email.parser.BytesParser(policy=emlpolicy).parsebytes(sys.stdin.buffer.read())
+        return msg  # noqa
+    return None
+
+
+def get_requests_session():
+    global REQSESSION
+    if REQSESSION is None:
+        REQSESSION = requests.session()
+        REQSESSION.headers.update({'User-Agent': f'{__APPNAME__}/{__VERSION__}'})
+    return REQSESSION
+
+
+def get_config() -> Dict:
+    return CONFIG
+
+
+def get_component_config(product: str, component: str) -> Dict:
+    config = get_config()
+    return config['components'][product][component]
+
+
+def get_template_by_bid(tptname: str, bid: int) -> Template:
+    product, component = bz_get_product_component_by_bid(bid)
+    return get_template_by_product_component(tptname, product, component)
+
+
+def get_template_by_product_component(tptname: str, product: str, component: str) -> Template:
+    config = get_config()
+    try:
+        return Template(config['components'][product][component]['templates'][tptname])
+    except KeyError:
+        pass
+    return Template(config['templates'][tptname])
+
+
+def get_msgid_link(msgid: str) -> str:
+    config = get_config()
+    linkmask = config['bugzilla'].get('linkmask', 'https://lore.kernel.org/{msgid}')
+    return linkmask.format(msgid=msgid)
+
+
+def bz_rest(path: str, payload: Dict = None, params: Dict = None, method: str = 'GET') -> Dict:
+    global REST_CACHE
+    # We only cache GETs without any params
+    if method == 'GET' and not params and path in REST_CACHE:
+        logger.debug('Using cached data')
+        return REST_CACHE[path]
+
+    config = get_config()
+    url = '{BZURL}/{path}'.format(BZURL=config['bugzilla'].get('resturl').rstrip('/'), path=path)
+    logger.debug('Querying url=%s', url)
+    myparams = dict() if params is None else dict(params)
+    myparams['api_key'] = config['bugzilla'].get('apikey', '')
+    ses = get_requests_session()
+    if method == 'GET':
+        res = ses.get(url, params=myparams)
+    elif method == 'POST':
+        res = ses.post(url, params=myparams, json=payload)
+    elif method == 'PUT':
+        res = ses.put(url, params=myparams, json=payload)
+    else:
+        logger.critical('CRITICAL: Unknown method=%s', method)
+        raise RuntimeError('Unknown method %s' % method)
+
+    # logger.debug('res=%s', res.text)
+    if res.status_code == 404:
+        raise LookupError('Bugzilla returned 404: %s' % res.text)
+
+    # for every other error, we just BT for now
+    res.raise_for_status()
+    rdata = res.json()
+    if method == 'GET' and not params:
+        # Cache it
+        REST_CACHE[path] = rdata
+    elif method != 'GET':
+        # We changed something, so nuke our cache
+        logger.debug('Clearing cache')
+        REST_CACHE = dict()
+
+    return rdata
+
+
+def bz_add_atts_to_bug(bid: int, atts: List[Dict]) -> List[int]:
+    aids = list()
+    for att in atts:
+        att['ids'] = [bid]
+        rdata = bz_rest(f'bug/{bid}/attachment', att, method='POST')
+        logger.debug('Created new attachment %s', ', '.join(rdata['ids']))
+        aids += rdata['ids']
+    return aids
+
+
+def bz_add_new_bug(payload: Dict) -> Tuple[int, int]:
+    if 'version' not in payload:
+        payload['version'] = 'unspecified'
+    rdata = bz_rest('bug', payload, method='POST')
+    bid = rdata['id']
+    logger.debug('Created new bug %s', bid)
+    # Apparently, we don't get comment-id info from new bug creation
+    cid = bz_get_cid_by_bid_count(bid)
+    return bid, cid
+
+
+def bz_add_new_comment(bid: int, comment: str) -> int:
+    payload = {
+        'id': bid,
+        'comment': comment
+    }
+    rdata = bz_rest(f'bug/{bid}/comment', payload, method='POST')
+    cid = rdata['id']
+    logger.debug('Created new comment %s', cid)
+    return cid
+
+
+def bz_get_bug(bid: int, resolve_dupes=False) -> Dict:
+    if resolve_dupes:
+        bid = bz_dedupe_bid(bid)
+
+    path = f'bug/{bid}'
+    rdata = bz_rest(path)
+    for bdata in rdata['bugs']:
+        if bdata['id'] == bid:
+            return bdata
+    raise RuntimeError('Could not get bug info for %s' % bid)
+
+
+def bz_get_user(username: str) -> Dict:
+    path = f'user/{username}'
+    try:
+        rdata = bz_rest(path)
+        for udata in rdata['users']:
+            if udata['name'] == username:
+                return udata
+    except LookupError:
+        pass
+
+    raise LookupError('Could not get user info for %s' % username)
+
+
+def bz_get_user_groups(username: str) -> Set:
+    udata = bz_get_user(username)
+    return set([x['name'] for x in udata['groups']])
+
+
+def bz_get_cid_by_bid_count(bid: int, count: int = 0) -> int:
+    bdata = bz_rest(f'bug/{bid}/comment')
+    for rbid, rbdata in bdata['bugs'].items():
+        if int(rbid) != bid:
+            continue
+        for comment in rbdata['comments']:
+            if comment['count'] == count:
+                logger.debug('cid for %s/c%s is %s', bid, count, comment['id'])
+                return comment['id']
+    raise LookupError('No cid matching bid=%s count=%s' % (bid, count))
+
+
+def bz_get_count_by_bid_cid(bid: int, cid: int) -> int:
+    bdata = bz_rest(f'bug/{bid}/comment')
+    for rbid, rbdata in bdata['bugs'].items():
+        if int(rbid) != bid:
+            continue
+        for comment in rbdata['comments']:
+            if comment['id'] == cid:
+                logger.debug('count for %s/%s is c%s', bid, cid, comment['count'])
+                return comment['count']
+    raise LookupError('No match for bid=%s cid=%s', bid, cid)
+
+
+def bz_dedupe_bid(bid: int) -> int:
+    bdata = bz_get_bug(bid)
+    if bdata.get('dupe_of'):
+        # Nothing wrong with recursion
+        return bz_dedupe_bid(bdata['dupe_of'])
+    return bid
+
+
+def bz_check_user_allowed(uid: str, product: str, component: str) -> bool:
+    cconf = get_component_config(product, component)
+    mustgroups = cconf.get('pi_must_bz_groups')
+    if mustgroups is None:
+        # No restrictions, anyone can do anything they like
+        return True
+    try:
+        udata = bz_get_user(uid)
+    except LookupError:
+        logger.debug('Could not find user %s in bugzilla', uid)
+        return False
+
+    for mustgroup in mustgroups:
+        for ugroup in udata.get('groups', list()):
+            if ugroup['name'] == mustgroup:
+                logger.debug('%s mustgroup matches %s', uid, mustgroup)
+                return True
+
+    logger.debug('%s not member of %s', uid, mustgroups)
+    return False
+
+
+def bz_assign_bug(bid: int, uid: str) -> None:
+    logger.info('Assigning bug %s to %s', bid, uid)
+    path = f'bug/{bid}'
+    payload = {
+        'assigned_to': uid,
+    }
+    bz_rest(path, payload=payload, method='PUT')
+
+
+def db_get_query_last_check(product: str, component: str) -> str:
+    key = f'query_{product}_{component}'
+    return db_get_meta_value(key)
+
+
+def db_store_query_last_check(product: str, component: str, last_check: str) -> None:
+    key = f'query_{product}_{component}'
+    return db_store_meta_value(key, last_check)
+
+
+def db_get_msgid_by_bid_cid(bid: int, cid: Optional[int]) -> str:
+    engine, dbconn = db_get_sa()
+    md = sa.MetaData()
+    t_bmap = sa.Table('msgid_bug_mapping', md, autoload=True, autoload_with=engine)
+    if cid:
+        q = sa.select([t_bmap.c.message_id]).where(t_bmap.c.bug_id == bid, t_bmap.c.comment_id == cid)
+    else:
+        # If cid is not defined, we get all mappings and use the lowest cid
+        q = sa.select([t_bmap.c.message_id]).where(t_bmap.c.bug_id == bid).order_by(t_bmap.c.comment_id)
+
+    rp = dbconn.execute(q)
+    fa = rp.fetchall()
+    if len(fa):
+        logger.debug('query results for bid=%s, cid=%s: %s', bid, cid, fa)
+        return fa[0][0]
+
+    raise LookupError('No message-id matching bid=%s, cid=%s' % (bid, cid))
+
+
+def db_get_bid_cid_by_msgid(msgid: str) -> Tuple[int, Optional[int]]:
+    engine, dbconn = db_get_sa()
+    md = sa.MetaData()
+    t_bmap = sa.Table('msgid_bug_mapping', md, autoload=True, autoload_with=engine)
+    logger.debug('Querying db for msgid=%s', msgid)
+    q = sa.select([t_bmap.c.bug_id, t_bmap.c.comment_id]).where(t_bmap.c.message_id == msgid)
+    rp = dbconn.execute(q)
+    fa = rp.fetchall()
+    if not len(fa):
+        raise LookupError('msgid %s not known' % msgid)
+    bid, cid = fa[0]
+    dbid = bz_dedupe_bid(bid)
+    if dbid != bid:
+        cid = None
+    logger.debug(' matching bid=%s, cid=%s', bid, cid)
+    return bid, cid
+
+
+def db_store_msgid_bid_cid(msgid: str, bid: int, cid: int) -> None:
+    msgid = msgid.strip('<>')
+    engine, dbconn = db_get_sa()
+    md = sa.MetaData()
+    t_bmap = sa.Table('msgid_bug_mapping', md, autoload=True, autoload_with=engine)
+    q = sa.insert(t_bmap).values(message_id=msgid, bug_id=bid, comment_id=cid)
+    dbconn.execute(q)
+    logger.info('Created new mapping for %s: %s/%s', msgid, bid, cid)
+
+
+def db_get_recipients(bid: int) -> Set[str]:
+    engine, dbconn = db_get_sa()
+    md = sa.MetaData()
+    t_recip = sa.Table('recipients', md, autoload=True, autoload_with=engine)
+    logger.debug('Querying recipients for bid=%s', bid)
+    q = sa.select([t_recip.c.email]).where(t_recip.c.bug_id == bid)
+    rp = dbconn.execute(q)
+    fa = rp.fetchall()
+    if not len(fa):
+        raise LookupError('bid %s not known' % bid)
+    return set(x[0] for x in fa)
+
+
+def db_store_recipients(bid: int, recipients: Set[str]) -> None:
+    # TODO: add ability to unsubscribe?
+    try:
+        stored = db_get_recipients(bid)
+    except LookupError:
+        stored = set()
+    # Any new ones to store?
+    extras = recipients - stored
+    if not extras:
+        return
+
+    engine, dbconn = db_get_sa()
+    md = sa.MetaData()
+    t_recip = sa.Table('recipients', md, autoload=True, autoload_with=engine)
+    logger.debug('Storing new recipients for bid=%s', bid)
+    for addr in extras:
+        q = sa.insert(t_recip).values(bug_id=bid, email=addr)
+        dbconn.execute(q)
+        logger.debug(' Added %s', addr)
+    return
+
+
+def db_get_meta_value(key: str) -> str:
+    engine, dbconn = db_get_sa()
+    md = sa.MetaData()
+    t_meta = sa.Table('meta', md, autoload=True, autoload_with=engine)
+    q = sa.select([t_meta.c.var_value]).where(t_meta.c.var_key == key)
+    rp = dbconn.execute(q)
+    fa = rp.fetchall()
+    if not len(fa):
+        raise LookupError('meta key %s not known' % key)
+    return fa[0][0]
+
+
+def db_store_meta_value(key: str, value: str) -> None:
+    engine, dbconn = db_get_sa()
+    md = sa.MetaData()
+    t_meta = sa.Table('meta', md, autoload=True, autoload_with=engine)
+    with engine.begin() as dbconn:
+        q = sa.delete(t_meta).where(t_meta.c.var_key == key)
+        dbconn.execute(q)
+        q = sa.insert(t_meta).values(var_key=key, var_value=value)
+        dbconn.execute(q)
+
+
+def db_store_notify_last_check(bid: int, when: str):
+    key = f'notify_bug_{bid}'
+    return db_store_meta_value(key, when)
+
+
+def db_get_notify_last_check(bid: int) -> str:
+    key = f'notify_bug_{bid}'
+    return db_get_meta_value(key)
+
+
+def bz_get_changed_bugs(since: str, include_untracked: bool = False) -> List:
+    logger.debug('Querying for changed bugs since %s', since)
+    params = {
+        'chfieldfrom': since,
+        'include_fields': 'id,summary',
+    }
+    rdata = bz_rest('bug', params=params)
+    if include_untracked:
+        return rdata['bugs']
+    bids = tuple([x['id'] for x in rdata['bugs']])
+    engine, dbconn = db_get_sa()
+    md = sa.MetaData()
+    t_bmap = sa.Table('msgid_bug_mapping', md, autoload=True, autoload_with=engine)
+    q = sa.select([t_bmap.c.bug_id]).where(t_bmap.c.bug_id.in_(bids)).distinct()
+    rp = dbconn.execute(q)
+    fa = rp.fetchall()
+    if not fa:
+        return list()
+    tracked = set([x[0] for x in fa])
+    bugs = list()
+    for bdata in rdata['bugs']:
+        if bdata['id'] in tracked:
+            bugs.append(bdata)
+
+    return bugs
+
+
+def bz_quicksearch_bugs(query: str) -> Dict:
+    params = {
+        'include_fields': 'id,summary',
+        'quicksearch': query,
+    }
+    return bz_rest('bug', params=params)
+
+
+def bz_get_query_bugs(params: Dict, exclude: Set[int]) -> List[int]:
+    if 'include_fields' not in params:
+        params['include_fields'] = 'id,summary'
+    rdata = bz_rest('bug', params=params)
+    bids = list()
+    for bdata in rdata['bugs']:
+        if bdata['id'] in exclude:
+            continue
+        bids.append(bdata['id'])
+    return bids
+
+
+def get_human_size(size: int, decimals: int = 2) -> str:
+    units = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
+    while True:
+        unit = units.pop(0)
+        if size < 1024.0 or not len(units):
+            break
+        size /= 1024.0
+
+    return f'{size:.{decimals}f} {unit}'
+
+
+def bz_get_attachment_by_aid(aid: int, include_fields: Optional[str] = None) -> Dict:
+    if include_fields is None:
+        rdata = bz_rest(f'bug/attachment/{aid}')
+    else:
+        params = {
+            'include_fields': include_fields,
+        }
+        rdata = bz_rest(f'bug/attachment/{aid}', params=params)
+
+    for raid, radata in rdata['attachments'].items():
+        if int(raid) == aid:
+            if 'size' in radata:
+                radata['human_size'] = get_human_size(radata['size'])
+            return radata
+
+    raise LookupError('No matching attachment_id found: %s', aid)
+
+
+def bz_get_all_comments_by_bid(bid: int) -> List[Dict]:
+    bdata = bz_rest(f'bug/{bid}/comment')
+    for rbid, rbdata in bdata['bugs'].items():
+        if int(rbid) != bid:
+            continue
+        return rbdata['comments']
+
+
+def bz_get_comments_for_bid_since(bid: int, since: str) -> List[Dict]:
+    params = {
+        'new_since': since,
+        'id': bid,
+    }
+    bdata = bz_rest(f'bug/{bid}/comment', params=params)
+    for rbid, rbdata in bdata['bugs'].items():
+        if int(rbid) != bid:
+            continue
+        return rbdata['comments']
+
+
+def bz_get_newest_comments_for_bid(bid: int, include_private: bool = False) -> List[Dict]:
+    try:
+        when = db_get_notify_last_check(bid)
+    except LookupError:
+        # grab the highest cid we know about
+        engine, dbconn = db_get_sa()
+        md = sa.MetaData()
+        t_bmap = sa.Table('msgid_bug_mapping', md, autoload=True, autoload_with=engine)
+        q = sa.select(sa.func.max(t_bmap.c.comment_id)).where(t_bmap.c.bug_id == bid)
+        rp = dbconn.execute(q)
+        mfa = rp.fetchall()
+        c_max = None
+        if mfa:
+            c_max = mfa[0][0]
+        cdatas = bz_get_all_comments_by_bid(bid)
+        comments = list()
+        for cdata in cdatas:
+            if c_max and cdata['id'] <= c_max:
+                continue
+            if not include_private and cdata['is_private']:
+                continue
+            comments.append(cdata)
+        return comments
+
+    logger.debug('Getting newest comments since %s', when)
+    cdatas = bz_get_comments_for_bid_since(bid, when)
+    comments = list()
+    for cdata in cdatas:
+        if not include_private and cdata['is_private']:
+            continue
+        comments.append(cdata)
+    return comments
+
+
+def msg_get_inre_msgids(msg: email.message.EmailMessage) -> List[str]:
+    pairs = list()
+    if msg.get('In-Reply-To'):
+        pairs += email.utils.getaddresses([str(x) for x in msg.get_all('in-reply-to', [])])
+    if msg.get('References'):
+        pairs += email.utils.getaddresses([str(x) for x in msg.get_all('references', [])])
+
+    msgids = list()
+    for pair in pairs:
+        if pair[1] not in msgids:
+            msgids.append(pair[1])
+
+    return msgids
+
+
+def sort_msgs_by_received(msgs: List[email.message.EmailMessage]) -> List[email.message.EmailMessage]:
+    tosort = list()
+    for msg in msgs:
+        latest_rdt = None
+        for rhdr in msg.get_all('Received'):
+            # The received headers are pretty free-form, but generally will end with ; datetimeinfo
+            chunks = rhdr.rsplit(';', maxsplit=1)
+            if len(chunks) < 2:
+                continue
+            rdate = chunks[1].strip()
+            rdt = email.utils.parsedate_tz(rdate)
+            if rdt is None:
+                continue
+            # use the latest rdt
+            if not latest_rdt or latest_rdt < rdt:
+                latest_rdt = rdt
+        latest_rdt = None
+        if not latest_rdt and msg.get('Date'):
+            # Use the Date header as fallback
+            latest_rdt = email.utils.parsedate_to_datetime(msg.get('Date'))
+        if not latest_rdt:
+            logger.debug('Message without a date: %s!', msg.get('Message-ID'))
+            continue
+        tosort.append((latest_rdt, msg))
+
+    sortedmsgs = list()
+    for rdt, msg in sorted(tosort):
+        sortedmsgs.append(msg)
+
+    return sortedmsgs
+
+
+def msg_get_inre_bid_cid(msg: email.message.EmailMessage) -> Tuple[int, int]:
+    msgids = msg_get_inre_msgids(msg)
+    if not msgids:
+        logger.debug('No references in msg')
+        raise LookupError('No references found')
+
+    res = list()
+    for msgid in msgids:
+        try:
+            res.append(db_get_bid_cid_by_msgid(msgid))
+        except LookupError:
+            # Try the next one until we run out
+            pass
+    if not res:
+        logger.debug('No reference matched database records')
+        raise LookupError('Nothing matching in the db')
+
+    if len(res) > 1:
+        # Use the highest cid, which is going to be the latest matching comment
+        res.sort(key=lambda x: x[1], reverse=True)
+    return res[0]
+
+
+def msg_get_recipients(msg: email.message.EmailMessage) -> Set[str]:
+    pairs = list()
+    if msg.get('To'):
+        pairs += email.utils.getaddresses([str(x) for x in msg.get_all('to', [])])
+    if msg.get('Cc'):
+        pairs += email.utils.getaddresses([str(x) for x in msg.get_all('cc', [])])
+    if msg.get('From'):
+        pairs += email.utils.getaddresses([str(x) for x in msg.get_all('from', [])])
+
+    return set([x[1].lower() for x in pairs])
+
+
+def bz_get_product_component_by_bid(bid: int) -> Tuple[str, str]:
+    bdata = bz_get_bug(bid, resolve_dupes=True)
+    return bdata['product'], bdata['component']
+
+
+def get_product_component_by_recipients(recipients: Set[str]) -> Tuple[str, str]:
+    config = get_config()
+    if not config.get('components'):
+        logger.debug('No components found in config')
+        raise LookupError('No components defined in config')
+    for recipient in recipients:
+        logger.debug('Matching %s', recipient)
+        for bz_product, bz_components in config['components'].items():
+            for bz_component, c_config in bz_components.items():
+                if not c_config.get('recipients'):
+                    continue
+                for addr in c_config['recipients']:
+                    if fnmatch(recipient, addr):
+                        logger.debug('Matched %s with product=%s, component=%s', recipient, bz_product, bz_component)
+                        return bz_product, bz_component
+
+    raise LookupError('No matches for any recipients')
+
+
+def get_newbug_payload_by_product_component(product: str, component: str) -> Dict:
+    config = get_config()
+    try:
+        payload = config['components'][product][component]['payload']
+    except KeyError:
+        payload = dict()
+    payload['product'] = product
+    payload['component'] = component
+    return payload
+
+
+def msg_get_author(msg: email.message.EmailMessage) -> Tuple[str, str]:
+    author = ('', 'missing@address.local')
+    fh = msg.get('from')
+    if fh:
+        author = email.utils.getaddresses([fh])[0]
+
+    if not author[0]:
+        return 'Zorro Boogs', author[1]
+
+    return author
+
+
+def msg_parse_for_bug(msg: email.message.EmailMessage) -> Tuple[str, Tuple[str, str], str, str, List[Dict]]:
+    msgid = b4.LoreMessage.get_clean_msgid(msg)
+    mp = msg.get_body(preferencelist=('plain',))
+    bbody = mp.get_payload(decode=True)
+    cs = mp.get_content_charset()
+    if not cs:
+        cs = 'utf-8'
+    cpay = bbody.decode(cs, errors='replace')
+    # Strip signature if we find it
+    chunks = cpay.rsplit('-- ', maxsplit=1)
+    cbody = chunks[0]
+    lsub = b4.LoreSubject(msg.get('Subject', ''))
+    subject = lsub.subject
+    atts = msg_get_valid_attachments(msg)
+    author = msg_get_author(msg)
+
+    return msgid, author, subject, cbody, atts
+
+
+def msg_get_valid_attachments(msg: email.message.EmailMessage) -> List[Dict]:
+    # Get all good attachments
+    config = get_config()
+    atts = list()
+    for part in msg.walk():
+        if part.get_content_disposition() != 'attachment':
+            continue
+        ct = part.get_content_type()
+        mts = config.get('mimetypes')
+        allowed = True
+        if mts and 'deny' in mts:
+            for dmt in mts['deny']:
+                if fnmatch(ct, dmt):
+                    logger.debug('Skipping denied mime-type attachement: %s', ct)
+                    allowed = False
+                    break
+        if allowed:
+            databytes = part.get_payload(decode=True)
+            data = base64.b64encode(databytes).decode()
+            filename = part.get_filename()
+            if not filename:
+                filename = 'unnamed.txt'
+                ct = 'text/plain'
+            summary = filename
+            payload = {
+                'file_name': filename,
+                'content_type': ct,
+                'summary': summary,
+                'data': data,
+            }
+
+            atts.append(payload)
+
+    return atts
+
+
+def get_recipients_by_product_component(product: str, component: str) -> Set[str]:
+    recip = set()
+    config = get_config()
+    try:
+        recip.update(config['notify']['alwayscc'])
+        logger.debug('added global alwayscc: %s', config['notify']['alwayscc'])
+    except KeyError:
+        pass
+    try:
+        recip.update(config['components'][product][component]['alwayscc'])
+        logger.debug('added %s/%s alwayscc: %s', product, component,
+                     config['components'][product][component]['alwayscc'])
+    except KeyError:
+        pass
+    return recip
+
+
+def get_recipients_by_subsystem(subsystem: str) -> Set[str]:
+    global MAINT_CACHE
+    if not len(MAINT_CACHE):
+        config = get_config()
+        ses = get_requests_session()
+        murl = config['bugzilla']['maintainers_url']
+        if murl.startswith('file://'):
+            with open(murl.replace('file://', ''), 'r') as fh:
+                mdata = fh.read()
+        else:
+            res = ses.get(config['bugzilla']['maintainers_url'])
+            res.raise_for_status()
+            mdata = res.text
+        lookfor = ['M:', 'L:']
+        prevline = None
+        cur_sub = None
+        for line in mdata.splitlines():
+            if len(line) < 2 or not len(line[0].strip()):
+                cur_sub = None
+                continue
+            if line[:2] not in lookfor:
+                prevline = line
+                continue
+            if not cur_sub:
+                cur_sub = prevline
+            addr = email.utils.parseaddr(line[2:])
+            if cur_sub:
+                if cur_sub not in MAINT_CACHE:
+                    MAINT_CACHE[cur_sub] = set()
+                MAINT_CACHE[cur_sub].add(addr[1])
+
+    try:
+        return MAINT_CACHE[subsystem]
+    except KeyError:
+        return set()
+
+
+def get_bug_recipients(bid: int) -> Set[str]:
+    # Get all db-stored recipients
+    # TODO: implement "onlyto"
+    allrecip = set()
+    try:
+        allrecip.update(db_get_recipients(bid))
+    except LookupError:
+        logger.debug('No in-database recipients for bid=%s', bid)
+    # Now get all bug cc recipients
+    bdata = bz_get_bug(bid, resolve_dupes=True)
+    bugr = set(bdata['cc'])
+    bugr.add(bdata['assigned_to'])
+    bugr.add(bdata['creator'])
+    bugr.update(get_recipients_by_product_component(bdata['product'], bdata['component']))
+    allrecip.update(bugr)
+    config = get_config()
+    subsystem_cf = config['bugzilla'].get('subsystem_cf')
+    if subsystem_cf:
+        subr = get_recipients_by_subsystem(bdata[subsystem_cf])
+        allrecip.update(subr)
+
+    # Remove "neverto" addresses
+    for mask in config['notify'].get('neverto', list()):
+        for addr in set(allrecip):
+            if fnmatch(addr, mask):
+                logger.debug('Removed %s because it matched neverto=%s', addr, mask)
+                allrecip.remove(addr)
+    return allrecip
+
+
+def make_msgid(bid: int, cid: Optional[int]) -> str:
+    config = get_config()
+    bzurl = config['bugzilla']['url']
+    bzloc = urllib.parse.urlparse(bzurl)
+    slug = f'b{bid}'
+    if bid and cid:
+        count = bz_get_count_by_bid_cid(bid, cid)
+        slug += f'c{count}'
+
+    msgid = '<%s-%s-%s@%s>' % (datetime.date.today().strftime('%Y%m%d'), slug, uuid.uuid4().hex[:12], bzloc.netloc)
+    return msgid
+
+
+def notify_bug(bid: int, cid: Optional[int], msg: email.message.EmailMessage, inre_cid: Optional[int] = None,
+               dry_run: bool = False) -> str:
+    bdata = bz_get_bug(bid, resolve_dupes=True)
+    config = get_config()
+    if not msg.get('From'):
+        msg['From'] = config['notify'].get('fromaddr')
+    if not msg.get('To'):
+        recipients = get_bug_recipients(bid)
+        msg['To'] = b4.format_addrs([('', x) for x in recipients])
+    if not msg.get('Message-ID'):
+        msg['Message-ID'] = make_msgid(bid, cid)
+    if not msg.get('In-Reply-To'):
+        inre_msgid = None
+        try:
+            inre_msgid = db_get_msgid_by_bid_cid(bid, inre_cid)
+        except LookupError:
+            logger.debug('Could not find msgid matching bid=%s, cid=%s', bid, inre_cid)
+            # Find anything
+            try:
+                inre_msgid = db_get_msgid_by_bid_cid(bid, None)
+            except LookupError:
+                pass
+        if inre_msgid:
+            msg['In-Reply-To'] = f'<{inre_msgid}>'
+            msg['References'] = f'<{inre_msgid}>'
+            if not msg.get('Subject'):
+                msg['Subject'] = 'Re: %s' % bdata['summary']
+        else:
+            msg['Subject'] = bdata['summary']
+
+    msg['X-Bugzilla-Product'] = bdata['product']
+    msg['X-Bugzilla-Component'] = bdata['component']
+    msg['X-Mailer'] = f'{__APPNAME__} {__VERSION__}'
+    # Should we add other X-B headers?
+    # If we have notify.smtpserver and notify.smtpport defined, use that,
+    # otherwise use b4's get_smtp method
+    smtpserver = config['notify'].get('smtpserver')
+    smtpport = int(config['notify'].get('smtpport', '25'))
+    if smtpserver and smtpport:
+        import smtplib
+        smtp = smtplib.SMTP(smtpserver, smtpport)
+    else:
+        smtp, fromaddr = b4.get_smtp(dryrun=dry_run)
+    b4.send_mail(smtp, [msg], fromaddr=config['notify'].get('fromaddr'), dryrun=dry_run)
+
+    return msg.get('Message-ID')
+
+
+def add_bot_signature(body: str) -> str:
+    config = get_config()
+    sigtpt = Template(config['templates'].get('botsig'))
+    sigvals = {
+        'myname': config['bugzilla'].get('name'),
+        'appname': __APPNAME__,
+        'appver': __VERSION__,
+    }
+    body += sigtpt.safe_substitute(sigvals)
+    return body
+
+
+def db_get_sa() -> Tuple[sa.engine.Engine, sa.engine.Connection]:
+    global SACONN, SAENGINE
+    if SACONN is None:
+        config = get_config()
+        try:
+            dburl = config['db']['dburl']
+        except KeyError:
+            raise LookupError('CRITICAL: main.dburl not set in config file')
+
+        db_pool_recycle = int(config['db'].get('dbpoolrecycle', '300'))
+        SAENGINE = sa.create_engine(dburl, pool_recycle=db_pool_recycle)
+        SACONN = SAENGINE.connect()
+        if SAENGINE.driver == 'pysqlite':
+            md = sa.MetaData()
+            try:
+                t_meta = sa.Table('meta', md, autoload=True, autoload_with=SAENGINE)
+                q = sa.select([t_meta.c.var_value]).where(t_meta.c.var_key == 'schema')
+                rp = SACONN.execute(q)
+                dbver = rp.fetchone()[0]
+                # Future logic to upgrade database here
+                logger.debug('dbver=%s', dbver)
+            except NoSuchTableError:
+                db_init_sa_sqlite_db(SAENGINE, SACONN)
+
+    return SAENGINE, SACONN
+
+
+def db_init_sa_sqlite_db(engine: sa.engine.Engine, dbconn: sa.engine.Connection):
+    logger.info('Setting up SQLite database')
+    md = sa.MetaData()
+    meta = sa.Table('meta', md,
+                    sa.Column('var_key', sa.Text()),
+                    sa.Column('var_value', sa.Text()),
+                    )
+    sa.Index('idx_meta_key_value', meta.c.var_key, meta.c.var_value, unique=True)
+    bmap = sa.Table('msgid_bug_mapping', md,
+                    sa.Column('row_id', sa.Integer(), primary_key=True),
+                    sa.Column('bug_id', sa.Integer(), nullable=False),
+                    sa.Column('comment_id', sa.Integer(), nullable=False),
+                    sa.Column('message_id', sa.Text(), nullable=False),
+                    )
+    sa.Index('idx_msgid_bugid_commentid', bmap.c.message_id, bmap.c.bug_id, bmap.c.comment_id, unique=True)
+    sa.Index('idx_msgid_commentid', bmap.c.message_id, bmap.c.comment_id, unique=True)
+    sa.Index('idx_msgid_bugid', bmap.c.message_id, bmap.c.bug_id, unique=True)
+
+    recip = sa.Table('recipients', md,
+                     sa.Column('row_id', sa.Integer(), primary_key=True),
+                     sa.Column('bug_id', sa.Integer(), nullable=False),
+                     sa.Column('email', sa.Text(), nullable=False),
+                     )
+    sa.Index('idx_bugid_email', recip.c.bug_id, recip.c.email, unique=True)
+    md.create_all(engine)
+    q = sa.insert(meta).values(var_key='schema', var_value=str(__DBSCHEMA__))
+    dbconn.execute(q)
diff --git a/peebz/bz2pi.py b/peebz/bz2pi.py
new file mode 100644
index 0000000..b78a648
--- /dev/null
+++ b/peebz/bz2pi.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2023 by the Linux Foundation
+
+import argparse
+import peebz
+import datetime
+import re
+import b4
+
+import email.message
+
+from fnmatch import fnmatch
+
+logger = peebz.logger
+
+
+def process_new_comments(bid: int, dry_run: bool = False):
+    config = peebz.get_config()
+    cdatas = peebz.bz_get_newest_comments_for_bid(bid)
+    for cdata in cdatas:
+        # Check if we've already notified about this bug
+        cid = cdata['id']
+        try:
+            peebz.db_get_msgid_by_bid_cid(bid, cid)
+            logger.debug('Skipping, msgid match for bid=%s, cid=%s', bid, cid)
+            continue
+        except LookupError:
+            pass
+        # Check if the creator is in never_if_creator
+        skip = False
+        for mask in config['notify'].get('never_if_creator', list()):
+            if fnmatch(cdata['creator'], mask):
+                logger.debug('Skipping cid=%s because it matched never_if_creator=%s', cid, mask)
+                skip = True
+                break
+        if skip:
+            continue
+        # Check if text is in never_if_text_matches
+        for mask in config['notify'].get('never_if_text_matches', list()):
+            if fnmatch(cdata['text'], mask):
+                logger.debug('Skipping cid=%s because it matched never_if_text_matches=%s', cid, mask)
+                skip = True
+                break
+        if skip:
+            continue
+        clines = cdata['text'].strip().splitlines()
+        inre_cid = None
+        bodyvals = {
+            'bzname': config['bugzilla'].get('name'),
+            'bug_url': config['bugzilla'].get('bugmask', '').format(bug_id=bid),
+            'comment_url': config['bugzilla'].get('bugmask', '').format(bug_id=bid) + f"#c{cdata['count']}",
+            'comment_author': cdata['creator'],
+        }
+        if cdata['attachment_id']:
+            logger.info('Processing new attachment for bug_id=%s, comment_id=%s', bid, cid)
+            adata = peebz.bz_get_attachment_by_aid(
+                cdata['attachment_id'],
+                include_fields='file_name,size,content_type,summary,is_patch,is_private',
+            )
+            if adata['is_private']:
+                logger.debug('Skipping attachment marked private')
+                continue
+            bodytpt = peebz.get_template_by_bid('new_attachment_notify', bid)
+            bodyvals.update(adata)
+            bodyvals['attachment_url'] = config['bugzilla'].get('attachmask', '').format(
+                attachment_id=cdata['attachment_id'])
+        else:
+            logger.info('Processing new comment for bug_id=%s, comment_id=%s', bid, cid)
+            fline = clines[0]
+            matches = re.search(r'\(In reply to.*from comment #(\d+)', fline, flags=re.I)
+            if matches:
+                inre_count = int(matches.groups()[0])
+                try:
+                    inre_cid = peebz.bz_get_cid_by_bid_count(bid, inre_count)
+                except LookupError:
+                    pass
+            bodyvals['comment_text'] = '\n'.join(clines)
+            bodytpt = peebz.get_template_by_bid('new_comment_notify', bid)
+
+        msg = email.message.EmailMessage()
+        msg['Reply-To'] = b4.format_addrs([('', cdata['creator'])])
+        body = bodytpt.safe_substitute(bodyvals)
+        body = peebz.add_bot_signature(body)
+        msg.set_payload(body, charset='utf-8')
+        msgid = peebz.notify_bug(bid, cid, msg, inre_cid=inre_cid, dry_run=dry_run)
+        if msgid and not dry_run:
+            peebz.db_store_msgid_bid_cid(msgid, bid, cid)
+            peebz.db_store_recipients(bid, {cdata['creator']})
+            # TODO: This assumes that comments are always in incremental order
+            lastcheck = cdata['creation_time'].replace('T', ' ').rstrip('Z')
+            peebz.db_store_notify_last_check(bid, lastcheck)
+
+
+def main(cmdargs: argparse.Namespace) -> None:
+    now = datetime.datetime.utcnow()
+    lastrun = now.strftime('%Y-%m-%d %H:%M:%S')
+    try:
+        # Get all new bugs that changed since last run
+        since = peebz.db_get_meta_value('notify_last_run')
+    except LookupError:
+        logger.debug('Got a LookupError, getting everything for the past hour')
+        # Assume it's the first run and get changes for the past hour
+        hourago = datetime.datetime.utcnow() - datetime.timedelta(hours=1)
+        since = hourago.strftime('%Y-%m-%d %H:%M:%S')
+
+    # first, process all changed bugs that we're tracking
+    logger.info('Getting a list of changed bugs since %s', since)
+    buglist = peebz.bz_get_changed_bugs(since)
+    seen = set()
+    if buglist:
+        for bdata in buglist:
+            logger.debug('Looking at %s: %s', bdata['id'], bdata['summary'])
+            bid = bdata['id']
+            seen.add(bid)
+            process_new_comments(bid, dry_run=cmdargs.dry_run)
+    else:
+        logger.info('No changes to any tracked bugs')
+
+    # Now go by product/component and handle new bug queries if defined
+    config = peebz.get_config()
+    for bz_product, bz_components in config['components'].items():
+        for bz_component in bz_components.keys():
+            cconf = peebz.get_component_config(bz_product, bz_component)
+            qs = cconf.get('bz_new_bugs_quicksearch')
+            if not qs:
+                logger.debug('No quicksearch defined for %s/%s', bz_product, bz_component)
+                continue
+            logger.info('Querying matching quicksearch results since %s for %s/%s, qs=%s', since, bz_product,
+                        bz_component, qs)
+            params = {
+                'chfieldfrom': since,
+                'product': bz_product,
+                'component': bz_component,
+                'quicksearch': qs,
+            }
+            buglist = peebz.bz_get_query_bugs(params, exclude=seen)
+            if buglist:
+                logger.info('Processing %s matching quicksearch bugs', len(buglist))
+                for bid in buglist:
+                    seen.add(bid)
+                    process_new_comments(bid, dry_run=cmdargs.dry_run)
+            else:
+                logger.info('No changed bugs matching these parameters.')
+
+    if not cmdargs.dry_run:
+        peebz.db_store_meta_value(key='notify_last_run', value=lastrun)
diff --git a/peebz/command.py b/peebz/command.py
new file mode 100644
index 0000000..30046bd
--- /dev/null
+++ b/peebz/command.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2020 by the Linux Foundation
+#
+
+import sys
+import argparse
+import b4
+import logging
+import logging.handlers
+import peebz
+
+logger = peebz.logger
+
+
+def cmd_parse(cmdargs):
+    import peebz.parse
+    peebz.parse.main(cmdargs)
+
+
+def cmd_bz2pi(cmdargs):
+    import peebz.bz2pi
+    peebz.bz2pi.main(cmdargs)
+
+
+def cmd_pi2bz(cmdargs):
+    import peebz.pi2bz
+    peebz.pi2bz.main(cmdargs)
+
+
+def cmd_bzdump(cmdargs):
+    import json
+    from pygments import highlight, lexers, formatters
+    if cmdargs.bug_id:
+        rdata = peebz.bz_get_bug(cmdargs.bug_id, resolve_dupes=cmdargs.resolve_dupes)
+    elif cmdargs.username:
+        rdata = peebz.bz_get_user(cmdargs.username)
+    elif cmdargs.quicksearch:
+        rdata = peebz.bz_quicksearch_bugs(query=cmdargs.quicksearch)
+    else:
+        sys.exit(1)
+
+    jdata = json.dumps(rdata, sort_keys=True, indent=4)
+    colorized = highlight(jdata, lexers.JsonLexer(), formatters.TerminalFormatter())
+    print(colorized)
+
+
+def setup_parser() -> argparse.ArgumentParser:
+    # noinspection PyTypeChecker
+    parser = argparse.ArgumentParser(
+        prog='peebz',
+        description='A tool to bridge public-inbox collections with bugzilla',
+        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+    )
+    parser.add_argument('--version', action='version', version=peebz.__VERSION__)
+    parser.add_argument('-d', '--debug', action='store_true', default=False,
+                        help='Add more debugging info to the output')
+    parser.add_argument('-q', '--quiet', action='store_true', default=False,
+                        help='Output critical information only')
+    parser.add_argument('-c', '--config', required=True,
+                        help='config.toml to use')
+    parser.add_argument('--dry-run', action='store_true', default=False,
+                        help='Dry run, do not make any changes')
+
+    subparsers = parser.add_subparsers(help='sub-command help', dest='subcmd')
+    # parse : process RFC2822 messages passed on stdin (testing mode)
+    sp_parse = subparsers.add_parser('parse', help='Parse messages passed via stdin')
+    sp_parse.add_argument('--product', help='Force this product instead of guessing by recipient')
+    sp_parse.add_argument('--component', help='Force this component instead of guessing by recipient')
+    sp_parse.set_defaults(func=cmd_parse)
+
+    # pi2bz : query public-inbox to find the latest treads that interest us
+    sp_pi2bz = subparsers.add_parser('pi2bz', help='Query public-inbox sources for updates')
+    sp_pi2bz.add_argument('--product', help='Only run queries for this product')
+    sp_pi2bz.add_argument('--component', help='Only run queries for this component')
+    sp_pi2bz.set_defaults(func=cmd_pi2bz)
+
+    # bz2pi: query bugzilla and sends out any mail updates
+    sp_bz2pi = subparsers.add_parser('bz2pi', help='Send emails about bugzilla-originated changes')
+    sp_bz2pi.set_defaults(func=cmd_bz2pi)
+
+    # show  : command to show REST raw REST output
+    sp_bzdump = subparsers.add_parser('bzdump', help='Show colorized raw REST output from bugzilla API')
+    sp_bzdump.add_argument('-b', '--bug-id', type=int, help='Bug to show')
+    sp_bzdump.add_argument('-c', '--comment-id', help='Comment to show')
+    sp_bzdump.add_argument('-a', '--attachment-id', help='Attachment to show')
+    sp_bzdump.add_argument('-u', '--username', help='User to show')
+    sp_bzdump.add_argument('-q', '--quicksearch', help='Quicksearch query to run')
+
+    sp_bzdump.add_argument('--resolve-dupes', action='store_true', help='Resolve dupes')
+    sp_bzdump.set_defaults(func=cmd_bzdump)
+
+    return parser
+
+
+def cmd():
+    parser = setup_parser()
+    cmdargs = parser.parse_args()
+    logger.setLevel(logging.DEBUG)
+
+    ch = logging.StreamHandler()
+    formatter = logging.Formatter('%(message)s')
+    ch.setFormatter(formatter)
+
+    if cmdargs.quiet:
+        ch.setLevel(logging.CRITICAL)
+    elif cmdargs.debug:
+        ch.setLevel(logging.DEBUG)
+    else:
+        ch.setLevel(logging.INFO)
+
+    logger.addHandler(ch)
+
+    if 'func' not in cmdargs:
+        parser.print_help()
+        sys.exit(1)
+
+    with open(cmdargs.config, 'rb') as fh:
+        try:
+            import tomllib  # noqa
+            peebz.CONFIG = tomllib.load(fh)
+        except ModuleNotFoundError:
+            import tomli  # noqa
+            peebz.CONFIG = tomli.load(fh)
+
+    try:
+        logfile = peebz.CONFIG['logging']['logfile']
+        flh = logging.handlers.WatchedFileHandler(logfile)
+        fmt = '[%(process)d] %(asctime)s - ' + cmdargs.subcmd + ': %(message)s'
+        flh.setFormatter(logging.Formatter(fmt))
+        loglevel = peebz.CONFIG['logging'].get('loglevel', 'info')
+        if loglevel == 'debug':
+            flh.setLevel(logging.DEBUG)
+        else:
+            flh.setLevel(logging.INFO)
+        logger.addHandler(flh)
+    except KeyError:
+        # No file logging for you
+        pass
+
+    cmdargs.func(cmdargs)
+
+
+if __name__ == '__main__':
+    import os
+    # noinspection PyBroadException
+    try:
+        if peebz.__VERSION__.endswith('-dev'):
+            base = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+            dotgit = os.path.join(base, '.git')
+            lines = b4.git_get_command_lines(dotgit, ['rev-parse', '--short', 'HEAD'])
+            if lines:
+                peebz.__VERSION__ = '%s-%.5s' % (peebz.__VERSION__, lines[0].strip())
+    except Exception as ex:
+        pass
+    cmd()
diff --git a/peebz/parse.py b/peebz/parse.py
new file mode 100644
index 0000000..07de9e5
--- /dev/null
+++ b/peebz/parse.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2023 by the Linux Foundation
+
+import sys
+import argparse
+import peebz
+import b4
+import re
+
+from typing import Tuple, Dict
+
+import email.message
+import email.utils
+
+logger = peebz.logger
+
+
+def new_bug_notification(bid: int, inre_cid: int, dry_run: bool = False):
+    msg = email.message.EmailMessage()
+    config = peebz.get_config()
+    bodyvals = {
+        'bzname': config['bugzilla'].get('name'),
+        'bug_url': config['bugzilla'].get('bugmask', '').format(bug_id=bid),
+    }
+    bodytpt = peebz.get_template_by_bid('new_bug_notify', bid)
+    body = bodytpt.safe_substitute(bodyvals)
+    sigtpt = peebz.get_template_by_bid('botsig', bid)
+    sigvals = {
+        'myname': config['bugzilla'].get('name'),
+        'appname': peebz.__APPNAME__,
+        'appver': peebz.__VERSION__,
+    }
+    body += sigtpt.safe_substitute(sigvals)
+    msg.set_payload(body, charset='utf-8')
+    peebz.notify_bug(bid, None, msg, inre_cid=inre_cid, dry_run=dry_run)
+
+
+def make_bug_desc_from_body(product: str, component: str, body: str, vals: Dict) -> str:
+    if 'comment_count' in vals and vals['comment_count']:
+        tpt_intro = peebz.get_template_by_product_component('parse_bug_intro_with_count', product, component)
+    else:
+        tpt_intro = peebz.get_template_by_product_component('parse_bug_intro', product, component)
+    tpt_outro = peebz.get_template_by_product_component('parse_bug_outro', product, component)
+    desc = ''
+    intro = tpt_intro.safe_substitute(vals)
+    if intro:
+        desc = intro + '\n\n'
+    desc += body.strip()
+    outro = tpt_outro.safe_substitute(vals)
+    if outro:
+        desc += '\n\n' + outro
+    desc += '\n'
+
+    return desc
+
+
+def new_bug_from_msg(msg: email.message.EmailMessage, product: str, component: str,
+                     dry_run: bool = False) -> Tuple[int, int]:
+    msgid, author, subject, body, atts = peebz.msg_parse_for_bug(msg)
+    payload = peebz.get_newbug_payload_by_product_component(product, component)
+    summary = re.sub(r'^\s*(Re|Fwd):\s*', '', subject)
+    vals = {
+        'author': b4.format_addrs([author]),
+        'msgid_link': peebz.get_msgid_link(msgid),
+    }
+    desc = make_bug_desc_from_body(product, component, body, vals)
+
+    payload['summary'] = summary
+    payload['description'] = desc
+    if not dry_run:
+        bid, cid = peebz.bz_add_new_bug(payload)
+        logger.debug('new bug bid=%s, cid=%s', bid, cid)
+        recipients = peebz.msg_get_recipients(msg)
+        peebz.db_store_recipients(bid, recipients)
+        if atts:
+            peebz.bz_add_atts_to_bug(bid, atts)
+    else:
+        logger.info('--- DRY RUN ---')
+        logger.info('Would have created a new bug in %s/%s:', product, component)
+        logger.info('Summary: %s', payload['summary'])
+        logger.info('Description:')
+        logger.info(payload['description'])
+        bid = cid = None
+
+    return bid, cid
+
+
+def new_comment_from_msg(bid: int, cid: int, msg: email.message.EmailMessage, dry_run: bool = False) -> int:
+    msgid, author, subject, body, atts = peebz.msg_parse_for_bug(msg)
+    vals = {
+        'author': b4.format_addrs([author]),
+        'msgid_link': peebz.get_msgid_link(msgid),
+    }
+    if cid:
+        try:
+            vals['comment_count'] = peebz.bz_get_count_by_bid_cid(bid, cid)
+        except LookupError:
+            pass
+    product, component = peebz.bz_get_product_component_by_bid(bid)
+
+    desc = make_bug_desc_from_body(product, component, body, vals)
+
+    if not dry_run:
+        cid = peebz.bz_add_new_comment(bid, desc)
+        recipients = peebz.msg_get_recipients(msg)
+        peebz.db_store_recipients(bid, recipients)
+        if atts:
+            peebz.bz_add_atts_to_bug(bid, atts)
+    else:
+        logger.info('--- DRY RUN ---')
+        logger.info('Would have added this comment to %s', bid)
+        logger.info(desc)
+        cid = None
+    return cid
+
+
+def process_rfc2822(msg: email.message.EmailMessage, product: str, component: str,
+                    dry_run: bool = False) -> None:
+    # Ignore any messages that have an X-Bugzilla-Product header,
+    # so we don't get into any loops
+    if msg.get('x-bugzilla-product'):
+        logger.debug('Skipping bugzilla-originating message')
+        return
+
+    cconf = peebz.get_component_config(product, component)
+    # Get the message-id
+    msgid = b4.LoreMessage.get_clean_msgid(msg)
+    try:
+        # If we have this exact msgid, then it's a dupe
+        bid, cid = peebz.db_get_bid_cid_by_msgid(msgid)
+        logger.info('Already recorded as bid=%s, cid=%s', bid, cid)
+        return
+    except LookupError:
+        pass
+
+    # Walk through references and in-reply-to and see if we know any of them
+    bid = cid = None
+    try:
+        bid, cid = peebz.msg_get_inre_bid_cid(msg)
+    except LookupError:
+        pass
+
+    if bid:
+        bdata = peebz.bz_get_bug(bid)
+        if not bdata['is_open']:
+            logger.info('Bug %s is closed, not adding comments', bid)
+            sys.exit(0)
+
+        cid = new_comment_from_msg(bid, cid, msg, dry_run=dry_run)
+        if not dry_run:
+            peebz.db_store_msgid_bid_cid(msgid, bid, cid)
+    else:
+        bid, cid = new_bug_from_msg(msg, product, component, dry_run=dry_run)
+        if not dry_run:
+            peebz.db_store_msgid_bid_cid(msgid, bid, cid)
+            if cconf.get('new_bug_send_notification'):
+                new_bug_notification(bid, cid, dry_run=dry_run)
+
+    # Do we have any assign triggers?
+    assign_re = cconf.get('pi_assign_regex')
+    if assign_re:
+        matches = re.search(assign_re, msg.as_string(), flags=re.I | re.M)
+        if matches:
+            author = peebz.msg_get_author(msg)
+            fromaddr = author[1]
+            if peebz.bz_check_user_allowed(fromaddr, product, component):
+                assign_to = matches.groups()[0]
+                if assign_to == 'me':
+                    logger.debug('me=%s', fromaddr)
+                    assign_to = fromaddr
+                # Does this user exist?
+                try:
+                    peebz.bz_get_user(assign_to)
+                    if not dry_run:
+                        peebz.bz_assign_bug(bid, assign_to)
+                    else:
+                        logger.debug('---DRY RUN---')
+                        logger.debug('Would have assigned bid=%s to %s', bid, assign_to)
+
+                except LookupError:
+                    logger.info('Unable to assign %s to %s: no such user', bid, assign_to)
+
+
+def main(cmdargs: argparse.Namespace) -> None:
+    msg = peebz.get_msg_from_stdin()
+    product = cmdargs.product
+    component = cmdargs.component
+    if not (product and component):
+        recipients = peebz.msg_get_recipients(msg)
+        try:
+            product, component = peebz.get_product_component_by_recipients(recipients)
+        except LookupError as ex:
+            # TODO: fail properly here
+            logger.info(str(ex))
+            sys.exit(1)
+    process_rfc2822(msg, product, component, dry_run=cmdargs.dry_run)
diff --git a/peebz/pi2bz.py b/peebz/pi2bz.py
new file mode 100644
index 0000000..36f07d3
--- /dev/null
+++ b/peebz/pi2bz.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2023 by the Linux Foundation
+
+import argparse
+import peebz
+import peebz.parse
+import b4
+import urllib.parse
+import email.message
+import gzip
+import datetime
+
+from typing import List, Set
+
+logger = peebz.logger
+b4.logger = logger
+# force b4 to use EmailMessage factory
+b4.emlpolicy = peebz.emlpolicy
+
+
+def get_query_results(query_url: str) -> List:
+    loc = urllib.parse.urlparse(query_url)
+    logger.debug('query=%s', query_url)
+    logger.debug('grabbing search results from %s', loc.netloc)
+    session = peebz.get_requests_session()
+    # For the query to retrieve a mbox file, we need to send a POST request
+    resp = session.post(query_url, data='')
+    if resp.status_code == 404:
+        raise LookupError('Nothing matching query=%s', query_url)
+    if resp.status_code != 200:
+        raise LookupError('Server returned an error for %s: %s' % (query_url, resp.status_code))
+    t_mbox = gzip.decompress(resp.content)
+    resp.close()
+    if not len(t_mbox):
+        raise LookupError('Nothing matching query=%s', query_url)
+    return b4.split_and_dedupe_pi_results(t_mbox)
+
+
+def get_sorted_thread(url: str, msgid: str) -> List[email.message.EmailMessage]:
+    loc = urllib.parse.urlparse(url)
+    mbox_url = url.rstrip('/') + '/' + urllib.parse.quote_plus(msgid) + '/t.mbox.gz'
+    logger.debug('mbox_url=%s', mbox_url)
+    logger.debug('grabbing thread from %s', loc.netloc)
+    session = peebz.get_requests_session()
+    resp = session.get(mbox_url)
+    if resp.status_code == 404:
+        raise LookupError('Nothing matching mbox_url=%s', mbox_url)
+    if resp.status_code != 200:
+        raise LookupError('Server returned an error for %s: %s' % (mbox_url, resp.status_code))
+    t_mbox = gzip.decompress(resp.content)
+    resp.close()
+
+    deduped = b4.split_and_dedupe_pi_results(t_mbox)
+    if not deduped:
+        raise LookupError('No messages matching mbox_url=%s' % mbox_url)
+    strict = b4.get_strict_thread(deduped, msgid)
+    return peebz.sort_msgs_by_received(strict)
+
+
+def get_new_msgs(msgs: List[email.message.EmailMessage]) -> List[email.message.EmailMessage]:
+    new_msgs = list()
+    for msg in msgs:
+        msgid = b4.LoreMessage.get_clean_msgid(msg)
+        try:
+            peebz.db_get_bid_cid_by_msgid(msgid)
+            continue
+        except LookupError:
+            new_msgs.append(msg)
+
+    return new_msgs
+
+
+def get_tracked_bug_msgids(product: str, component: str) -> Set[str]:
+    cconf = peebz.get_component_config(product, component)
+    params = {
+        'include_fields': 'id',
+        'product': product,
+        'component': component,
+        'quicksearch': 'OPEN',
+        'chfieldfrom': '90d',
+    }
+    params.update(cconf.get('bz_query_params', dict()))
+    rdata = peebz.bz_rest('bug', params=params)
+    msgids = set()
+    for bdata in rdata.get('bugs', list()):
+        bid = bdata['id']
+        try:
+            msgid = peebz.db_get_msgid_by_bid_cid(bid, None)
+            logger.debug('bid=%s is tracked as msgid=%s', bid, msgid)
+            msgids.add(msgid)
+        except LookupError:
+            logger.debug('Not tracking bid=%s', bid)
+
+    return msgids
+
+
+def update_component(product: str, component: str, dry_run: bool = False):
+    logger.info('Running pi2bz for %s/%s, dry_run=%s', product, component, dry_run)
+    cconf = peebz.get_component_config(product, component)
+    tracked = get_tracked_bug_msgids(product, component)
+    url = cconf.get('pi_url').rstrip('/')
+    now = datetime.datetime.utcnow()
+
+    seen_msgids = set()
+    updates = list()
+    if len(tracked):
+        logger.info('Checking for updates in %s tracked threads', len(tracked))
+        for msgid in tracked:
+            try:
+                tmsgs = get_sorted_thread(url, msgid)
+            except LookupError:
+                logger.debug('No results returned for msgid=%s', msgid)
+                continue
+
+            for tmsg in tmsgs:
+                tmsgid = b4.LoreMessage.get_clean_msgid(tmsg)
+                if tmsgid in seen_msgids:
+                    logger.debug('Already seen %s', tmsgid)
+                    continue
+                seen_msgids.add(tmsgid)
+                try:
+                    peebz.db_get_bid_cid_by_msgid(tmsgid)
+                    logger.debug('%s has already been processed', tmsgid)
+                    continue
+                except LookupError:
+                    logger.debug('New message in tracked thread: %s', tmsgid)
+                    updates.append(tmsg)
+
+    # Now grab the latest query matches
+    query = cconf.get('pi_query')
+    if query:
+        logger.info('Running query for %s/%s', product, component)
+        try:
+            last_check = peebz.db_get_query_last_check(product, component)
+            query += f' AND dt:{last_check}..'
+        except LookupError:
+            pass
+        qquery = urllib.parse.quote_plus(query)
+        query_url = url.rstrip('/') + f'/?x=m&q={qquery}'
+        lastdt = now.strftime('%Y%m%d%H%M%S')
+        try:
+            msgs = get_query_results(query_url)
+            for msg in msgs:
+                msgid = b4.LoreMessage.get_clean_msgid(msg)
+                if msgid in seen_msgids:
+                    logger.debug('Already seen %s', msgid)
+                    continue
+
+                # New thing to track!
+                seen_msgids.add(msgid)
+                author = peebz.msg_get_author(msg)
+                fromaddr = author[1]
+                if not peebz.bz_check_user_allowed(fromaddr, product, component):
+                    logger.debug('skipping msg %s', msg.get('Subject'))
+                    continue
+                # Retrieve and queue up the entire thread
+                try:
+                    tmsgs = get_sorted_thread(url, msgid)
+                except LookupError:
+                    logger.debug('No results returned for msgid=%s', msgid)
+                    continue
+                for tmsg in tmsgs:
+                    tmsgid = b4.LoreMessage.get_clean_msgid(tmsg)
+                    seen_msgids.add(tmsgid)
+                    updates.append(tmsg)
+
+        except LookupError:
+            logger.info('No new results for product=%s, component=%s', product, component)
+
+        if not dry_run:
+            peebz.db_store_query_last_check(product, component, lastdt)
+
+    if not updates:
+        logger.info('No new messages to add to bugzilla for %s/%s', product, component)
+        return
+
+    for msg in updates:
+        logger.debug('Recording %s', msg.get('Subject'))
+        peebz.parse.process_rfc2822(msg, product, component, dry_run=dry_run)
+
+
+def main(cmdargs: argparse.Namespace):
+    config = peebz.get_config()
+    # Iterate all components
+    for bz_product, bz_components in config['components'].items():
+        for bz_component in bz_components.keys():
+            update_component(bz_product, bz_component, dry_run=cmdargs.dry_run)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..cab6f85
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+b4>=0.12.2,<1.0
+Pygments>=2.14.0,<3.0
+requests>=2.28
+sqlalchemy>=1.4,<2.0
+tomli>=2.0; python_version < '3.11'
author	Konstantin Ryabitsev <konstantin@linuxfoundation.org>	2023-03-30 16:33:30 -0400
committer	Konstantin Ryabitsev <konstantin@linuxfoundation.org>	2023-03-30 16:33:30 -0400
commit	5b903b6d1469d211a19966cf893972111fec280d (patch)
tree	4039232cc32168e205d207031b0a0f528f2fef86
download	bugspray-5b903b6d1469d211a19966cf893972111fec280d.tar.gz