aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorKonstantin Ryabitsev <konstantin@linuxfoundation.org>2023-03-30 16:33:30 -0400
committerKonstantin Ryabitsev <konstantin@linuxfoundation.org>2023-03-30 16:33:30 -0400
commit5b903b6d1469d211a19966cf893972111fec280d (patch)
tree4039232cc32168e205d207031b0a0f528f2fef86
downloadbugspray-5b903b6d1469d211a19966cf893972111fec280d.tar.gz
Initial commit
This is the initial commit of "minimal viable product" as startups like to say. Documentation is going to be forthcoming -- for now we're running initial burn-in tests and finding the largest bugs. Signed-off-by: Konstantin Ryabitsev <konstantin@linuxfoundation.org>
-rw-r--r--.gitignore14
-rw-r--r--default.config.toml80
-rwxr-xr-xpeebz.sh9
-rw-r--r--peebz/__init__.py904
-rw-r--r--peebz/bz2pi.py148
-rw-r--r--peebz/command.py157
-rw-r--r--peebz/parse.py198
-rw-r--r--peebz/pi2bz.py189
-rw-r--r--requirements.txt5
9 files changed, 1704 insertions, 0 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..5e5f255
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,14 @@
+*.swp
+*.pyc
+*.pyo
+*.json
+*.pdf
+test.log
+build/*
+dist/*
+MANIFEST
+.idea
+__pycache__
+*.egg-info
+*.sqlite3
+.venv
diff --git a/default.config.toml b/default.config.toml
new file mode 100644
index 0000000..a490f19
--- /dev/null
+++ b/default.config.toml
@@ -0,0 +1,80 @@
+[db]
+dburl = 'sqlite:////home/user/work/temp/peebz.sqlite3'
+
+[notify]
+neverto = ['*@kernel-bugs.*', 'bugbot@kernel.org']
+never_if_creator = ['bugbot@kernel.org']
+never_if_text_matches = ['*has been marked as a duplicate of*']
+fromaddr = 'Kernel.org Bugbot <bugbot@kernel.org>'
+smtpserver = 'localhost'
+smtpport = 25
+
+[bugzilla]
+apikey = '<omitted>'
+name = 'Kernel.org Bugzilla'
+url = 'https://bugzilla.kernel.org'
+resturl = 'https://bugzilla.kernel.org/rest'
+bugmask = 'https://bugzilla.kernel.org/show_bug.cgi?id={bug_id}'
+commentmask = 'https://bugzilla.kernel.org/show_bug.cgi?id={bug_id}#c{comment_count}'
+attachmask = 'https://bugzilla.kernel.org/attachment.cgi?id={attachment_id}'
+linkmask = 'https://msgid.link/{msgid}'
+subsystem_cf = 'cf_subsystem'
+maintainers_url = 'https://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git/plain/MAINTAINERS'
+
+[mimetypes]
+deny = [ 'text/html', 'application/*-signature' ]
+
+[logging]
+logfile = 'peebz.log'
+loglevel = 'info'
+
+[components.'Linux'.'Kernel']
+new_bug_send_notification = true
+pi_query = '(nq:"bugbot on" OR nq:"bugbot assign")'
+pi_must_bz_groups = ['editbugs']
+pi_url = 'https://lore.kernel.org/all/'
+pi_assign_regex = '^bugbot assign to (\S+)'
+bz_new_bugs_quicksearch = 'OPEN flag:bugbot+'
+alwayscc = ['bugs@lists.linux.dev']
+
+[templates]
+parse_bug_intro = '${author} writes:'
+parse_bug_intro_with_count = '${author} replies to comment #${comment_count}:'
+parse_bug_outro = '(via ${msgid_link})'
+
+new_bug_notify = '''
+Hello:
+
+This conversation is now tracked by ${bzname}:
+${bug_url}
+
+There is no need to do anything else, just keep talking.
+'''
+
+new_comment_notify = '''
+${comment_author} writes via ${bzname}:
+
+${comment_text}
+
+View: ${comment_url}
+You can reply to this message to join the discussion.
+'''
+
+new_attachment_notify = '''
+${comment_author} added a new attachment via ${bzname}.
+You can download it by following the link below.
+
+File: ${file_name} (${content_type})
+Size: ${human_size}
+Link: ${attachment_url}
+---
+${summary}
+
+You can reply to this message to join the discussion.
+'''
+
+botsig = '''
+--
+Deet-doot-dot, I am a bot.
+${myname} (${appname} ${appver})
+'''
diff --git a/peebz.sh b/peebz.sh
new file mode 100755
index 0000000..f3809dc
--- /dev/null
+++ b/peebz.sh
@@ -0,0 +1,9 @@
+#!/usr/bin/env bash
+#
+# Run from a git checkout.
+#
+
+REAL_SCRIPT=$(realpath -e ${BASH_SOURCE[0]})
+SCRIPT_TOP="${SCRIPT_TOP:-$(dirname ${REAL_SCRIPT})}"
+
+exec env PYTHONPATH="${SCRIPT_TOP}" python3 "${SCRIPT_TOP}/peebz/command.py" "${@}"
diff --git a/peebz/__init__.py b/peebz/__init__.py
new file mode 100644
index 0000000..f2d2024
--- /dev/null
+++ b/peebz/__init__.py
@@ -0,0 +1,904 @@
+#!/usr/bin/env python3
+import email
+import email.message
+import email.utils
+import email.parser
+import requests
+import logging
+import base64
+import b4
+import sys
+import uuid
+import datetime
+
+import urllib.parse
+
+import sqlalchemy as sa
+
+from typing import Dict, Tuple, List, Set, Optional
+from fnmatch import fnmatch
+from string import Template
+
+from sqlalchemy.exc import NoSuchTableError
+
+# Policy we use for saving mail locally
+emlpolicy = email.policy.EmailPolicy(utf8=True, cte_type='8bit', max_line_length=None,
+ message_factory=email.message.EmailMessage)
+
+REQSESSION = None
+CONFIG = dict()
+
+__APPNAME__ = 'peebz'
+__VERSION__ = '0.1'
+
+__DBSCHEMA__ = 1
+
+logger = logging.getLogger(__APPNAME__)
+b4.logger = logger
+
+SACONN = None
+SAENGINE = None
+REST_CACHE = dict()
+MAINT_CACHE = dict()
+
+
+def get_msg_from_stdin() -> Optional[email.message.EmailMessage]:
+ if not sys.stdin.isatty():
+ msg = email.parser.BytesParser(policy=emlpolicy).parsebytes(sys.stdin.buffer.read())
+ return msg # noqa
+ return None
+
+
+def get_requests_session():
+ global REQSESSION
+ if REQSESSION is None:
+ REQSESSION = requests.session()
+ REQSESSION.headers.update({'User-Agent': f'{__APPNAME__}/{__VERSION__}'})
+ return REQSESSION
+
+
+def get_config() -> Dict:
+ return CONFIG
+
+
+def get_component_config(product: str, component: str) -> Dict:
+ config = get_config()
+ return config['components'][product][component]
+
+
+def get_template_by_bid(tptname: str, bid: int) -> Template:
+ product, component = bz_get_product_component_by_bid(bid)
+ return get_template_by_product_component(tptname, product, component)
+
+
+def get_template_by_product_component(tptname: str, product: str, component: str) -> Template:
+ config = get_config()
+ try:
+ return Template(config['components'][product][component]['templates'][tptname])
+ except KeyError:
+ pass
+ return Template(config['templates'][tptname])
+
+
+def get_msgid_link(msgid: str) -> str:
+ config = get_config()
+ linkmask = config['bugzilla'].get('linkmask', 'https://lore.kernel.org/{msgid}')
+ return linkmask.format(msgid=msgid)
+
+
+def bz_rest(path: str, payload: Dict = None, params: Dict = None, method: str = 'GET') -> Dict:
+ global REST_CACHE
+ # We only cache GETs without any params
+ if method == 'GET' and not params and path in REST_CACHE:
+ logger.debug('Using cached data')
+ return REST_CACHE[path]
+
+ config = get_config()
+ url = '{BZURL}/{path}'.format(BZURL=config['bugzilla'].get('resturl').rstrip('/'), path=path)
+ logger.debug('Querying url=%s', url)
+ myparams = dict() if params is None else dict(params)
+ myparams['api_key'] = config['bugzilla'].get('apikey', '')
+ ses = get_requests_session()
+ if method == 'GET':
+ res = ses.get(url, params=myparams)
+ elif method == 'POST':
+ res = ses.post(url, params=myparams, json=payload)
+ elif method == 'PUT':
+ res = ses.put(url, params=myparams, json=payload)
+ else:
+ logger.critical('CRITICAL: Unknown method=%s', method)
+ raise RuntimeError('Unknown method %s' % method)
+
+ # logger.debug('res=%s', res.text)
+ if res.status_code == 404:
+ raise LookupError('Bugzilla returned 404: %s' % res.text)
+
+ # for every other error, we just BT for now
+ res.raise_for_status()
+ rdata = res.json()
+ if method == 'GET' and not params:
+ # Cache it
+ REST_CACHE[path] = rdata
+ elif method != 'GET':
+ # We changed something, so nuke our cache
+ logger.debug('Clearing cache')
+ REST_CACHE = dict()
+
+ return rdata
+
+
+def bz_add_atts_to_bug(bid: int, atts: List[Dict]) -> List[int]:
+ aids = list()
+ for att in atts:
+ att['ids'] = [bid]
+ rdata = bz_rest(f'bug/{bid}/attachment', att, method='POST')
+ logger.debug('Created new attachment %s', ', '.join(rdata['ids']))
+ aids += rdata['ids']
+ return aids
+
+
+def bz_add_new_bug(payload: Dict) -> Tuple[int, int]:
+ if 'version' not in payload:
+ payload['version'] = 'unspecified'
+ rdata = bz_rest('bug', payload, method='POST')
+ bid = rdata['id']
+ logger.debug('Created new bug %s', bid)
+ # Apparently, we don't get comment-id info from new bug creation
+ cid = bz_get_cid_by_bid_count(bid)
+ return bid, cid
+
+
+def bz_add_new_comment(bid: int, comment: str) -> int:
+ payload = {
+ 'id': bid,
+ 'comment': comment
+ }
+ rdata = bz_rest(f'bug/{bid}/comment', payload, method='POST')
+ cid = rdata['id']
+ logger.debug('Created new comment %s', cid)
+ return cid
+
+
+def bz_get_bug(bid: int, resolve_dupes=False) -> Dict:
+ if resolve_dupes:
+ bid = bz_dedupe_bid(bid)
+
+ path = f'bug/{bid}'
+ rdata = bz_rest(path)
+ for bdata in rdata['bugs']:
+ if bdata['id'] == bid:
+ return bdata
+ raise RuntimeError('Could not get bug info for %s' % bid)
+
+
+def bz_get_user(username: str) -> Dict:
+ path = f'user/{username}'
+ try:
+ rdata = bz_rest(path)
+ for udata in rdata['users']:
+ if udata['name'] == username:
+ return udata
+ except LookupError:
+ pass
+
+ raise LookupError('Could not get user info for %s' % username)
+
+
+def bz_get_user_groups(username: str) -> Set:
+ udata = bz_get_user(username)
+ return set([x['name'] for x in udata['groups']])
+
+
+def bz_get_cid_by_bid_count(bid: int, count: int = 0) -> int:
+ bdata = bz_rest(f'bug/{bid}/comment')
+ for rbid, rbdata in bdata['bugs'].items():
+ if int(rbid) != bid:
+ continue
+ for comment in rbdata['comments']:
+ if comment['count'] == count:
+ logger.debug('cid for %s/c%s is %s', bid, count, comment['id'])
+ return comment['id']
+ raise LookupError('No cid matching bid=%s count=%s' % (bid, count))
+
+
+def bz_get_count_by_bid_cid(bid: int, cid: int) -> int:
+ bdata = bz_rest(f'bug/{bid}/comment')
+ for rbid, rbdata in bdata['bugs'].items():
+ if int(rbid) != bid:
+ continue
+ for comment in rbdata['comments']:
+ if comment['id'] == cid:
+ logger.debug('count for %s/%s is c%s', bid, cid, comment['count'])
+ return comment['count']
+ raise LookupError('No match for bid=%s cid=%s', bid, cid)
+
+
+def bz_dedupe_bid(bid: int) -> int:
+ bdata = bz_get_bug(bid)
+ if bdata.get('dupe_of'):
+ # Nothing wrong with recursion
+ return bz_dedupe_bid(bdata['dupe_of'])
+ return bid
+
+
+def bz_check_user_allowed(uid: str, product: str, component: str) -> bool:
+ cconf = get_component_config(product, component)
+ mustgroups = cconf.get('pi_must_bz_groups')
+ if mustgroups is None:
+ # No restrictions, anyone can do anything they like
+ return True
+ try:
+ udata = bz_get_user(uid)
+ except LookupError:
+ logger.debug('Could not find user %s in bugzilla', uid)
+ return False
+
+ for mustgroup in mustgroups:
+ for ugroup in udata.get('groups', list()):
+ if ugroup['name'] == mustgroup:
+ logger.debug('%s mustgroup matches %s', uid, mustgroup)
+ return True
+
+ logger.debug('%s not member of %s', uid, mustgroups)
+ return False
+
+
+def bz_assign_bug(bid: int, uid: str) -> None:
+ logger.info('Assigning bug %s to %s', bid, uid)
+ path = f'bug/{bid}'
+ payload = {
+ 'assigned_to': uid,
+ }
+ bz_rest(path, payload=payload, method='PUT')
+
+
+def db_get_query_last_check(product: str, component: str) -> str:
+ key = f'query_{product}_{component}'
+ return db_get_meta_value(key)
+
+
+def db_store_query_last_check(product: str, component: str, last_check: str) -> None:
+ key = f'query_{product}_{component}'
+ return db_store_meta_value(key, last_check)
+
+
+def db_get_msgid_by_bid_cid(bid: int, cid: Optional[int]) -> str:
+ engine, dbconn = db_get_sa()
+ md = sa.MetaData()
+ t_bmap = sa.Table('msgid_bug_mapping', md, autoload=True, autoload_with=engine)
+ if cid:
+ q = sa.select([t_bmap.c.message_id]).where(t_bmap.c.bug_id == bid, t_bmap.c.comment_id == cid)
+ else:
+ # If cid is not defined, we get all mappings and use the lowest cid
+ q = sa.select([t_bmap.c.message_id]).where(t_bmap.c.bug_id == bid).order_by(t_bmap.c.comment_id)
+
+ rp = dbconn.execute(q)
+ fa = rp.fetchall()
+ if len(fa):
+ logger.debug('query results for bid=%s, cid=%s: %s', bid, cid, fa)
+ return fa[0][0]
+
+ raise LookupError('No message-id matching bid=%s, cid=%s' % (bid, cid))
+
+
+def db_get_bid_cid_by_msgid(msgid: str) -> Tuple[int, Optional[int]]:
+ engine, dbconn = db_get_sa()
+ md = sa.MetaData()
+ t_bmap = sa.Table('msgid_bug_mapping', md, autoload=True, autoload_with=engine)
+ logger.debug('Querying db for msgid=%s', msgid)
+ q = sa.select([t_bmap.c.bug_id, t_bmap.c.comment_id]).where(t_bmap.c.message_id == msgid)
+ rp = dbconn.execute(q)
+ fa = rp.fetchall()
+ if not len(fa):
+ raise LookupError('msgid %s not known' % msgid)
+ bid, cid = fa[0]
+ dbid = bz_dedupe_bid(bid)
+ if dbid != bid:
+ cid = None
+ logger.debug(' matching bid=%s, cid=%s', bid, cid)
+ return bid, cid
+
+
+def db_store_msgid_bid_cid(msgid: str, bid: int, cid: int) -> None:
+ msgid = msgid.strip('<>')
+ engine, dbconn = db_get_sa()
+ md = sa.MetaData()
+ t_bmap = sa.Table('msgid_bug_mapping', md, autoload=True, autoload_with=engine)
+ q = sa.insert(t_bmap).values(message_id=msgid, bug_id=bid, comment_id=cid)
+ dbconn.execute(q)
+ logger.info('Created new mapping for %s: %s/%s', msgid, bid, cid)
+
+
+def db_get_recipients(bid: int) -> Set[str]:
+ engine, dbconn = db_get_sa()
+ md = sa.MetaData()
+ t_recip = sa.Table('recipients', md, autoload=True, autoload_with=engine)
+ logger.debug('Querying recipients for bid=%s', bid)
+ q = sa.select([t_recip.c.email]).where(t_recip.c.bug_id == bid)
+ rp = dbconn.execute(q)
+ fa = rp.fetchall()
+ if not len(fa):
+ raise LookupError('bid %s not known' % bid)
+ return set(x[0] for x in fa)
+
+
+def db_store_recipients(bid: int, recipients: Set[str]) -> None:
+ # TODO: add ability to unsubscribe?
+ try:
+ stored = db_get_recipients(bid)
+ except LookupError:
+ stored = set()
+ # Any new ones to store?
+ extras = recipients - stored
+ if not extras:
+ return
+
+ engine, dbconn = db_get_sa()
+ md = sa.MetaData()
+ t_recip = sa.Table('recipients', md, autoload=True, autoload_with=engine)
+ logger.debug('Storing new recipients for bid=%s', bid)
+ for addr in extras:
+ q = sa.insert(t_recip).values(bug_id=bid, email=addr)
+ dbconn.execute(q)
+ logger.debug(' Added %s', addr)
+ return
+
+
+def db_get_meta_value(key: str) -> str:
+ engine, dbconn = db_get_sa()
+ md = sa.MetaData()
+ t_meta = sa.Table('meta', md, autoload=True, autoload_with=engine)
+ q = sa.select([t_meta.c.var_value]).where(t_meta.c.var_key == key)
+ rp = dbconn.execute(q)
+ fa = rp.fetchall()
+ if not len(fa):
+ raise LookupError('meta key %s not known' % key)
+ return fa[0][0]
+
+
+def db_store_meta_value(key: str, value: str) -> None:
+ engine, dbconn = db_get_sa()
+ md = sa.MetaData()
+ t_meta = sa.Table('meta', md, autoload=True, autoload_with=engine)
+ with engine.begin() as dbconn:
+ q = sa.delete(t_meta).where(t_meta.c.var_key == key)
+ dbconn.execute(q)
+ q = sa.insert(t_meta).values(var_key=key, var_value=value)
+ dbconn.execute(q)
+
+
+def db_store_notify_last_check(bid: int, when: str):
+ key = f'notify_bug_{bid}'
+ return db_store_meta_value(key, when)
+
+
+def db_get_notify_last_check(bid: int) -> str:
+ key = f'notify_bug_{bid}'
+ return db_get_meta_value(key)
+
+
+def bz_get_changed_bugs(since: str, include_untracked: bool = False) -> List:
+ logger.debug('Querying for changed bugs since %s', since)
+ params = {
+ 'chfieldfrom': since,
+ 'include_fields': 'id,summary',
+ }
+ rdata = bz_rest('bug', params=params)
+ if include_untracked:
+ return rdata['bugs']
+ bids = tuple([x['id'] for x in rdata['bugs']])
+ engine, dbconn = db_get_sa()
+ md = sa.MetaData()
+ t_bmap = sa.Table('msgid_bug_mapping', md, autoload=True, autoload_with=engine)
+ q = sa.select([t_bmap.c.bug_id]).where(t_bmap.c.bug_id.in_(bids)).distinct()
+ rp = dbconn.execute(q)
+ fa = rp.fetchall()
+ if not fa:
+ return list()
+ tracked = set([x[0] for x in fa])
+ bugs = list()
+ for bdata in rdata['bugs']:
+ if bdata['id'] in tracked:
+ bugs.append(bdata)
+
+ return bugs
+
+
+def bz_quicksearch_bugs(query: str) -> Dict:
+ params = {
+ 'include_fields': 'id,summary',
+ 'quicksearch': query,
+ }
+ return bz_rest('bug', params=params)
+
+
+def bz_get_query_bugs(params: Dict, exclude: Set[int]) -> List[int]:
+ if 'include_fields' not in params:
+ params['include_fields'] = 'id,summary'
+ rdata = bz_rest('bug', params=params)
+ bids = list()
+ for bdata in rdata['bugs']:
+ if bdata['id'] in exclude:
+ continue
+ bids.append(bdata['id'])
+ return bids
+
+
+def get_human_size(size: int, decimals: int = 2) -> str:
+ units = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB']
+ while True:
+ unit = units.pop(0)
+ if size < 1024.0 or not len(units):
+ break
+ size /= 1024.0
+
+ return f'{size:.{decimals}f} {unit}'
+
+
+def bz_get_attachment_by_aid(aid: int, include_fields: Optional[str] = None) -> Dict:
+ if include_fields is None:
+ rdata = bz_rest(f'bug/attachment/{aid}')
+ else:
+ params = {
+ 'include_fields': include_fields,
+ }
+ rdata = bz_rest(f'bug/attachment/{aid}', params=params)
+
+ for raid, radata in rdata['attachments'].items():
+ if int(raid) == aid:
+ if 'size' in radata:
+ radata['human_size'] = get_human_size(radata['size'])
+ return radata
+
+ raise LookupError('No matching attachment_id found: %s', aid)
+
+
+def bz_get_all_comments_by_bid(bid: int) -> List[Dict]:
+ bdata = bz_rest(f'bug/{bid}/comment')
+ for rbid, rbdata in bdata['bugs'].items():
+ if int(rbid) != bid:
+ continue
+ return rbdata['comments']
+
+
+def bz_get_comments_for_bid_since(bid: int, since: str) -> List[Dict]:
+ params = {
+ 'new_since': since,
+ 'id': bid,
+ }
+ bdata = bz_rest(f'bug/{bid}/comment', params=params)
+ for rbid, rbdata in bdata['bugs'].items():
+ if int(rbid) != bid:
+ continue
+ return rbdata['comments']
+
+
+def bz_get_newest_comments_for_bid(bid: int, include_private: bool = False) -> List[Dict]:
+ try:
+ when = db_get_notify_last_check(bid)
+ except LookupError:
+ # grab the highest cid we know about
+ engine, dbconn = db_get_sa()
+ md = sa.MetaData()
+ t_bmap = sa.Table('msgid_bug_mapping', md, autoload=True, autoload_with=engine)
+ q = sa.select(sa.func.max(t_bmap.c.comment_id)).where(t_bmap.c.bug_id == bid)
+ rp = dbconn.execute(q)
+ mfa = rp.fetchall()
+ c_max = None
+ if mfa:
+ c_max = mfa[0][0]
+ cdatas = bz_get_all_comments_by_bid(bid)
+ comments = list()
+ for cdata in cdatas:
+ if c_max and cdata['id'] <= c_max:
+ continue
+ if not include_private and cdata['is_private']:
+ continue
+ comments.append(cdata)
+ return comments
+
+ logger.debug('Getting newest comments since %s', when)
+ cdatas = bz_get_comments_for_bid_since(bid, when)
+ comments = list()
+ for cdata in cdatas:
+ if not include_private and cdata['is_private']:
+ continue
+ comments.append(cdata)
+ return comments
+
+
+def msg_get_inre_msgids(msg: email.message.EmailMessage) -> List[str]:
+ pairs = list()
+ if msg.get('In-Reply-To'):
+ pairs += email.utils.getaddresses([str(x) for x in msg.get_all('in-reply-to', [])])
+ if msg.get('References'):
+ pairs += email.utils.getaddresses([str(x) for x in msg.get_all('references', [])])
+
+ msgids = list()
+ for pair in pairs:
+ if pair[1] not in msgids:
+ msgids.append(pair[1])
+
+ return msgids
+
+
+def sort_msgs_by_received(msgs: List[email.message.EmailMessage]) -> List[email.message.EmailMessage]:
+ tosort = list()
+ for msg in msgs:
+ latest_rdt = None
+ for rhdr in msg.get_all('Received'):
+ # The received headers are pretty free-form, but generally will end with ; datetimeinfo
+ chunks = rhdr.rsplit(';', maxsplit=1)
+ if len(chunks) < 2:
+ continue
+ rdate = chunks[1].strip()
+ rdt = email.utils.parsedate_tz(rdate)
+ if rdt is None:
+ continue
+ # use the latest rdt
+ if not latest_rdt or latest_rdt < rdt:
+ latest_rdt = rdt
+ latest_rdt = None
+ if not latest_rdt and msg.get('Date'):
+ # Use the Date header as fallback
+ latest_rdt = email.utils.parsedate_to_datetime(msg.get('Date'))
+ if not latest_rdt:
+ logger.debug('Message without a date: %s!', msg.get('Message-ID'))
+ continue
+ tosort.append((latest_rdt, msg))
+
+ sortedmsgs = list()
+ for rdt, msg in sorted(tosort):
+ sortedmsgs.append(msg)
+
+ return sortedmsgs
+
+
+def msg_get_inre_bid_cid(msg: email.message.EmailMessage) -> Tuple[int, int]:
+ msgids = msg_get_inre_msgids(msg)
+ if not msgids:
+ logger.debug('No references in msg')
+ raise LookupError('No references found')
+
+ res = list()
+ for msgid in msgids:
+ try:
+ res.append(db_get_bid_cid_by_msgid(msgid))
+ except LookupError:
+ # Try the next one until we run out
+ pass
+ if not res:
+ logger.debug('No reference matched database records')
+ raise LookupError('Nothing matching in the db')
+
+ if len(res) > 1:
+ # Use the highest cid, which is going to be the latest matching comment
+ res.sort(key=lambda x: x[1], reverse=True)
+ return res[0]
+
+
+def msg_get_recipients(msg: email.message.EmailMessage) -> Set[str]:
+ pairs = list()
+ if msg.get('To'):
+ pairs += email.utils.getaddresses([str(x) for x in msg.get_all('to', [])])
+ if msg.get('Cc'):
+ pairs += email.utils.getaddresses([str(x) for x in msg.get_all('cc', [])])
+ if msg.get('From'):
+ pairs += email.utils.getaddresses([str(x) for x in msg.get_all('from', [])])
+
+ return set([x[1].lower() for x in pairs])
+
+
+def bz_get_product_component_by_bid(bid: int) -> Tuple[str, str]:
+ bdata = bz_get_bug(bid, resolve_dupes=True)
+ return bdata['product'], bdata['component']
+
+
+def get_product_component_by_recipients(recipients: Set[str]) -> Tuple[str, str]:
+ config = get_config()
+ if not config.get('components'):
+ logger.debug('No components found in config')
+ raise LookupError('No components defined in config')
+ for recipient in recipients:
+ logger.debug('Matching %s', recipient)
+ for bz_product, bz_components in config['components'].items():
+ for bz_component, c_config in bz_components.items():
+ if not c_config.get('recipients'):
+ continue
+ for addr in c_config['recipients']:
+ if fnmatch(recipient, addr):
+ logger.debug('Matched %s with product=%s, component=%s', recipient, bz_product, bz_component)
+ return bz_product, bz_component
+
+ raise LookupError('No matches for any recipients')
+
+
+def get_newbug_payload_by_product_component(product: str, component: str) -> Dict:
+ config = get_config()
+ try:
+ payload = config['components'][product][component]['payload']
+ except KeyError:
+ payload = dict()
+ payload['product'] = product
+ payload['component'] = component
+ return payload
+
+
+def msg_get_author(msg: email.message.EmailMessage) -> Tuple[str, str]:
+ author = ('', 'missing@address.local')
+ fh = msg.get('from')
+ if fh:
+ author = email.utils.getaddresses([fh])[0]
+
+ if not author[0]:
+ return 'Zorro Boogs', author[1]
+
+ return author
+
+
+def msg_parse_for_bug(msg: email.message.EmailMessage) -> Tuple[str, Tuple[str, str], str, str, List[Dict]]:
+ msgid = b4.LoreMessage.get_clean_msgid(msg)
+ mp = msg.get_body(preferencelist=('plain',))
+ bbody = mp.get_payload(decode=True)
+ cs = mp.get_content_charset()
+ if not cs:
+ cs = 'utf-8'
+ cpay = bbody.decode(cs, errors='replace')
+ # Strip signature if we find it
+ chunks = cpay.rsplit('-- ', maxsplit=1)
+ cbody = chunks[0]
+ lsub = b4.LoreSubject(msg.get('Subject', ''))
+ subject = lsub.subject
+ atts = msg_get_valid_attachments(msg)
+ author = msg_get_author(msg)
+
+ return msgid, author, subject, cbody, atts
+
+
+def msg_get_valid_attachments(msg: email.message.EmailMessage) -> List[Dict]:
+ # Get all good attachments
+ config = get_config()
+ atts = list()
+ for part in msg.walk():
+ if part.get_content_disposition() != 'attachment':
+ continue
+ ct = part.get_content_type()
+ mts = config.get('mimetypes')
+ allowed = True
+ if mts and 'deny' in mts:
+ for dmt in mts['deny']:
+ if fnmatch(ct, dmt):
+ logger.debug('Skipping denied mime-type attachement: %s', ct)
+ allowed = False
+ break
+ if allowed:
+ databytes = part.get_payload(decode=True)
+ data = base64.b64encode(databytes).decode()
+ filename = part.get_filename()
+ if not filename:
+ filename = 'unnamed.txt'
+ ct = 'text/plain'
+ summary = filename
+ payload = {
+ 'file_name': filename,
+ 'content_type': ct,
+ 'summary': summary,
+ 'data': data,
+ }
+
+ atts.append(payload)
+
+ return atts
+
+
+def get_recipients_by_product_component(product: str, component: str) -> Set[str]:
+ recip = set()
+ config = get_config()
+ try:
+ recip.update(config['notify']['alwayscc'])
+ logger.debug('added global alwayscc: %s', config['notify']['alwayscc'])
+ except KeyError:
+ pass
+ try:
+ recip.update(config['components'][product][component]['alwayscc'])
+ logger.debug('added %s/%s alwayscc: %s', product, component,
+ config['components'][product][component]['alwayscc'])
+ except KeyError:
+ pass
+ return recip
+
+
+def get_recipients_by_subsystem(subsystem: str) -> Set[str]:
+ global MAINT_CACHE
+ if not len(MAINT_CACHE):
+ config = get_config()
+ ses = get_requests_session()
+ murl = config['bugzilla']['maintainers_url']
+ if murl.startswith('file://'):
+ with open(murl.replace('file://', ''), 'r') as fh:
+ mdata = fh.read()
+ else:
+ res = ses.get(config['bugzilla']['maintainers_url'])
+ res.raise_for_status()
+ mdata = res.text
+ lookfor = ['M:', 'L:']
+ prevline = None
+ cur_sub = None
+ for line in mdata.splitlines():
+ if len(line) < 2 or not len(line[0].strip()):
+ cur_sub = None
+ continue
+ if line[:2] not in lookfor:
+ prevline = line
+ continue
+ if not cur_sub:
+ cur_sub = prevline
+ addr = email.utils.parseaddr(line[2:])
+ if cur_sub:
+ if cur_sub not in MAINT_CACHE:
+ MAINT_CACHE[cur_sub] = set()
+ MAINT_CACHE[cur_sub].add(addr[1])
+
+ try:
+ return MAINT_CACHE[subsystem]
+ except KeyError:
+ return set()
+
+
+def get_bug_recipients(bid: int) -> Set[str]:
+ # Get all db-stored recipients
+ # TODO: implement "onlyto"
+ allrecip = set()
+ try:
+ allrecip.update(db_get_recipients(bid))
+ except LookupError:
+ logger.debug('No in-database recipients for bid=%s', bid)
+ # Now get all bug cc recipients
+ bdata = bz_get_bug(bid, resolve_dupes=True)
+ bugr = set(bdata['cc'])
+ bugr.add(bdata['assigned_to'])
+ bugr.add(bdata['creator'])
+ bugr.update(get_recipients_by_product_component(bdata['product'], bdata['component']))
+ allrecip.update(bugr)
+ config = get_config()
+ subsystem_cf = config['bugzilla'].get('subsystem_cf')
+ if subsystem_cf:
+ subr = get_recipients_by_subsystem(bdata[subsystem_cf])
+ allrecip.update(subr)
+
+ # Remove "neverto" addresses
+ for mask in config['notify'].get('neverto', list()):
+ for addr in set(allrecip):
+ if fnmatch(addr, mask):
+ logger.debug('Removed %s because it matched neverto=%s', addr, mask)
+ allrecip.remove(addr)
+ return allrecip
+
+
+def make_msgid(bid: int, cid: Optional[int]) -> str:
+ config = get_config()
+ bzurl = config['bugzilla']['url']
+ bzloc = urllib.parse.urlparse(bzurl)
+ slug = f'b{bid}'
+ if bid and cid:
+ count = bz_get_count_by_bid_cid(bid, cid)
+ slug += f'c{count}'
+
+ msgid = '<%s-%s-%s@%s>' % (datetime.date.today().strftime('%Y%m%d'), slug, uuid.uuid4().hex[:12], bzloc.netloc)
+ return msgid
+
+
+def notify_bug(bid: int, cid: Optional[int], msg: email.message.EmailMessage, inre_cid: Optional[int] = None,
+ dry_run: bool = False) -> str:
+ bdata = bz_get_bug(bid, resolve_dupes=True)
+ config = get_config()
+ if not msg.get('From'):
+ msg['From'] = config['notify'].get('fromaddr')
+ if not msg.get('To'):
+ recipients = get_bug_recipients(bid)
+ msg['To'] = b4.format_addrs([('', x) for x in recipients])
+ if not msg.get('Message-ID'):
+ msg['Message-ID'] = make_msgid(bid, cid)
+ if not msg.get('In-Reply-To'):
+ inre_msgid = None
+ try:
+ inre_msgid = db_get_msgid_by_bid_cid(bid, inre_cid)
+ except LookupError:
+ logger.debug('Could not find msgid matching bid=%s, cid=%s', bid, inre_cid)
+ # Find anything
+ try:
+ inre_msgid = db_get_msgid_by_bid_cid(bid, None)
+ except LookupError:
+ pass
+ if inre_msgid:
+ msg['In-Reply-To'] = f'<{inre_msgid}>'
+ msg['References'] = f'<{inre_msgid}>'
+ if not msg.get('Subject'):
+ msg['Subject'] = 'Re: %s' % bdata['summary']
+ else:
+ msg['Subject'] = bdata['summary']
+
+ msg['X-Bugzilla-Product'] = bdata['product']
+ msg['X-Bugzilla-Component'] = bdata['component']
+ msg['X-Mailer'] = f'{__APPNAME__} {__VERSION__}'
+ # Should we add other X-B headers?
+ # If we have notify.smtpserver and notify.smtpport defined, use that,
+ # otherwise use b4's get_smtp method
+ smtpserver = config['notify'].get('smtpserver')
+ smtpport = int(config['notify'].get('smtpport', '25'))
+ if smtpserver and smtpport:
+ import smtplib
+ smtp = smtplib.SMTP(smtpserver, smtpport)
+ else:
+ smtp, fromaddr = b4.get_smtp(dryrun=dry_run)
+ b4.send_mail(smtp, [msg], fromaddr=config['notify'].get('fromaddr'), dryrun=dry_run)
+
+ return msg.get('Message-ID')
+
+
+def add_bot_signature(body: str) -> str:
+ config = get_config()
+ sigtpt = Template(config['templates'].get('botsig'))
+ sigvals = {
+ 'myname': config['bugzilla'].get('name'),
+ 'appname': __APPNAME__,
+ 'appver': __VERSION__,
+ }
+ body += sigtpt.safe_substitute(sigvals)
+ return body
+
+
+def db_get_sa() -> Tuple[sa.engine.Engine, sa.engine.Connection]:
+ global SACONN, SAENGINE
+ if SACONN is None:
+ config = get_config()
+ try:
+ dburl = config['db']['dburl']
+ except KeyError:
+ raise LookupError('CRITICAL: main.dburl not set in config file')
+
+ db_pool_recycle = int(config['db'].get('dbpoolrecycle', '300'))
+ SAENGINE = sa.create_engine(dburl, pool_recycle=db_pool_recycle)
+ SACONN = SAENGINE.connect()
+ if SAENGINE.driver == 'pysqlite':
+ md = sa.MetaData()
+ try:
+ t_meta = sa.Table('meta', md, autoload=True, autoload_with=SAENGINE)
+ q = sa.select([t_meta.c.var_value]).where(t_meta.c.var_key == 'schema')
+ rp = SACONN.execute(q)
+ dbver = rp.fetchone()[0]
+ # Future logic to upgrade database here
+ logger.debug('dbver=%s', dbver)
+ except NoSuchTableError:
+ db_init_sa_sqlite_db(SAENGINE, SACONN)
+
+ return SAENGINE, SACONN
+
+
+def db_init_sa_sqlite_db(engine: sa.engine.Engine, dbconn: sa.engine.Connection):
+ logger.info('Setting up SQLite database')
+ md = sa.MetaData()
+ meta = sa.Table('meta', md,
+ sa.Column('var_key', sa.Text()),
+ sa.Column('var_value', sa.Text()),
+ )
+ sa.Index('idx_meta_key_value', meta.c.var_key, meta.c.var_value, unique=True)
+ bmap = sa.Table('msgid_bug_mapping', md,
+ sa.Column('row_id', sa.Integer(), primary_key=True),
+ sa.Column('bug_id', sa.Integer(), nullable=False),
+ sa.Column('comment_id', sa.Integer(), nullable=False),
+ sa.Column('message_id', sa.Text(), nullable=False),
+ )
+ sa.Index('idx_msgid_bugid_commentid', bmap.c.message_id, bmap.c.bug_id, bmap.c.comment_id, unique=True)
+ sa.Index('idx_msgid_commentid', bmap.c.message_id, bmap.c.comment_id, unique=True)
+ sa.Index('idx_msgid_bugid', bmap.c.message_id, bmap.c.bug_id, unique=True)
+
+ recip = sa.Table('recipients', md,
+ sa.Column('row_id', sa.Integer(), primary_key=True),
+ sa.Column('bug_id', sa.Integer(), nullable=False),
+ sa.Column('email', sa.Text(), nullable=False),
+ )
+ sa.Index('idx_bugid_email', recip.c.bug_id, recip.c.email, unique=True)
+ md.create_all(engine)
+ q = sa.insert(meta).values(var_key='schema', var_value=str(__DBSCHEMA__))
+ dbconn.execute(q)
diff --git a/peebz/bz2pi.py b/peebz/bz2pi.py
new file mode 100644
index 0000000..b78a648
--- /dev/null
+++ b/peebz/bz2pi.py
@@ -0,0 +1,148 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2023 by the Linux Foundation
+
+import argparse
+import peebz
+import datetime
+import re
+import b4
+
+import email.message
+
+from fnmatch import fnmatch
+
+logger = peebz.logger
+
+
+def process_new_comments(bid: int, dry_run: bool = False):
+ config = peebz.get_config()
+ cdatas = peebz.bz_get_newest_comments_for_bid(bid)
+ for cdata in cdatas:
+ # Check if we've already notified about this bug
+ cid = cdata['id']
+ try:
+ peebz.db_get_msgid_by_bid_cid(bid, cid)
+ logger.debug('Skipping, msgid match for bid=%s, cid=%s', bid, cid)
+ continue
+ except LookupError:
+ pass
+ # Check if the creator is in never_if_creator
+ skip = False
+ for mask in config['notify'].get('never_if_creator', list()):
+ if fnmatch(cdata['creator'], mask):
+ logger.debug('Skipping cid=%s because it matched never_if_creator=%s', cid, mask)
+ skip = True
+ break
+ if skip:
+ continue
+ # Check if text is in never_if_text_matches
+ for mask in config['notify'].get('never_if_text_matches', list()):
+ if fnmatch(cdata['text'], mask):
+ logger.debug('Skipping cid=%s because it matched never_if_text_matches=%s', cid, mask)
+ skip = True
+ break
+ if skip:
+ continue
+ clines = cdata['text'].strip().splitlines()
+ inre_cid = None
+ bodyvals = {
+ 'bzname': config['bugzilla'].get('name'),
+ 'bug_url': config['bugzilla'].get('bugmask', '').format(bug_id=bid),
+ 'comment_url': config['bugzilla'].get('bugmask', '').format(bug_id=bid) + f"#c{cdata['count']}",
+ 'comment_author': cdata['creator'],
+ }
+ if cdata['attachment_id']:
+ logger.info('Processing new attachment for bug_id=%s, comment_id=%s', bid, cid)
+ adata = peebz.bz_get_attachment_by_aid(
+ cdata['attachment_id'],
+ include_fields='file_name,size,content_type,summary,is_patch,is_private',
+ )
+ if adata['is_private']:
+ logger.debug('Skipping attachment marked private')
+ continue
+ bodytpt = peebz.get_template_by_bid('new_attachment_notify', bid)
+ bodyvals.update(adata)
+ bodyvals['attachment_url'] = config['bugzilla'].get('attachmask', '').format(
+ attachment_id=cdata['attachment_id'])
+ else:
+ logger.info('Processing new comment for bug_id=%s, comment_id=%s', bid, cid)
+ fline = clines[0]
+ matches = re.search(r'\(In reply to.*from comment #(\d+)', fline, flags=re.I)
+ if matches:
+ inre_count = int(matches.groups()[0])
+ try:
+ inre_cid = peebz.bz_get_cid_by_bid_count(bid, inre_count)
+ except LookupError:
+ pass
+ bodyvals['comment_text'] = '\n'.join(clines)
+ bodytpt = peebz.get_template_by_bid('new_comment_notify', bid)
+
+ msg = email.message.EmailMessage()
+ msg['Reply-To'] = b4.format_addrs([('', cdata['creator'])])
+ body = bodytpt.safe_substitute(bodyvals)
+ body = peebz.add_bot_signature(body)
+ msg.set_payload(body, charset='utf-8')
+ msgid = peebz.notify_bug(bid, cid, msg, inre_cid=inre_cid, dry_run=dry_run)
+ if msgid and not dry_run:
+ peebz.db_store_msgid_bid_cid(msgid, bid, cid)
+ peebz.db_store_recipients(bid, {cdata['creator']})
+ # TODO: This assumes that comments are always in incremental order
+ lastcheck = cdata['creation_time'].replace('T', ' ').rstrip('Z')
+ peebz.db_store_notify_last_check(bid, lastcheck)
+
+
+def main(cmdargs: argparse.Namespace) -> None:
+ now = datetime.datetime.utcnow()
+ lastrun = now.strftime('%Y-%m-%d %H:%M:%S')
+ try:
+ # Get all new bugs that changed since last run
+ since = peebz.db_get_meta_value('notify_last_run')
+ except LookupError:
+ logger.debug('Got a LookupError, getting everything for the past hour')
+ # Assume it's the first run and get changes for the past hour
+ hourago = datetime.datetime.utcnow() - datetime.timedelta(hours=1)
+ since = hourago.strftime('%Y-%m-%d %H:%M:%S')
+
+ # first, process all changed bugs that we're tracking
+ logger.info('Getting a list of changed bugs since %s', since)
+ buglist = peebz.bz_get_changed_bugs(since)
+ seen = set()
+ if buglist:
+ for bdata in buglist:
+ logger.debug('Looking at %s: %s', bdata['id'], bdata['summary'])
+ bid = bdata['id']
+ seen.add(bid)
+ process_new_comments(bid, dry_run=cmdargs.dry_run)
+ else:
+ logger.info('No changes to any tracked bugs')
+
+ # Now go by product/component and handle new bug queries if defined
+ config = peebz.get_config()
+ for bz_product, bz_components in config['components'].items():
+ for bz_component in bz_components.keys():
+ cconf = peebz.get_component_config(bz_product, bz_component)
+ qs = cconf.get('bz_new_bugs_quicksearch')
+ if not qs:
+ logger.debug('No quicksearch defined for %s/%s', bz_product, bz_component)
+ continue
+ logger.info('Querying matching quicksearch results since %s for %s/%s, qs=%s', since, bz_product,
+ bz_component, qs)
+ params = {
+ 'chfieldfrom': since,
+ 'product': bz_product,
+ 'component': bz_component,
+ 'quicksearch': qs,
+ }
+ buglist = peebz.bz_get_query_bugs(params, exclude=seen)
+ if buglist:
+ logger.info('Processing %s matching quicksearch bugs', len(buglist))
+ for bid in buglist:
+ seen.add(bid)
+ process_new_comments(bid, dry_run=cmdargs.dry_run)
+ else:
+ logger.info('No changed bugs matching these parameters.')
+
+ if not cmdargs.dry_run:
+ peebz.db_store_meta_value(key='notify_last_run', value=lastrun)
diff --git a/peebz/command.py b/peebz/command.py
new file mode 100644
index 0000000..30046bd
--- /dev/null
+++ b/peebz/command.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2020 by the Linux Foundation
+#
+
+import sys
+import argparse
+import b4
+import logging
+import logging.handlers
+import peebz
+
+logger = peebz.logger
+
+
+def cmd_parse(cmdargs):
+ import peebz.parse
+ peebz.parse.main(cmdargs)
+
+
+def cmd_bz2pi(cmdargs):
+ import peebz.bz2pi
+ peebz.bz2pi.main(cmdargs)
+
+
+def cmd_pi2bz(cmdargs):
+ import peebz.pi2bz
+ peebz.pi2bz.main(cmdargs)
+
+
+def cmd_bzdump(cmdargs):
+ import json
+ from pygments import highlight, lexers, formatters
+ if cmdargs.bug_id:
+ rdata = peebz.bz_get_bug(cmdargs.bug_id, resolve_dupes=cmdargs.resolve_dupes)
+ elif cmdargs.username:
+ rdata = peebz.bz_get_user(cmdargs.username)
+ elif cmdargs.quicksearch:
+ rdata = peebz.bz_quicksearch_bugs(query=cmdargs.quicksearch)
+ else:
+ sys.exit(1)
+
+ jdata = json.dumps(rdata, sort_keys=True, indent=4)
+ colorized = highlight(jdata, lexers.JsonLexer(), formatters.TerminalFormatter())
+ print(colorized)
+
+
+def setup_parser() -> argparse.ArgumentParser:
+ # noinspection PyTypeChecker
+ parser = argparse.ArgumentParser(
+ prog='peebz',
+ description='A tool to bridge public-inbox collections with bugzilla',
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
+ )
+ parser.add_argument('--version', action='version', version=peebz.__VERSION__)
+ parser.add_argument('-d', '--debug', action='store_true', default=False,
+ help='Add more debugging info to the output')
+ parser.add_argument('-q', '--quiet', action='store_true', default=False,
+ help='Output critical information only')
+ parser.add_argument('-c', '--config', required=True,
+ help='config.toml to use')
+ parser.add_argument('--dry-run', action='store_true', default=False,
+ help='Dry run, do not make any changes')
+
+ subparsers = parser.add_subparsers(help='sub-command help', dest='subcmd')
+ # parse : process RFC2822 messages passed on stdin (testing mode)
+ sp_parse = subparsers.add_parser('parse', help='Parse messages passed via stdin')
+ sp_parse.add_argument('--product', help='Force this product instead of guessing by recipient')
+ sp_parse.add_argument('--component', help='Force this component instead of guessing by recipient')
+ sp_parse.set_defaults(func=cmd_parse)
+
+ # pi2bz : query public-inbox to find the latest treads that interest us
+ sp_pi2bz = subparsers.add_parser('pi2bz', help='Query public-inbox sources for updates')
+ sp_pi2bz.add_argument('--product', help='Only run queries for this product')
+ sp_pi2bz.add_argument('--component', help='Only run queries for this component')
+ sp_pi2bz.set_defaults(func=cmd_pi2bz)
+
+ # bz2pi: query bugzilla and sends out any mail updates
+ sp_bz2pi = subparsers.add_parser('bz2pi', help='Send emails about bugzilla-originated changes')
+ sp_bz2pi.set_defaults(func=cmd_bz2pi)
+
+ # show : command to show REST raw REST output
+ sp_bzdump = subparsers.add_parser('bzdump', help='Show colorized raw REST output from bugzilla API')
+ sp_bzdump.add_argument('-b', '--bug-id', type=int, help='Bug to show')
+ sp_bzdump.add_argument('-c', '--comment-id', help='Comment to show')
+ sp_bzdump.add_argument('-a', '--attachment-id', help='Attachment to show')
+ sp_bzdump.add_argument('-u', '--username', help='User to show')
+ sp_bzdump.add_argument('-q', '--quicksearch', help='Quicksearch query to run')
+
+ sp_bzdump.add_argument('--resolve-dupes', action='store_true', help='Resolve dupes')
+ sp_bzdump.set_defaults(func=cmd_bzdump)
+
+ return parser
+
+
+def cmd():
+ parser = setup_parser()
+ cmdargs = parser.parse_args()
+ logger.setLevel(logging.DEBUG)
+
+ ch = logging.StreamHandler()
+ formatter = logging.Formatter('%(message)s')
+ ch.setFormatter(formatter)
+
+ if cmdargs.quiet:
+ ch.setLevel(logging.CRITICAL)
+ elif cmdargs.debug:
+ ch.setLevel(logging.DEBUG)
+ else:
+ ch.setLevel(logging.INFO)
+
+ logger.addHandler(ch)
+
+ if 'func' not in cmdargs:
+ parser.print_help()
+ sys.exit(1)
+
+ with open(cmdargs.config, 'rb') as fh:
+ try:
+ import tomllib # noqa
+ peebz.CONFIG = tomllib.load(fh)
+ except ModuleNotFoundError:
+ import tomli # noqa
+ peebz.CONFIG = tomli.load(fh)
+
+ try:
+ logfile = peebz.CONFIG['logging']['logfile']
+ flh = logging.handlers.WatchedFileHandler(logfile)
+ fmt = '[%(process)d] %(asctime)s - ' + cmdargs.subcmd + ': %(message)s'
+ flh.setFormatter(logging.Formatter(fmt))
+ loglevel = peebz.CONFIG['logging'].get('loglevel', 'info')
+ if loglevel == 'debug':
+ flh.setLevel(logging.DEBUG)
+ else:
+ flh.setLevel(logging.INFO)
+ logger.addHandler(flh)
+ except KeyError:
+ # No file logging for you
+ pass
+
+ cmdargs.func(cmdargs)
+
+
+if __name__ == '__main__':
+ import os
+ # noinspection PyBroadException
+ try:
+ if peebz.__VERSION__.endswith('-dev'):
+ base = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
+ dotgit = os.path.join(base, '.git')
+ lines = b4.git_get_command_lines(dotgit, ['rev-parse', '--short', 'HEAD'])
+ if lines:
+ peebz.__VERSION__ = '%s-%.5s' % (peebz.__VERSION__, lines[0].strip())
+ except Exception as ex:
+ pass
+ cmd()
diff --git a/peebz/parse.py b/peebz/parse.py
new file mode 100644
index 0000000..07de9e5
--- /dev/null
+++ b/peebz/parse.py
@@ -0,0 +1,198 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2023 by the Linux Foundation
+
+import sys
+import argparse
+import peebz
+import b4
+import re
+
+from typing import Tuple, Dict
+
+import email.message
+import email.utils
+
+logger = peebz.logger
+
+
+def new_bug_notification(bid: int, inre_cid: int, dry_run: bool = False):
+ msg = email.message.EmailMessage()
+ config = peebz.get_config()
+ bodyvals = {
+ 'bzname': config['bugzilla'].get('name'),
+ 'bug_url': config['bugzilla'].get('bugmask', '').format(bug_id=bid),
+ }
+ bodytpt = peebz.get_template_by_bid('new_bug_notify', bid)
+ body = bodytpt.safe_substitute(bodyvals)
+ sigtpt = peebz.get_template_by_bid('botsig', bid)
+ sigvals = {
+ 'myname': config['bugzilla'].get('name'),
+ 'appname': peebz.__APPNAME__,
+ 'appver': peebz.__VERSION__,
+ }
+ body += sigtpt.safe_substitute(sigvals)
+ msg.set_payload(body, charset='utf-8')
+ peebz.notify_bug(bid, None, msg, inre_cid=inre_cid, dry_run=dry_run)
+
+
+def make_bug_desc_from_body(product: str, component: str, body: str, vals: Dict) -> str:
+ if 'comment_count' in vals and vals['comment_count']:
+ tpt_intro = peebz.get_template_by_product_component('parse_bug_intro_with_count', product, component)
+ else:
+ tpt_intro = peebz.get_template_by_product_component('parse_bug_intro', product, component)
+ tpt_outro = peebz.get_template_by_product_component('parse_bug_outro', product, component)
+ desc = ''
+ intro = tpt_intro.safe_substitute(vals)
+ if intro:
+ desc = intro + '\n\n'
+ desc += body.strip()
+ outro = tpt_outro.safe_substitute(vals)
+ if outro:
+ desc += '\n\n' + outro
+ desc += '\n'
+
+ return desc
+
+
+def new_bug_from_msg(msg: email.message.EmailMessage, product: str, component: str,
+ dry_run: bool = False) -> Tuple[int, int]:
+ msgid, author, subject, body, atts = peebz.msg_parse_for_bug(msg)
+ payload = peebz.get_newbug_payload_by_product_component(product, component)
+ summary = re.sub(r'^\s*(Re|Fwd):\s*', '', subject)
+ vals = {
+ 'author': b4.format_addrs([author]),
+ 'msgid_link': peebz.get_msgid_link(msgid),
+ }
+ desc = make_bug_desc_from_body(product, component, body, vals)
+
+ payload['summary'] = summary
+ payload['description'] = desc
+ if not dry_run:
+ bid, cid = peebz.bz_add_new_bug(payload)
+ logger.debug('new bug bid=%s, cid=%s', bid, cid)
+ recipients = peebz.msg_get_recipients(msg)
+ peebz.db_store_recipients(bid, recipients)
+ if atts:
+ peebz.bz_add_atts_to_bug(bid, atts)
+ else:
+ logger.info('--- DRY RUN ---')
+ logger.info('Would have created a new bug in %s/%s:', product, component)
+ logger.info('Summary: %s', payload['summary'])
+ logger.info('Description:')
+ logger.info(payload['description'])
+ bid = cid = None
+
+ return bid, cid
+
+
+def new_comment_from_msg(bid: int, cid: int, msg: email.message.EmailMessage, dry_run: bool = False) -> int:
+ msgid, author, subject, body, atts = peebz.msg_parse_for_bug(msg)
+ vals = {
+ 'author': b4.format_addrs([author]),
+ 'msgid_link': peebz.get_msgid_link(msgid),
+ }
+ if cid:
+ try:
+ vals['comment_count'] = peebz.bz_get_count_by_bid_cid(bid, cid)
+ except LookupError:
+ pass
+ product, component = peebz.bz_get_product_component_by_bid(bid)
+
+ desc = make_bug_desc_from_body(product, component, body, vals)
+
+ if not dry_run:
+ cid = peebz.bz_add_new_comment(bid, desc)
+ recipients = peebz.msg_get_recipients(msg)
+ peebz.db_store_recipients(bid, recipients)
+ if atts:
+ peebz.bz_add_atts_to_bug(bid, atts)
+ else:
+ logger.info('--- DRY RUN ---')
+ logger.info('Would have added this comment to %s', bid)
+ logger.info(desc)
+ cid = None
+ return cid
+
+
+def process_rfc2822(msg: email.message.EmailMessage, product: str, component: str,
+ dry_run: bool = False) -> None:
+ # Ignore any messages that have an X-Bugzilla-Product header,
+ # so we don't get into any loops
+ if msg.get('x-bugzilla-product'):
+ logger.debug('Skipping bugzilla-originating message')
+ return
+
+ cconf = peebz.get_component_config(product, component)
+ # Get the message-id
+ msgid = b4.LoreMessage.get_clean_msgid(msg)
+ try:
+ # If we have this exact msgid, then it's a dupe
+ bid, cid = peebz.db_get_bid_cid_by_msgid(msgid)
+ logger.info('Already recorded as bid=%s, cid=%s', bid, cid)
+ return
+ except LookupError:
+ pass
+
+ # Walk through references and in-reply-to and see if we know any of them
+ bid = cid = None
+ try:
+ bid, cid = peebz.msg_get_inre_bid_cid(msg)
+ except LookupError:
+ pass
+
+ if bid:
+ bdata = peebz.bz_get_bug(bid)
+ if not bdata['is_open']:
+ logger.info('Bug %s is closed, not adding comments', bid)
+ sys.exit(0)
+
+ cid = new_comment_from_msg(bid, cid, msg, dry_run=dry_run)
+ if not dry_run:
+ peebz.db_store_msgid_bid_cid(msgid, bid, cid)
+ else:
+ bid, cid = new_bug_from_msg(msg, product, component, dry_run=dry_run)
+ if not dry_run:
+ peebz.db_store_msgid_bid_cid(msgid, bid, cid)
+ if cconf.get('new_bug_send_notification'):
+ new_bug_notification(bid, cid, dry_run=dry_run)
+
+ # Do we have any assign triggers?
+ assign_re = cconf.get('pi_assign_regex')
+ if assign_re:
+ matches = re.search(assign_re, msg.as_string(), flags=re.I | re.M)
+ if matches:
+ author = peebz.msg_get_author(msg)
+ fromaddr = author[1]
+ if peebz.bz_check_user_allowed(fromaddr, product, component):
+ assign_to = matches.groups()[0]
+ if assign_to == 'me':
+ logger.debug('me=%s', fromaddr)
+ assign_to = fromaddr
+ # Does this user exist?
+ try:
+ peebz.bz_get_user(assign_to)
+ if not dry_run:
+ peebz.bz_assign_bug(bid, assign_to)
+ else:
+ logger.debug('---DRY RUN---')
+ logger.debug('Would have assigned bid=%s to %s', bid, assign_to)
+
+ except LookupError:
+ logger.info('Unable to assign %s to %s: no such user', bid, assign_to)
+
+
+def main(cmdargs: argparse.Namespace) -> None:
+ msg = peebz.get_msg_from_stdin()
+ product = cmdargs.product
+ component = cmdargs.component
+ if not (product and component):
+ recipients = peebz.msg_get_recipients(msg)
+ try:
+ product, component = peebz.get_product_component_by_recipients(recipients)
+ except LookupError as ex:
+ # TODO: fail properly here
+ logger.info(str(ex))
+ sys.exit(1)
+ process_rfc2822(msg, product, component, dry_run=cmdargs.dry_run)
diff --git a/peebz/pi2bz.py b/peebz/pi2bz.py
new file mode 100644
index 0000000..36f07d3
--- /dev/null
+++ b/peebz/pi2bz.py
@@ -0,0 +1,189 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2023 by the Linux Foundation
+
+import argparse
+import peebz
+import peebz.parse
+import b4
+import urllib.parse
+import email.message
+import gzip
+import datetime
+
+from typing import List, Set
+
+logger = peebz.logger
+b4.logger = logger
+# force b4 to use EmailMessage factory
+b4.emlpolicy = peebz.emlpolicy
+
+
+def get_query_results(query_url: str) -> List:
+ loc = urllib.parse.urlparse(query_url)
+ logger.debug('query=%s', query_url)
+ logger.debug('grabbing search results from %s', loc.netloc)
+ session = peebz.get_requests_session()
+ # For the query to retrieve a mbox file, we need to send a POST request
+ resp = session.post(query_url, data='')
+ if resp.status_code == 404:
+ raise LookupError('Nothing matching query=%s', query_url)
+ if resp.status_code != 200:
+ raise LookupError('Server returned an error for %s: %s' % (query_url, resp.status_code))
+ t_mbox = gzip.decompress(resp.content)
+ resp.close()
+ if not len(t_mbox):
+ raise LookupError('Nothing matching query=%s', query_url)
+ return b4.split_and_dedupe_pi_results(t_mbox)
+
+
+def get_sorted_thread(url: str, msgid: str) -> List[email.message.EmailMessage]:
+ loc = urllib.parse.urlparse(url)
+ mbox_url = url.rstrip('/') + '/' + urllib.parse.quote_plus(msgid) + '/t.mbox.gz'
+ logger.debug('mbox_url=%s', mbox_url)
+ logger.debug('grabbing thread from %s', loc.netloc)
+ session = peebz.get_requests_session()
+ resp = session.get(mbox_url)
+ if resp.status_code == 404:
+ raise LookupError('Nothing matching mbox_url=%s', mbox_url)
+ if resp.status_code != 200:
+ raise LookupError('Server returned an error for %s: %s' % (mbox_url, resp.status_code))
+ t_mbox = gzip.decompress(resp.content)
+ resp.close()
+
+ deduped = b4.split_and_dedupe_pi_results(t_mbox)
+ if not deduped:
+ raise LookupError('No messages matching mbox_url=%s' % mbox_url)
+ strict = b4.get_strict_thread(deduped, msgid)
+ return peebz.sort_msgs_by_received(strict)
+
+
+def get_new_msgs(msgs: List[email.message.EmailMessage]) -> List[email.message.EmailMessage]:
+ new_msgs = list()
+ for msg in msgs:
+ msgid = b4.LoreMessage.get_clean_msgid(msg)
+ try:
+ peebz.db_get_bid_cid_by_msgid(msgid)
+ continue
+ except LookupError:
+ new_msgs.append(msg)
+
+ return new_msgs
+
+
+def get_tracked_bug_msgids(product: str, component: str) -> Set[str]:
+ cconf = peebz.get_component_config(product, component)
+ params = {
+ 'include_fields': 'id',
+ 'product': product,
+ 'component': component,
+ 'quicksearch': 'OPEN',
+ 'chfieldfrom': '90d',
+ }
+ params.update(cconf.get('bz_query_params', dict()))
+ rdata = peebz.bz_rest('bug', params=params)
+ msgids = set()
+ for bdata in rdata.get('bugs', list()):
+ bid = bdata['id']
+ try:
+ msgid = peebz.db_get_msgid_by_bid_cid(bid, None)
+ logger.debug('bid=%s is tracked as msgid=%s', bid, msgid)
+ msgids.add(msgid)
+ except LookupError:
+ logger.debug('Not tracking bid=%s', bid)
+
+ return msgids
+
+
+def update_component(product: str, component: str, dry_run: bool = False):
+ logger.info('Running pi2bz for %s/%s, dry_run=%s', product, component, dry_run)
+ cconf = peebz.get_component_config(product, component)
+ tracked = get_tracked_bug_msgids(product, component)
+ url = cconf.get('pi_url').rstrip('/')
+ now = datetime.datetime.utcnow()
+
+ seen_msgids = set()
+ updates = list()
+ if len(tracked):
+ logger.info('Checking for updates in %s tracked threads', len(tracked))
+ for msgid in tracked:
+ try:
+ tmsgs = get_sorted_thread(url, msgid)
+ except LookupError:
+ logger.debug('No results returned for msgid=%s', msgid)
+ continue
+
+ for tmsg in tmsgs:
+ tmsgid = b4.LoreMessage.get_clean_msgid(tmsg)
+ if tmsgid in seen_msgids:
+ logger.debug('Already seen %s', tmsgid)
+ continue
+ seen_msgids.add(tmsgid)
+ try:
+ peebz.db_get_bid_cid_by_msgid(tmsgid)
+ logger.debug('%s has already been processed', tmsgid)
+ continue
+ except LookupError:
+ logger.debug('New message in tracked thread: %s', tmsgid)
+ updates.append(tmsg)
+
+ # Now grab the latest query matches
+ query = cconf.get('pi_query')
+ if query:
+ logger.info('Running query for %s/%s', product, component)
+ try:
+ last_check = peebz.db_get_query_last_check(product, component)
+ query += f' AND dt:{last_check}..'
+ except LookupError:
+ pass
+ qquery = urllib.parse.quote_plus(query)
+ query_url = url.rstrip('/') + f'/?x=m&q={qquery}'
+ lastdt = now.strftime('%Y%m%d%H%M%S')
+ try:
+ msgs = get_query_results(query_url)
+ for msg in msgs:
+ msgid = b4.LoreMessage.get_clean_msgid(msg)
+ if msgid in seen_msgids:
+ logger.debug('Already seen %s', msgid)
+ continue
+
+ # New thing to track!
+ seen_msgids.add(msgid)
+ author = peebz.msg_get_author(msg)
+ fromaddr = author[1]
+ if not peebz.bz_check_user_allowed(fromaddr, product, component):
+ logger.debug('skipping msg %s', msg.get('Subject'))
+ continue
+ # Retrieve and queue up the entire thread
+ try:
+ tmsgs = get_sorted_thread(url, msgid)
+ except LookupError:
+ logger.debug('No results returned for msgid=%s', msgid)
+ continue
+ for tmsg in tmsgs:
+ tmsgid = b4.LoreMessage.get_clean_msgid(tmsg)
+ seen_msgids.add(tmsgid)
+ updates.append(tmsg)
+
+ except LookupError:
+ logger.info('No new results for product=%s, component=%s', product, component)
+
+ if not dry_run:
+ peebz.db_store_query_last_check(product, component, lastdt)
+
+ if not updates:
+ logger.info('No new messages to add to bugzilla for %s/%s', product, component)
+ return
+
+ for msg in updates:
+ logger.debug('Recording %s', msg.get('Subject'))
+ peebz.parse.process_rfc2822(msg, product, component, dry_run=dry_run)
+
+
+def main(cmdargs: argparse.Namespace):
+ config = peebz.get_config()
+ # Iterate all components
+ for bz_product, bz_components in config['components'].items():
+ for bz_component in bz_components.keys():
+ update_component(bz_product, bz_component, dry_run=cmdargs.dry_run)
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..cab6f85
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,5 @@
+b4>=0.12.2,<1.0
+Pygments>=2.14.0,<3.0
+requests>=2.28
+sqlalchemy>=1.4,<2.0
+tomli>=2.0; python_version < '3.11'