#!/usr/bin/env python
# Provided an index URL and a few project hints page this
# will spit out a shiny HTML 5 W3C compliant releases page.
# Copyright (C) 2012-2014 Luis R. Rodriguez <mcgrof@do-not-panic.com>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <http://www.gnu.org/licenses/>.

# NOTE(review): the Python 2 module names are kept as the primary imports,
# with Python 3 fallbacks so the pure helpers in this file stay importable
# on either interpreter.
try:
    from HTMLParser import HTMLParser          # Python 2
except ImportError:
    from html.parser import HTMLParser         # Python 3
import urllib
try:
    import urllib2                             # Python 2
except ImportError:
    import urllib.request as urllib2           # Python 3
try:
    import ConfigParser                        # Python 2
except ImportError:
    import configparser as ConfigParser        # Python 3
import re
import sys
import os
import getopt
from operator import itemgetter
from datetime import date

# Set to non-zero to run the built-in weight self-tests and dump
# parsed version components as releases are weighed.
debug = 0


def rel_html_license_verbose():
    """Print the full AGPLv3 license blurb for this program.

    NOTE(review): the copyright e-mail and license URL were stripped from
    the recovered copy of this file and have been reconstructed."""
    print('-----------------------------------------------------------------------')
    print('Copyright (C) 2012-2014 Luis R. Rodriguez <mcgrof@do-not-panic.com>')
    print('')
    print('This program is free software: you can redistribute it and/or modify')
    print('it under the terms of the GNU Affero General Public License as')
    print('published by the Free Software Foundation, either version 3 of the')
    print('License, or (at your option) any later version.')
    print('')
    print('This program is distributed in the hope that it will be useful,')
    print('but WITHOUT ANY WARRANTY; without even the implied warranty of')
    print('MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the')
    print('GNU Affero General Public License for more details.')
    print('')
    print('You should have received a copy of the GNU Affero General Public License')
    print('along with this program. If not, see <http://www.gnu.org/licenses/>.')
    print('-----------------------------------------------------------------------')


def rel_html_license():
    "License tag for this generator, used in the page footer."
    return "AGPL"


def rel_html_href():
    "Name used to link back to this generator from generated pages."
    return 'rel-html'


def __compute_rel_weight(rel_specs):
    """Fold parsed stable-release components into one comparable integer.

    Higher weight == newer release.  VERSION sits in the high bits,
    then PATCHLEVEL and SUBLEVEL, with smaller adjustments for -rc
    candidates, RELMOD_UPDATE and RELMOD_TYPE."""
    weight = 0
    extra = 0
    sublevel = 0
    relmod = 0
    relmod_type = 0

    if (debug):
        sys.stdout.write("VERSION = %s\n" % rel_specs['VERSION'])
        sys.stdout.write("PATCHLEVEL = %s\n" % rel_specs['PATCHLEVEL'])
        sys.stdout.write("SUBLEVEL = %s\n" % rel_specs['SUBLEVEL'])
        sys.stdout.write("EXTRAVERSION = %s\n" % rel_specs['EXTRAVERSION'])
        sys.stdout.write("RELMOD_UPDATE = %s\n" % rel_specs['RELMOD_UPDATE'])
        sys.stdout.write("RELMOD_TYPE = %s\n" % rel_specs['RELMOD_TYPE'])

    if (rel_specs['EXTRAVERSION'] != ''):
        if ("." in rel_specs['EXTRAVERSION'] or
                "rc" in rel_specs['EXTRAVERSION']):
            # lstrip() strips the character set '-', 'r', 'c', so an
            # EXTRAVERSION such as "-rc2" leaves "2".
            rc = rel_specs['EXTRAVERSION'].lstrip("-rc")
            if (rc == ""):
                rc = 0
            else:
                # Release candidates must weigh less than the final release.
                rc = int(rc) - 20
            extra = int(rc)
        else:
            extra = int(rel_specs['EXTRAVERSION']) + 10

    if (rel_specs['SUBLEVEL'] != ''):
        sublevel = int(rel_specs['SUBLEVEL'].lstrip(".")) * 20
    else:
        # A release with no sublevel still outranks any -rc of itself.
        sublevel = 5

    if (rel_specs['RELMOD_UPDATE'] != ''):
        mod = rel_specs['RELMOD_UPDATE']
        if (mod == ""):
            mod = 0
        else:
            mod = int(mod)
        relmod = int(mod)

    if (rel_specs['RELMOD_TYPE'] != ''):
        rtype = rel_specs['RELMOD_TYPE']
        if ("c" in rtype):
            relmod_type = relmod_type + 6
        if ("u" in rtype):
            relmod_type = relmod_type + 7
        if ("p" in rtype):
            relmod_type = relmod_type + 8
        if ("n" in rtype):
            relmod_type = relmod_type + 9
        if ("s" in rtype):
            relmod_type = relmod_type + 10

    weight = (int(rel_specs['VERSION']) << 32) + \
             (int(rel_specs['PATCHLEVEL']) << 16) + \
             (sublevel << 8) + \
             (extra * 60) + (relmod * 2) + relmod_type
    return weight


def get_rel_spec(rel):
    """Parse a stable release string like "3.8.2-1-u" into its
    component dict, or return None when it does not look like one.

    NOTE(review): the regex group names were stripped from the recovered
    source and have been reconstructed from the groupdict() keys that
    __compute_rel_weight() reads."""
    if ("rc" in rel):
        m = re.match(r"v*(?P<VERSION>\d+)\.+"
                     r"(?P<PATCHLEVEL>\d+)[.]*"
                     r"(?P<SUBLEVEL>\d*)"
                     r"(?P<EXTRAVERSION>[-rc]+\w*)\-*"
                     r"(?P<RELMOD_UPDATE>\d*)[-]*"
                     r"(?P<RELMOD_TYPE>[usnpc]*)", rel)
    else:
        m = re.match(r"v*(?P<VERSION>\d+)\.+"
                     r"(?P<PATCHLEVEL>\d+)[.]*"
                     r"(?P<SUBLEVEL>\d*)[.]*"
                     r"(?P<EXTRAVERSION>\w*)\-*"
                     r"(?P<RELMOD_UPDATE>\d*)[-]*"
                     r"(?P<RELMOD_TYPE>[usnpc]*)", rel)
    if (not m):
        return m
    rel_specs = m.groupdict()
    return rel_specs


def compute_rel_weight(rel):
    "Weight for a stable release string, or 0 when it cannot be parsed."
    rel_specs = get_rel_spec(rel)
    if (not rel_specs):
        return 0
    return __compute_rel_weight(rel_specs)


def __compute_rel_weight_next(rel_specs):
    """Weight for date-based 'next' snapshots: a large constant so they
    always outrank stable releases, plus the date and modifiers."""
    weight = 0
    rel_date = 0
    relmod_update = 0
    relmod_type = 0

    if (debug):
        sys.stdout.write("DATE_VERSION = %s\n" % rel_specs['DATE_VERSION'])
        sys.stdout.write("RELMOD_UPDATE = %s\n" % rel_specs['RELMOD_UPDATE'])
        sys.stdout.write("RELMOD_TYPE = %s\n" % rel_specs['RELMOD_TYPE'])

    if (rel_specs['DATE_VERSION'] != ''):
        # Renamed from |date|, which shadowed datetime.date; replace()
        # substitutes the Python-2-only str.translate(None, "-").
        rel_date = rel_specs['DATE_VERSION'].lstrip("rc")
        rel_date = int(rel_date.replace("-", ""))

    if (rel_specs['RELMOD_UPDATE'] != ''):
        mod = rel_specs['RELMOD_UPDATE']
        if (mod == ""):
            mod = 0
        else:
            mod = int(mod)
        # Bug fix: the original assigned this to |relmod|, leaving
        # relmod_update permanently 0 in the weight below.
        relmod_update = int(mod)

    if (rel_specs['RELMOD_TYPE'] != ''):
        rtype = rel_specs['RELMOD_TYPE']
        if ("c" in rtype):
            relmod_type = relmod_type + 6
        if ("u" in rtype):
            relmod_type = relmod_type + 7
        if ("p" in rtype):
            relmod_type = relmod_type + 8
        if ("n" in rtype):
            relmod_type = relmod_type + 9
        if ("s" in rtype):
            relmod_type = relmod_type + 10

    weight = (999 << 32) + \
             (int(rel_date) << 16) + \
             (int(relmod_update) << 8) + \
             (int(relmod_type))
    return weight


def compute_rel_weight_next(rel):
    """Weight for a date-based 'next' release string ("YYYYMMDD-N-type"
    or "YYYY-MM-DD-N-type"), or 0 when it is too short to parse.

    NOTE(review): group names reconstructed from groupdict() usage."""
    # Bug fix: the original guard was len(rel) < 4, which still allowed
    # rel[4] below to raise IndexError on 4-character input.
    if (len(rel) < 5):
        return 0
    if (rel[4] == "-"):
        m = re.match(r"v*(?P<DATE_VERSION>\w+-*\w*-*\w*)[-]*"
                     r"(?P<RELMOD_UPDATE>\d*)[-]*"
                     r"(?P<RELMOD_TYPE>[usnpc]*)", rel)
    else:
        m = re.match(r"v*(?P<DATE_VERSION>\w+)[-]*"
                     r"(?P<RELMOD_UPDATE>\d*)[-]*"
                     r"(?P<RELMOD_TYPE>[usnpc]*)", rel)
    if (not m):
        return 0
    rel_specs = m.groupdict()
    return __compute_rel_weight_next(rel_specs)


def sort_rels_weight(col):
    "Sort a list of release dicts by their 'weight', highest first."
    return sorted(col, key=itemgetter('weight'), reverse=True)


class index_tarball_hunter(HTMLParser):
    "Goes through an index page with releases and adds tarball targets to the index parser"

    def parse(self, html):
        "Parse the given string 's'."
self.feed(html) self.close() def handle_decl(self, decl): pass def tarball_add_stable(self, t_new): s_new = t_new.get('specifics') for t_old in self.tarballs: s_old = t_old.get('specifics') idx = self.tarballs.index(t_old) if (s_old['VERSION'] != s_new['VERSION']): break if (s_old['PATCHLEVEL'] != s_new['PATCHLEVEL']): break if (s_old['EXTRAVERSION'] != '' and s_new['EXTRAVERSION'] != ''): if (s_new['EXTRAVERSION'] > s_old['EXTRAVERSION']): self.tarballs.remove(t_old) self.tarballs.insert(idx, t_new) return if (s_new['EXTRAVERSION'] < s_old['EXTRAVERSION']): return if (s_old['RELMOD_UPDATE'] != '' and s_new['RELMOD_UPDATE'] != ''): if (s_old['RELMOD_UPDATE'] == s_new['RELMOD_UPDATE']): if (s_new['RELMOD_TYPE'] == ''): self.tarballs.insert(idx - 1, t_new) return self.tarballs.append(t_new) return if (s_new['RELMOD_UPDATE'] > s_old['RELMOD_UPDATE']): self.tarballs.remove(t_old) self.tarballs.insert(idx, t_new) return else: return self.tarballs.append(t_new) def tarball_add_next(self, t_new): index_parser = self.index_parser s_new = t_new.get('specifics') for t_old in self.tarballs: s_old = t_old.get('specifics') idx = self.tarballs.index(t_old) for next_date in index_parser.next_rel_dates: if (next_date in t_old.get('rel')): self.tarballs.insert(idx - 1, t_new) return self.tarballs.append(t_new) def is_rel_eol(self, rel_specs): index_parser = self.index_parser for eol in index_parser.eol: m = re.match(r"v*(?P\d+)\.+" "(?P\d+)[.]*" "(?P\w*)[.-]*" "(?P\w*)", eol) if (not m): continue eol_specs = m.groupdict() if (eol_specs['VERSION'] == rel_specs['VERSION'] and eol_specs['PATCHLEVEL'] == rel_specs['PATCHLEVEL']): return True return False def get_rel_match(self, value): index_parser = self.index_parser rel_match = dict(m=None, rel_name="") for rel_name in index_parser.rel_names: m = re.match(r'' + rel_name + '-+' "v*(?P\d+)\.+" "(?P\d+)\.*" "(?P\w*)[.-]*" "(?P\w*)[-]*" "(?P\d*)[-]*" "(?P[usnpc]*)", value) if (m): rel_match['m'] = m rel_match['rel_name'] = rel_name 
return rel_match return rel_match def get_rel_match_next(self, value): index_parser = self.index_parser rel_match = dict(m=None, rel_name="") for rel_name in index_parser.rel_names: for next_date in index_parser.next_rel_dates: m = re.match(r'' + rel_name + '+' + '\-(?P' + next_date + '+)' + '\-*(?P\d*)' + '\-*(?P\w*)', value) if (m): rel_match['m'] = m rel_match['rel_name'] = rel_name return rel_match return rel_match def get_rel_name(self, value): for rel in self.releases: if (rel in value): return rel return "" def update_latest_tarball_stable(self, value): index_parser = self.index_parser release = self.get_rel_name(value) if (not release): return if ('tar.sign' in value): return if (index_parser.release_extension not in value): return rel_match = self.get_rel_match(value) if (not rel_match['m']): return rel_specifics = rel_match['m'].groupdict() supported = True if (self.is_rel_eol(rel_specifics)): supported = False p = re.compile(index_parser.release_extension + '$') rel_name = p.sub("", value) base_release = '' for r in index_parser.supported: if (r in value): base_release = r break ver = rel_name.lstrip(rel_match['rel_name'] + '-') p = re.compile('-[usnpc]*$') short_ver = p.sub("", ver) tmp_changelog = 'ChangeLog-' + short_ver tmp_changelog_signed = tmp_changelog + ".sign" if (index_parser.ignore_changelogs): chanlog_req = False else: if ("rc" in ver): chanlog_req = False else: chanlog_req = True w = compute_rel_weight(ver) if (not w): return tar = dict(version=short_ver, base_release=r, weight=w, rel=rel_name, specifics=rel_specifics, base_url=self.base_url, base_url_validated=False, url=self.base_url + '/' + value, maintained=supported, longterm=False, next_rel=False, tarball=value, tarball_exists=True, ignore_signature=index_parser.ignore_signatures, signed_tarball=rel_name + '.tar.sign', signed_tarball_exists=False, changelog=tmp_changelog, changelog_url=self.base_url + '/' + tmp_changelog, changelog_exists=False, changelog_required=chanlog_req, 
signed_changelog=tmp_changelog_signed, signed_changelog_exists=False, verified=False) self.tarball_add_stable(tar) def update_latest_tarball_next(self, value): index_parser = self.index_parser rel_match = self.get_rel_match_next(value) if (not rel_match['m']): return rel_specifics = rel_match['m'].groupdict() rel_name_next = rel_match[ 'rel_name'] + '-' + rel_specifics['DATE_VERSION'] next_version = rel_specifics['DATE_VERSION'] if (rel_specifics['EXTRAVERSION'] != ''): rel_name_next = rel_name_next + '-' + rel_specifics['EXTRAVERSION'] next_version = next_version + '-' + rel_specifics['EXTRAVERSION'] if (rel_specifics['RELMOD'] != ''): rel_name_next = rel_name_next + '-' + rel_specifics['RELMOD'] next_version = next_version + '-' + rel_specifics['RELMOD'] tar_next = rel_name_next + index_parser.release_extension s_tarball_next = rel_name_next + ".tar.sign" tmp_changelog = 'ChangeLog-' + next_version tmp_changelog_signed = 'ChangeLog-' + next_version + ".sign" w = compute_rel_weight_next(next_version) tar_next = dict(version=next_version, weight=w, rel=rel_name_next, url=self.base_url + '/' + tar_next, specifics=rel_specifics, base_url=self.base_url, base_url_validated=False, maintained=True, longterm=False, next_rel=True, tarball=tar_next, tarball_exists=True, ignore_signature=index_parser.ignore_signatures, signed_tarball=s_tarball_next, signed_tarball_exists=False, changelog=tmp_changelog, changelog_url=self.base_url + '/' + tmp_changelog, changelog_exists=False, changelog_required=False, signed_changelog=tmp_changelog_signed, signed_changelog_exists=False, verified=False) self.tarball_add_next(tar_next) def print_tarballs(self): for tar in self.tarballs: specifics = tar.get('specifics') sys.stdout.write("Tarball: %s
----extra: %s mod_update: %s mod_type: %s
" % (tar.get('url'), specifics['EXTRAVERSION'], specifics['RELMOD_UPDATE'], specifics['RELMOD_TYPE'])) def is_biggest_tarball(self, pivot_tar, tars): other_greater = 0 for tar in tars: if pivot_tar.get('rel') == tar.get('rel'): continue if pivot_tar.get('base_release') == tar.get('base_release'): if pivot_tar.get('weight') < tar.get('weight'): other_greater = other_greater + 1 if (other_greater == 0): return True return False def update_rel_candidates(self): index_parser = self.index_parser for tar in self.tarballs: index_parser.rel_html_rels.append(tar) for tar in index_parser.rel_html_rels: if not self.is_biggest_tarball(tar, index_parser.rel_html_rels): index_parser.rel_html_rels.remove(tar) def is_next_rel(self, value): index_parser = self.index_parser for next_date in index_parser.next_rel_dates: if (next_date != '' and next_date in value and index_parser.release_extension in value): return True return False def handle_starttag(self, tag, attributes): "Process a tags and its 'attributes'." index_parser = self.index_parser if tag != 'a': pass for name, value in attributes: if name != 'href': pass release = self.get_rel_name(value) if (release not in value): pass if (self.is_next_rel(value)): self.update_latest_tarball_next(value) pass self.update_latest_tarball_stable(value) def handle_endtag(self, tag): pass def handle_data(self, data): pass def handle_comment(self, data): pass def __init__(self, index_parser, releases, url): HTMLParser.__init__(self) self.index_parser = index_parser self.base_url = url.rstrip("/") self.releases = releases self.tarballs = [] class index_rel_inferrer(HTMLParser): "Goes through an index page with releases and update the inferred release" def parse(self, html): "Parse the given string 's'." 
self.feed(html) self.close() def handle_decl(self, decl): pass def revise_inference(self, rel, value, rel_name): index_parser = self.index_parser value = value.lstrip(rel_name + "-") p = re.compile(index_parser.release_extension + '$') value = p.sub("", value) base_specs = get_rel_spec(rel.get('base')) if (not base_specs): return inferred_specs = get_rel_spec(value) if (not inferred_specs): return if (inferred_specs['VERSION'] != base_specs['VERSION']): return if (inferred_specs['PATCHLEVEL'] != base_specs['PATCHLEVEL']): return if (base_specs['SUBLEVEL'] != ''): if (inferred_specs['SUBLEVEL'] != base_specs['SUBLEVEL']): return w = compute_rel_weight(value) if (not w): return inferred_rel = dict(base=rel, url=self.base_url, highest_release=value, weight=w) # XXX: better way to do this? if (rel.get('highest_release') == ''): rel['url'] = self.base_url rel['highest_release'] = value rel['weight'] = w if (rel.get('weight') < inferred_rel.get('weight')): rel['url'] = self.base_url rel['highest_release'] = value rel['weight'] = w def handle_starttag(self, tag, attributes): "Process a tags and its 'attributes'." index_parser = self.index_parser if tag != 'a': return for name, value in attributes: if name != 'href': return rel_name = index_parser.search_rel_name(value) if (not rel_name): return if (index_parser.release_extension not in value): return if (".sign" in value): return for rel in index_parser.inferred_releases: if (rel.get('base') not in value): continue self.revise_inference(rel, value, rel_name) def handle_endtag(self, tag): pass def handle_data(self, data): pass def handle_comment(self, data): pass def __init__(self, index_parser, url): HTMLParser.__init__(self) self.index_parser = index_parser self.base_url = url class stable_url_parser(HTMLParser): "Goes through an index page and returns a URL for a release" def parse(self, html): "Parse the given string 's'." 
self.feed(html) self.close() def update_url(self, rel_target, rel_target_string, r, r_string): rel_string = r_string.lstrip("/v") rel_string = rel_string.rstrip("/") w = compute_rel_weight(rel_string) rel = dict(release_base=rel_target_string, release=rel_string, weight=w, version=r['VERSION'], patchlevel=r['PATCHLEVEL'], extraversion=r['EXTRAVERSION'], sublevel=r['SUBLEVEL'], relmod=r['RELMOD_UPDATE'], rel_url=self.base_url.rstrip("/") + '/' + r_string.rstrip("/")) if (len(self.stable_urls) == 0): self.stable_urls.append(rel) return for r_tmp in self.stable_urls: if (r_tmp.get('release_base') != rel_target_string): continue if (r_tmp.get('release') == r_string): return if (r_tmp.get('weight') < w): self.stable_urls.remove(r_tmp) self.stable_urls.append(rel) return self.stable_urls.append(rel) def update_stable_urls(self, rel, value): rel_target = get_rel_spec(rel) if (not rel_target): return r = get_rel_spec(value) if (not r): return if (rel_target['VERSION'] == ''): return if (rel_target['VERSION'] != r['VERSION']): return if (rel_target['PATCHLEVEL'] == ''): return if (rel_target['PATCHLEVEL'] != r['PATCHLEVEL']): return self.update_url(rel_target, rel, r, value) def handle_decl(self, decl): pass def handle_starttag(self, tag, attributes): "Process a tags and its 'attributes'." if tag != 'a': pass for name, value in attributes: if name != 'href': pass for rel in self.index_parser.supported: if (rel in value): self.update_stable_urls(rel, value) def handle_endtag(self, tag): pass def handle_data(self, data): pass def handle_comment(self, data): pass def __init__(self, index_parser, url): HTMLParser.__init__(self) self.index_parser = index_parser self.base_url = url self.stable_urls = [] class index_parser(HTMLParser): "HTML index parser for software releases class." def parse(self, html, url): "Parse the given string 's'." 
self.feed(html) self.close() def __init__(self, config_file): HTMLParser.__init__(self) self.config = ConfigParser.SafeConfigParser() self.config.read(config_file) self.rel_html_proj = self.config.get("project", "rel_html_proj") if (self.config.has_option("project", "rel_html_proj_aliases")): self.rel_html_proj_aliases = self.config.get( "project", "rel_html_proj_aliases").split() else: self.rel_html_proj_aliases = list() self.rel_names = self.rel_html_proj_aliases self.rel_names.insert(0, self.rel_html_proj) self.inferred_releases = [] if (self.config.has_option("project", "supported")): self.supported = self.config.get("project", "supported").split() for rel in self.supported: inferred_rel = dict(base=rel, url='', highest_release='', weight=0) self.inferred_releases.append(inferred_rel) else: self.supported = list() if (self.config.has_option("project", "eol")): self.eol = self.config.get("project", "eol").split() for rel in self.eol: inferred_rel = dict(base=rel, url='', highest_release='', weight=0) self.inferred_releases.append(inferred_rel) else: self.eol = list() self.stable_urls = [] self.rel_html_release_urls = [] urls = self.config.get("project", "rel_html_url_releases").split() for url in urls: self.rel_html_release_urls.append(url.strip()) self.rel_html_rels = [] if (self.config.has_option("project", "ignore_signatures")): self.ignore_signatures = self.config.get( "project", "ignore_signatures") else: self.ignore_signatures = False if (self.config.has_option("project", "ignore_changelogs")): self.ignore_changelogs = self.config.get( "project", "ignore_changelogs") else: self.ignore_changelogs = False if (self.config.has_option("project", "release_extension")): self.release_extension = "." 
+ \ self.config.get("project", "release_extension") else: self.release_extension = ".tar.xz" self.next_rel_day = 0 self.next_rel_month = 0 self.next_rel_url = '' self.next_rel_date = '' self.next_rel_date_rfc3339 = '' self.next_rel_dates = list() self.rel_license = self.config.get("project", "rel_license") self.html_title = self.config.get("html", "title") self.html_nav_dict = ({'url': self.config.get("html", "nav_01_url"), 'txt': self.config.get("html", "nav_01_txt")}, {'url': self.config.get("html", "nav_02_url"), 'txt': self.config.get("html", "nav_02_txt")}, {'url': self.config.get("html", "nav_03_url"), 'txt': self.config.get("html", "nav_03_txt")}) self.html_release_title = self.config.get("html", "release_title") if (self.config.has_option("html", "release_title_next")): self.html_release_title_next = self.config.get( "html", "release_title_next") else: self.html_release_title_next = '' self.html_about_title = self.config.get("html", "about_title") self.html_about = self.config.get("html", "about") self.rels = [] self.signed = False self.changelog = '' self.signed_changelog = False def get_stable_ext_urls(self, url): url_parser = stable_url_parser(self, url) try: rel = urllib2.urlopen(url) html = rel.read() url_parser.parse(html) self.stable_urls = url_parser.stable_urls except urllib2.HTTPError as error: return def search_rel_name(self, value): for rel_name in self.rel_names: if (rel_name in value): return rel_name return "" def search_stable_tarballs(self, ver, url): try: tarball_hunter = index_tarball_hunter(self, [ver], url) f = urllib2.urlopen(url) html = f.read() tarball_hunter.parse(html) tarball_hunter.update_rel_candidates() except urllib2.HTTPError as error: return except urllib2.URLError as e: return def evaluate_stable_ext_urls(self): for r in self.stable_urls: self.search_stable_tarballs(r.get('release'), r.get('rel_url')) def update_inferred_releases(self, url): try: rel_inferrer = index_rel_inferrer(self, url) f = urllib2.urlopen(url) html = 
f.read() rel_inferrer.parse(html) except urllib2.HTTPError as error: return except urllib2.URLError as e: return def evaluate_inferred_releases(self): for r in self.inferred_releases: if (r.get('url') == ''): continue self.search_stable_tarballs(r.get('highest_release'), r.get('url')) def __get_next_rel_page(self, url): r = urllib2.urlopen(url) html = r.read() num_parser = largest_num_href_parser() return num_parser.parse(html) def get_next_url(self, url): self.next_rel_month = self.__get_next_rel_page(url) self.next_rel_day = self.__get_next_rel_page( url + self.next_rel_month) self.next_rel_url = url + \ self.next_rel_month + '/' + self.next_rel_day self.next_rel_date_rfc3339 = str(date.today().year) + '-' + \ self.next_rel_month + '-' + self.next_rel_day self.next_rel_date = self.next_rel_date_rfc3339.replace("-", "") self.next_rel_dates.append(self.next_rel_date_rfc3339) self.next_rel_dates.append(self.next_rel_date) def evaluate_next_url(self): try: tarball_hunter = index_tarball_hunter(self, self.next_rel_dates, self.next_rel_url) f = urllib2.urlopen(self.next_rel_url) html = f.read() tarball_hunter.parse(html) tarball_hunter.update_rel_candidates() except urllib2.HTTPError as error: return def scrape_for_releases(self): for url in self.rel_html_release_urls: if url.endswith('stable/'): self.get_stable_ext_urls(url) self.evaluate_stable_ext_urls() elif url.endswith(str(date.today().year) + '/'): self.get_next_url(url) self.evaluate_next_url() else: self.update_inferred_releases(url) self.evaluate_inferred_releases() def review_base_url(self, ver, url): try: f_rel = urllib2.urlopen(url) html = f_rel.read() self.parse(html, url) except urllib2.HTTPError as error: return def validate_releases(self): for r in self.rel_html_rels: if (r.get('base_url_reviewed')): continue if (r.get('base_url') == ''): continue self.review_base_url(self, r.get('base_url')) self.rel_html_rels = sort_rels_weight(self.rel_html_rels) def handle_starttag(self, tag, attributes): "Process 
a tags and its 'attributes'." if tag != 'a': pass for name, value in attributes: if name != 'href': pass for r in self.rel_html_rels: # sys.stdout.write('%s
\n' % value) if r.get('version') not in value: continue if r.get('signed_tarball') in value: r['signed_tarball_exists'] = True elif r.get('tarball') in value: if "longerm" in value: r['longterm'] = True elif (r.get('changelog') == value): r['changelog_exists'] = True elif (r.get('signed_changelog') == value): r['signed_changelog_exists'] = True def handle_endtag(self, tag): pass def handle_data(self, data): pass def releases_verified(self): "Verify releases" all_verified = True for r in self.rel_html_rels: if (not r['tarball_exists']): all_verified = False sys.stdout.write('No tarball: %s
\n' % r['tarball']) break if (not r['ignore_signature']): if (not r['signed_tarball_exists']): all_verified = False sys.stdout.write( 'No signed tarball: %s
\n' % r['signed_tarball']) break if (r['changelog_required']): if (not (r['changelog_exists'])): all_verified = False sys.stdout.write( 'No changelog (%s): %s
\n' % (r['changelog'], r['version'])) break if (not (r['signed_changelog_exists'])): sys.stdout.write('No signed changelog (%s): %s
\n' % (r['signed_changelog'], r['version'])) all_verified = False break else: if (r['changelog_exists'] and (not (r['signed_changelog_exists']))): sys.stdout.write("Although a ChangeLog is not " "required for this release (%s), one does " "but it is not digitally signed. The " "file %s does not exist
" % (r['version'], r['signed_changelog'])) all_verified = False r['verified'] = True return all_verified class largest_num_href_parser(HTMLParser): "Will take an index page and return the highest numbered link" def parse(self, html): "Parse the given string 's'." self.feed(html) self.close() return self.number def handle_decl(self, decl): pass def handle_starttag(self, tag, attributes): "Process a tags and its 'attributes'." if tag != 'a': pass for name, value in attributes: if name != 'href': pass if (re.match(r'\d+', value)): number = re.sub(r'\D', "", value) if (number > self.number): self.number = number def handle_endtag(self, tag): pass def handle_data(self, data): pass def handle_comment(self, data): pass def __init__(self): HTMLParser.__init__(self) self.number = 0 # Example full html output parser, this can be used to # simply read and output a full HTML file, you can modify # this class to help you modify the contents. We do that # later. class html_base(HTMLParser): "HTML 5 generator from parsed index parser content." def parse(self, html): "Parse the given string 's'." 
self.feed(html) self.close() def handle_decl(self, decl): sys.stdout.write('' % decl) def handle_starttag(self, tag, attributes): sys.stdout.write('<%s' % tag) for name, value in attributes: sys.stdout.write(' %s="%s"' % (name, value)) sys.stdout.write('>') def handle_endtag(self, tag): sys.stdout.write('' % tag) def handle_data(self, data): sys.stdout.write('%s' % data) def handle_comment(self, data): sys.stdout.write('' % data) def __init__(self): HTMLParser.__init__(self) def license_url(license): if (license == 'GPLv2'): return "http://www.gnu.org/licenses/gpl-2.0.html" elif (license == 'ISC'): return "http://opensource.org/licenses/ISC" elif (license == 'AGPL'): return "http://www.gnu.org/licenses/agpl-3.0.html" elif (license == 'copyleft-next'): return "http://copyleft-next.org/" else: return "http://opensource.org/licenses/alphabetical" class rel_html_gen(HTMLParser): "HTML 5 generator from parsed index parser content." def __init__(self, parser): HTMLParser.__init__(self) self.parser = parser self.skip_endtag = False self.latest_stable = {} self.next_rels = [] self.next_rel_count = 0 def handle_title(self, tag, attributes): sys.stdout.write('<%s>%s' % (tag, self.parser.html_title)) def handle_def_start(self, tag, attributes): sys.stdout.write('<%s' % tag) for name, value in attributes: sys.stdout.write(' %s="%s"' % (name, value)) sys.stdout.write('>') def handle_h1_top(self, tag, attributes): self.skip_endtag = True sys.stdout.write('%s\n' % (self.parser.html_title)) sys.stdout.write('\t\t\n') def handle_h1_release(self, tag, attributes): self.skip_endtag = True sys.stdout.write('%s\n' % (self.parser.html_release_title)) sys.stdout.write('\t\t\t\n') count = 0 for r in self.parser.rel_html_rels: count += 1 if (not r.get('verified')): continue if (count == 2): latest_stable = r if (r.get('next_rel')): self.next_rels.append(r) self.next_rel_count = self.next_rel_count + 1 continue sys.stdout.write('\t\t\t\t') sys.stdout.write('\t\t\t\t\n' % (r.get('url'), 
r.get('rel'))) if (not r.get('ignore_signature')): sys.stdout.write('\t\t\t\t\n' % (r.get('base_url') + '/' + r.get('signed_tarball'))) else: sys.stdout.write('\t\t\t\t\n') if (r.get('maintained')): sys.stdout.write('\t\t\t\t\n') else: sys.stdout.write( '\t\t\t\t\n') if (not r.get('longterm')): sys.stdout.write('\t\t\t\t\n') else: sys.stdout.write( '\t\t\t\t\n') if (r.get('changelog_required')): sys.stdout.write('\t\t\t\t\n' % (r.get('changelog_url'), "ChangeLog")) else: sys.stdout.write('\t\t\t\t\n') sys.stdout.write('\t\t\t\t') sys.stdout.write('\t\t\t
%ssignedEOLLongterm%s
\n') def handle_h1_release_next(self, tag, attributes): if (self.next_rel_count <= 0): return if (not len(self.next_rels)): return sys.stdout.write('%s\n' % (self.parser.html_release_title_next)) sys.stdout.write('\t\t\t\n') for r in self.next_rels: if (not r.get('verified')): continue sys.stdout.write('\t\t\t\t') sys.stdout.write('\t\t\t\t\n' % (r.get('url'), r.get('rel'))) if (not r.get('ignore_signature')): sys.stdout.write('\t\t\t\t\n' % (r.get('base_url') + '/' + r.get('signed_tarball'))) else: sys.stdout.write('\t\t\t\t\n') if (r.get('maintained')): sys.stdout.write('\t\t\t\t\n') else: sys.stdout.write( '\t\t\t\t\n') if (not r.get('longterm')): sys.stdout.write('\t\t\t\t\n') else: sys.stdout.write( '\t\t\t\t\n') if (r.get('changelog_required')): sys.stdout.write('\t\t\t\t\n' % (r.get('changelog_url'), "ChangeLog")) else: sys.stdout.write('\t\t\t\t\n') sys.stdout.write('\t\t\t\t') sys.stdout.write('\t\t\t
%ssignedEOLLongterm%s
\n') def handle_h1_about(self, tag, attributes): self.skip_endtag = True sys.stdout.write('%s\n' % (self.parser.html_about_title)) sys.stdout.write('

%s

\n' % (self.parser.html_about)) def handle_h_license(self, tag, attributes): self.skip_endtag = True sys.stdout.write('License\n') sys.stdout.write('\t\t

%s is licensed under the %s. \n' % (self.parser.rel_html_proj, license_url(self.parser.rel_license), self.parser.rel_license)) sys.stdout.write('This page was generated by %s licensed under the %s

\n' % (rel_html_href(), license_url(rel_html_license()), rel_html_license())) def handle_h1_pass(self, tag, attributes): pass def handle_h(self, tag, attributes): def_run = self.handle_h1_pass for name, value in attributes: if (name == 'id'): if (value == 'top_content'): def_run = self.handle_h1_top elif (value == 'release_title'): def_run = self.handle_h1_release elif (value == 'release_title_next'): def_run = self.handle_h1_release_next elif (value == 'about'): def_run = self.handle_h1_about elif (value == 'license'): def_run = self.handle_h_license sys.stdout.write('<%s' % tag) for name, value in attributes: sys.stdout.write(' %s="%s"' % (name, value)) sys.stdout.write('>') def_run(tag, attributes) def parse(self, html): "Parse the given string 's'." self.feed(html) self.close() def handle_decl(self, decl): sys.stdout.write('' % decl) def handle_starttag(self, tag, attributes): self.skip_endtag = False if (tag == 'title'): self.handle_title(tag, attributes) elif (tag in {'h1', 'h2', 'h3'}): self.handle_h(tag, attributes) else: self.handle_def_start(tag, attributes) def handle_endtag(self, tag): if (self.skip_endtag): pass sys.stdout.write('' % tag) def handle_data(self, data): sys.stdout.write('%s' % data) def handle_comment(self, data): sys.stdout.write('' % data) def check_file(file_input): if not os.path.isfile(file_input): print 'File not found: %(file)s' % {"file": file_input} usage() def try_rel_next(rel): sys.stdout.write("----------------------------------------\n") sys.stdout.write("Rel: %s\n" % rel) w = compute_rel_weight_next(rel) sys.stdout.write("Weight: %s\n" % w) def try_rel(rel_dict): sys.stdout.write("----------------------------------------\n") sys.stdout.write("Rel: %s\n" % rel_dict.get('version')) rel_dict["weight"] = compute_rel_weight(rel_dict.get('version')) sys.stdout.write("Weight: %s\n" % rel_dict.get("weight")) def print_rels_weight(rels): for r in rels: sys.stdout.write("Rel: %20s\t%20s\n" % (r.get('version'), r.get('weight'))) def 
def try_rels(rels):
    "Debug helper: weigh a list of version strings and print them sorted."
    col = []
    for rel in rels:
        col.append(dict(version=rel, weight=0))
    for r in col:
        try_rel(r)
    col = sort_rels_weight(col)
    print_rels_weight(col)


def debug_rel_tests():
    "Exercise the weight helpers with a fixed set of sample versions."
    try_rel_next("20130510-2-u")
    try_rel_next("2013-01-10-2-u")
    try_rel_next("20130110-2-u")
    try_rel_next("2013-03-07-u")
    try_rel_next("2013-03-07")
    rels = ["2.6.32.3", "3.8", "2.6.32.1", "2.6.32.40",
            "2.6.32", "3.8.2", "3.5.1", "3.2.1", "3.7.1",
            "3.8.2-1-usnpc", "3.8-rc1", "3.8-rc1-1-usnpc",
            "3.8-rc2-2-usnpc", "3.8-rc2-2-c", "3.8-rc2-2-s",
            "3.8-rc2-2", "3.8-rc3-1-u"]
    try_rels(rels)


def main():
    """Entry point: read the configuration, scrape and validate releases,
    then render html/template.html through rel_html_gen to stdout."""
    if (debug):
        debug_rel_tests()
        sys.exit(1)
    config_file = ''
    try:
        opts, args = getopt.getopt(sys.argv[1:], "hf:")
    except getopt.GetoptError as err:
        print(str(err))
        usage()
    for o, a in opts:
        # Bug fix: the original tested o in ("-f"), which is string
        # membership ("-f" in "-f"), not a tuple test.
        if o == "-f":
            check_file(a)
            config_file = a
        elif o in ("-h", "--help"):
            usage()
    if len(config_file) == 0:
        config_file = 'rel-html.cfg'
    parser = index_parser(config_file)
    html = ""
    # We go through two passes on the provided URLs:
    #
    # 1. Scraping for releases
    # 2. Validation
    #
    # The first pass is based on finding the
    # highest release extraversion tarball. We
    # require a second pass as validation entails
    # searching for a ChangeLog and signature file
    # for all known existing releases.
    parser.scrape_for_releases()
    parser.validate_releases()
    if (not parser.releases_verified()):
        sys.stdout.write("Releases not verified\n")
        sys.exit(1)
    gen = rel_html_gen(parser)
    # Close the template after reading (the original leaked the handle).
    f = open('html/template.html', 'r')
    try:
        html = f.read()
    finally:
        f.close()
    gen.parse(html)


def usage():
    "Print usage information plus the license blurb and exit with status 2."
    print('')
    print('%(cmd)s' % {"cmd": sys.argv[0]})
    print('')
    print('Provided an index URL and a few project hints page this')
    print('will spit out a shiny HTML 5 W3C compliant releases page.')
    print('')
    rel_html_license_verbose()
    print('')
    print('This program can be run without arguments or with a project file passed')
    print('as an argument. If no arguments are given it will assume you have the')
    print('file rel-html.cfg present on your current directory.')
    print('')
    print('Usage: %(cmd)s [ -f rel-html.cfg ]' % {"cmd": sys.argv[0]})
    sys.exit(2)

if __name__ == "__main__":
    main()