#!/usr/bin/python
#
# This file can also be used as a module
#
# Copyright 2009, Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#
# Author: Vitezslav Humpa

descr = """
A program that will obtain a list of people who participated on the Test Day
and will try to search for these people in the Red Hat employees roster - to
remove them from the list, and print only the Fedora community members. By
default it will print both the complete and community list.

Please note that: the search is far from perfect as it is limited by
capabilities of the RH roster search page. By experimenting it was found that
the best way is to base the checkup on fedoraproject/RH logins, which are
usually same with the Red Hat employees. There will always be some false
positives, so you might need to weed it out."""

note = """
You need to have access to the roster page (connected to RH network),
otherwise just the basic tester grab will be available.
"""

import sys
import re

try:
    # Nothing in this file references urlgrabber; the import is kept but made
    # optional so the module still loads where the package is not installed.
    import urlgrabber
except ImportError:
    urlgrabber = None

try:
    import urllib2
except ImportError:
    # Python 3: urllib.request provides the urlopen() used below.
    import urllib.request as urllib2


# Matches a wiki user link and captures (login, link text).
_USER_LINK_RE = re.compile(r'title="User:([a-z0-9\-]+)[ "][^<]*>([^<>]*)',
                           re.IGNORECASE)

# Marker text whose presence means the roster search found nobody.
_NO_RESULTS_RE = re.compile(r'No results were returned', re.IGNORECASE)


def _read_url(url):
    """Return the body of *url* as text, closing the response afterwards."""
    response = urllib2.urlopen(url)
    try:
        data = response.read()
    finally:
        response.close()
    if not isinstance(data, str):
        # Python 3 returns bytes, which the str regexes above cannot search.
        # UTF-8 is assumed; undecodable bytes are replaced instead of raising.
        data = data.decode('utf-8', 'replace')
    return data


class TestDayPeopleGrabber(object):
    """Collect test day participants and split off likely company employees.

    A simple class that can download a given test day page, obtain the list
    of participants and check whether these are possibly part of company, or
    if not, community.

    The roster search is currently quite limited so the roster is checked
    against the fedoraproject logins instead of full names. Fedoraproject
    logins of RedHat employees are usually the same as their internal logins
    and this seems to offer better results.

    Attributes:
        people: list of (login, name) tuples currently held by the grabber.
    """

    # Kept so the attribute stays readable on the class itself; __init__
    # gives every instance its own list so instances never share state.
    people = []

    def __init__(self):
        self.people = []

    def uniqify_list(self, list):
        """Return a new list with duplicates removed, keeping first-seen order.

        The parameter name shadows the builtin but is left unchanged so that
        existing keyword callers keep working. Membership is tested with
        ``in`` rather than a set so unhashable items are accepted too.
        """
        unique_list = []
        for item in list:
            if item not in unique_list:
                unique_list.append(item)
        return unique_list

    def parse_people(self, url):
        """Download *url* and store the unique (login, name) pairs found.

        Replaces ``self.people`` with the matches of the user-link pattern,
        in page order and without duplicates.
        """
        page = _read_url(url)
        self.people = self.uniqify_list(_USER_LINK_RE.findall(page))

    def print_people(self):
        """Print one ``login: name`` line per person, or ``None found``."""
        if not self.people:
            # Single-argument print(...) behaves the same on Python 2 and 3.
            print("None found")
        for (login, name) in self.people:
            if name.startswith('User:'):
                # The link text is itself a "User:<login>" title; show only
                # the part after the last colon.
                name = name.split(':')[-1]
            print('%s: %s' % (login, name))

    def query_company_roster(self, roster):
        """Remove people found in the company roster from ``self.people``.

        For each person the page at ``roster + '0?ln=' + login`` is fetched.
        Unless that page contains "No results were returned", the person is
        treated as an employee and removed from ``self.people``.

        Args:
            roster: base URL of the roster search page.

        Returns:
            List of the (login, name) tuples that were removed, in their
            original order (empty when nobody matched).
        """
        red_haters = []
        # Iterate over a copy because self.people is modified in the loop.
        for (login, name) in self.people[:]:
            page = _read_url(roster + '0?ln=' + login)
            if _NO_RESULTS_RE.search(page) is None:
                self.people.remove((login, name))
                # Accumulate every hit; plain assignment here kept only the
                # last one.
                red_haters.append((login, name))
        return red_haters


def main():
    """Command-line entry point: print all testers, then the community ones."""
    import argparse

    parser = argparse.ArgumentParser(prog='$ tdpeoplegrabber.py',
                                     description=descr, epilog=note)
    parser.add_argument('--url',
                        default='http://fedoraproject.org/wiki/Test_Day:Current',
                        help='URL of the test day page, current by default')
    parser.add_argument('--roster', required=True,
                        help="""A new RH roster url. This is mandatory as the link cant be published in the code here.""")
    parser.add_argument('--all_testers_only', action="store_true",
                        default=False,
                        help='Will not check against RedHat employees and just print all the people')
    args = parser.parse_args()

    grabber = TestDayPeopleGrabber()
    grabber.parse_people(args.url)
    grabber.print_people()

    if not args.all_testers_only:
        print("----------------------\nCommunity (non-RedHat guess only):\n----------------------")
        grabber.query_company_roster(args.roster)
        grabber.print_people()


if __name__ == "__main__":
    main()