~launchpad-pqm/launchpad/devel

8687.15.18 by Karl Fogel
Add the copyright header block to files under lib/canonical/.
1
# Copyright 2009 Canonical Ltd.  This software is licensed under the
2
# GNU Affero General Public License version 3 (see the file LICENSE).
7793.7.1 by Graham Binns
Adde SF script.
3
4
"""Utilities for the sfremoteproductfinder cronscript"""
5
6
# Make every class defined in this module a new-style class (Python 2).
__metaclass__ = type

# Names exported by `from <this module> import *`.
__all__ = ['SourceForgeRemoteProductFinder']
10
7793.7.4 by Graham Binns
Added SourceForgeRemoteProductFinder.getRemoteProductFromSourceForge().
11
import urllib
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
12
from urllib2 import (
13
    HTTPError,
14
    urlopen,
15
    )
7793.7.4 by Graham Binns
Added SourceForgeRemoteProductFinder.getRemoteProductFromSourceForge().
16
17
from BeautifulSoup import BeautifulSoup
7793.7.1 by Graham Binns
Adde SF script.
18
from zope.component import getUtility
19
14565.2.13 by Curtis Hovey
Moved canonial.launchapd.script tests and modules to lp.services.script.
20
from lp.services.scripts.logger import log as default_log
11403.1.4 by Henning Eggers
Reformatted imports using format-imports script r32.
21
from canonical.launchpad.webapp import (
22
    urlappend,
23
    urlsplit,
24
    )
13130.1.12 by Curtis Hovey
Sorted imports.
25
from lp.app.interfaces.launchpad import ILaunchpadCelebrities
7675.110.3 by Curtis Hovey
Ran the migration script to move registry code to lp.registry.
26
from lp.registry.interfaces.product import IProductSet
7793.7.1 by Graham Binns
Adde SF script.
27
28
29
class SourceForgeRemoteProductFinder:
    """Responsible for finding the remote product of SourceForge projects.

    The finder scrapes a project's pages on SourceForge to discover the
    group_id and atid of its bug tracker, and stores them on the
    matching Product as its remote_product.
    """

    def __init__(self, txn, logger=None):
        """Create a new finder.

        :param txn: The transaction manager; its begin() and commit()
            methods are called around each Product update.
        :param logger: The logger to report to. If None, the default
            script logger is used.
        """
        self.txn = txn
        self.logger = logger
        if logger is None:
            self.logger = default_log

        # We use the SourceForge celebrity to make sure that we're
        # always going to use the right URLs.
        self.sourceforge_baseurl = getUtility(
            ILaunchpadCelebrities).sourceforge_tracker.baseurl

    def _getPage(self, page):
        """GET the specified page on the remote HTTP server.

        :param page: The path of the page, relative to the SourceForge
            base URL.
        :return: The body of the HTTP response.
        """
        page_url = urlappend(self.sourceforge_baseurl, page)
        response = urlopen(page_url)
        try:
            return response.read()
        finally:
            # Always release the connection, even if read() fails.
            response.close()

    def getRemoteProductFromSourceForge(self, sf_project):
        """Return the remote product of a SourceForge project.

        Only HTTPError is handled while fetching pages; any other
        exception raised by the fetch propagates to the caller.

        :param sf_project: The name of the project on SourceForge.
        :return: The group_id and atid of the SourceForge project's bug
            tracker as an ampersand-separated string in the form
            'group_id&atid', or None if a page could not be fetched, an
            expected link was missing or the IDs were not integers.
        """
        # First, fetch the project page.
        try:
            soup = BeautifulSoup(self._getPage("projects/%s" % sf_project))
        except HTTPError as error:
            self.logger.error(
                "Error fetching project %s: %s" %
                (sf_project, error))
            return None

        # Find the Tracker link and fetch that.
        tracker_link = soup.find('a', text='Tracker')
        if tracker_link is None:
            self.logger.error(
                "No tracker link for project '%s'" % sf_project)
            return None

        tracker_url = tracker_link.findParent()['href']

        # Clean any leading '/' from tracker_url so that urlappend
        # doesn't choke on it.
        tracker_url = tracker_url.lstrip('/')
        try:
            soup = BeautifulSoup(self._getPage(tracker_url))
        except HTTPError as error:
            self.logger.error(
                "Error fetching project %s: %s" %
                (sf_project, error))
            return None

        # Extract the group_id and atid from the bug tracker link.
        bugtracker_link = soup.find('a', text='Bugs')
        if bugtracker_link is None:
            self.logger.error(
                "No bug tracker link for project '%s'" % sf_project)
            return None

        bugtracker_url = bugtracker_link.findParent()['href']

        # We need to replace encoded ampersands ('&amp;') in the URL
        # since SourceForge usually encodes them; otherwise the query
        # string would not split into its real parameters below.
        bugtracker_url = bugtracker_url.replace('&amp;', '&')
        schema, host, path, query, fragment = urlsplit(bugtracker_url)

        query_dict = {}
        for bit in query.split('&'):
            key, value = urllib.splitvalue(bit)
            query_dict[key] = value

        try:
            atid = int(query_dict.get('atid'))
            group_id = int(query_dict.get('group_id'))
        except (TypeError, ValueError):
            # If anything goes wrong when int()ing the IDs, just return
            # None. A missing parameter, or one without a value, yields
            # None, for which int() raises TypeError rather than
            # ValueError.
            return None

        return u'%s&%s' % (group_id, atid)

    def setRemoteProductsFromSourceForge(self):
        """Find and set the remote product for SF-linked Products.

        Each Product is updated in its own transaction. A Product whose
        remote product cannot be determined has its remote_product set
        to None.
        """
        products_to_update = getUtility(
            IProductSet).getSFLinkedProductsWithNoneRemoteProduct()

        # Count once and reuse the result for both the check and the
        # log message.
        product_count = products_to_update.count()
        if product_count == 0:
            self.logger.info("No Products to update.")
            return

        self.logger.info(
            "Updating %s Products using SourceForge project data" %
            product_count)

        for product in products_to_update:
            self.txn.begin()
            self.logger.debug(
                "Updating remote_product for Product '%s'" % product.name)
            product.remote_product = self.getRemoteProductFromSourceForge(
                product.sourceforgeproject)
            self.txn.commit()