~launchpad-pqm/launchpad/devel

3944.1.1 by Francis J. Lacoste
Use system version python2.4 for scripts.
1
#!/usr/bin/python2.4
3485.3.3 by James Henstridge
add zope spec metadata importer
2
# Copyright 2006 Canonical Ltd.  All rights reserved.
3
4
# A script to import metadata about the Zope 3 specs into Launchpad
5
6
# Python 2 idiom: classes in this module that are declared without explicit
# bases use `type` as their metaclass, i.e. they are new-style classes.
__metaclass__ = type
7
8
import itertools
9
import re
10
import sys
11
import urllib2
12
13
import _pythonpath
14
from zope.component import getUtility
15
from BeautifulSoup import BeautifulSoup
16
17
from canonical.lp import initZopeless
18
from canonical.lp.dbschema import (
19
    SpecificationStatus, SpecificationGoalStatus, SpecificationDelivery,
20
    SpecificationPriority)
21
from canonical.launchpad.scripts import execute_zcml_for_scripts
22
from canonical.launchpad.interfaces import (
23
    IPersonSet, IProductSet, ISpecificationSet)
24
3485.3.34 by James Henstridge
update import-zope-specs.py to work with current Launchpad and current Zope wiki
25
# Root URL of the Zope 3 wiki; page names are appended to it directly.
WIKI_BASE = 'http://wiki.zope.org/zope3/'
# Wiki pages that each hold a list of proposals to import.
PROPOSAL_LISTS = ['Zope3Proposals', 'OldProposals', 'DraftProposals']
# Default listing page used by iter_spec_urls().
specroot = WIKI_BASE + 'Zope3Proposals'

# Spellings of the "@" sign accepted in author email addresses.
at_replacements = ['_at_', '(at)', '@']
# Matches "local<at>domain", where <at> is a literal "@" or any of the
# spellings listed in at_replacements.
author_email_pat = re.compile(
    '[-.A-Za-z0-9]+(?:@|%s)[-.A-Za-z0-9]+'
    % '|'.join(map(re.escape, at_replacements)))
33
34
def getTextContent(tag):
    """Return the text contained in a parsed node.

    None gives an empty string and a string is returned unchanged.  For
    anything else, every string yielded by tag.recursiveChildGenerator()
    is concatenated in iteration order.
    """
    if tag is None:
        return ''
    if isinstance(tag, basestring):
        return tag
    pieces = []
    for node in tag.recursiveChildGenerator():
        # Non-string descendants are skipped; the strings they contain
        # are yielded by the generator separately.
        if isinstance(node, basestring):
            pieces.append(node)
    return ''.join(pieces)
41
42
43
class ZopeSpec:
    """One Zope 3 proposal page and the logic to mirror it into Launchpad.

    Instances are created from a listing page (URL, title and summary);
    parseSpec() then fetches the page itself to collect author emails and
    status badges, and syncSpec() writes the result to Launchpad.
    """

    # Names of the wiki "badge" pages whose presence as a link on a spec
    # page marks the spec's status.
    _wiki_badges = (
        'IsWorkInProgress',

        'IsProposal',
        'IsRejectedProposal',
        'IsSupercededProposal',
        'IsRetractedProposal',
        'IsAcceptedProposal',
        'IsImplementedProposal',
        'IsExpiredProposal',
        'IsDraftProposal',

        'IsPlanned',
        'IsResolved',
        'IsImplemented',

        'IsReplaced',
        'IsOutdated',
        'IsDraft',
        'IsEditedDraft',
        'IsRoughDraft',
        )

    def __init__(self, url, title, summary):
        """Record the listing data for a spec.

        :param url: URL of the spec's wiki page.
        :param title: title text taken from the listing.
        :param summary: summary text taken from the listing.
        """
        self.url = url
        # The wiki page name is the last path segment of the URL.
        self.name = self.url.split('/')[-1]
        self.title = title
        self.summary = summary
        self.authors = set()
        self.statuses = set()

    def parseAuthorEmails(self, text):
        """Add every email address found in text to self.authors.

        Obfuscated "at" spellings (see at_replacements) are converted back
        to "@" before the address is stored.
        """
        for author in author_email_pat.findall(text):
            # unmangle at symbol in email:
            for replacement in at_replacements:
                author = author.replace(replacement, '@')
            self.authors.add(author)

    def parseStatuses(self, soup):
        """Add to self.statuses each badge that the page links to."""
        for badge in self._wiki_badges:
            url = WIKI_BASE + badge
            if soup.fetch('a', {'href': url}):
                self.statuses.add(badge)

    def parseSpec(self):
        """Fetch the spec page and extract its statuses and author emails."""
        response = urllib2.urlopen(self.url)
        try:
            contents = response.read()
        finally:
            # Release the connection even if the read fails.
            response.close()
        soup = BeautifulSoup(contents)
        # Sanity check that the page has the expected layout.
        contentdivs = soup('div', {'class': 'content'})
        assert len(contentdivs) == 1, (
            'expected exactly one content div in %s' % self.url)

        # Specification statuses are represented by "wiki badges",
        # which are just hyperlinks to particular pages.
        self.parseStatuses(soup)

        # There are two styles of spec.  One of them has a table with
        # RFC-822 style headers in it.  The other has minor level headings
        # with text under the heading.
        tables = soup('table')
        # Every page has one table, for the main page layout.  So, if the
        # page has two or more tables, it is taken to use the RFC-822 style.
        if len(tables) >= 2:
            # This is a spec with RFC-822 style headers.
            docinfo = tables[1]
            for row in docinfo('tr'):
                headers = row('th')
                cells = row('td')
                if not headers or not cells:
                    continue
                key = headers[0].renderContents()
                if 'Author' in key:
                    self.parseAuthorEmails(cells[0].renderContents())
        else:
            # This is a spec with minor level headings, or perhaps with no
            # headings at all.

            # Look for an author heading.
            author_headers = soup(text=re.compile('Author.*', re.I))
            if author_headers:
                following = author_headers[0].findNext()
                # findNext() gives None when nothing follows the heading.
                if following is not None:
                    self.parseAuthorEmails(following.renderContents())

    def _hasAnyStatus(self, badges):
        """Return True if any of the given badges is in self.statuses."""
        for badge in badges:
            if badge in self.statuses:
                return True
        return False

    @property
    def lpname(self):
        """The Launchpad spec name derived from the wiki page name.

        The page name is split into dash-separated lower-case words, and
        prefixed with "x-" when it is empty or does not start with a
        letter.
        """
        # add dashes before capitalised words
        name = re.sub(r'([^A-Z])([A-Z])', r'\1-\2', self.name)
        # lower case name
        name = name.lower()
        # remove leading dashes
        name = name.lstrip('-')
        # if name doesn't begin with an alphabetical character prefix it
        # (an empty name, e.g. from a URL ending in "/", is prefixed too)
        if not name or not name[0].isalpha():
            name = 'x-' + name
        return name

    @property
    def lpstatus(self):
        """The SpecificationStatus matching the badges found on the page."""
        # implemented and accepted specs => APPROVED
        if self._hasAnyStatus(['IsImplemented',
                               'IsImplementedProposal',
                               'IsAcceptedProposal']):
            return SpecificationStatus.APPROVED
        # WIP => DISCUSSION
        if 'IsWorkInProgress' in self.statuses:
            return SpecificationStatus.DISCUSSION
        if self._hasAnyStatus(['IsSupercededProposal', 'IsReplaced']):
            return SpecificationStatus.SUPERSEDED
        if self._hasAnyStatus(['IsExpiredProposal', 'IsOutdated']):
            return SpecificationStatus.OBSOLETE
        # draft statuses:
        if self._hasAnyStatus(['IsDraftProposal',
                               'IsDraft',
                               'IsEditedDraft',
                               'IsRoughDraft']):
            return SpecificationStatus.DRAFT
        # otherwise ...
        return SpecificationStatus.PENDINGREVIEW

    @property
    def lpgoalstatus(self):
        """The SpecificationGoalStatus matching the badges on the page."""
        # implemented and accepted specs => ACCEPTED
        if self._hasAnyStatus(['IsImplemented',
                               'IsImplementedProposal',
                               'IsAcceptedProposal']):
            return SpecificationGoalStatus.ACCEPTED
        # rejected or retracted => DECLINED
        if self._hasAnyStatus(['IsRetractedProposal', 'IsRejectedProposal']):
            return SpecificationGoalStatus.DECLINED

        # otherwise ...
        return SpecificationGoalStatus.PROPOSED

    @property
    def lpdelivery(self):
        """The SpecificationDelivery matching the badges on the page."""
        if self._hasAnyStatus(['IsImplemented', 'IsImplementedProposal']):
            return SpecificationDelivery.IMPLEMENTED
        # otherwise ...
        return SpecificationDelivery.UNKNOWN

    def syncSpec(self):
        """Create or update the Launchpad specification for this page.

        The spec is looked up by URL and created if it is missing.  Title,
        summary, status, goal status and delivery are then brought in line
        with the parsed data, and the assignee is set to the first author
        (in sorted order) whose email is known to Launchpad.
        """
        zope = getUtility(IProductSet).getByName('zope')
        zope_dev = getUtility(IPersonSet).getByName('zope-dev')
        # has the spec been created?
        lpspec = getUtility(ISpecificationSet).getByURL(self.url)
        if not lpspec:
            lpspec = getUtility(ISpecificationSet).new(
                name=self.lpname,
                title=self.title,
                specurl=self.url,
                summary=self.summary,
                priority=SpecificationPriority.UNDEFINED,
                status=SpecificationStatus.NEW,
                owner=zope_dev,
                product=zope)

        # synchronise
        lpspec.title = self.title
        lpspec.summary = self.summary
        lpspec.status = self.lpstatus
        newgoalstatus = self.lpgoalstatus
        # Only touch the goal when its status actually changes.
        if newgoalstatus != lpspec.goalstatus:
            if newgoalstatus == SpecificationGoalStatus.PROPOSED:
                lpspec.proposeGoal(None, zope_dev)
            elif newgoalstatus == SpecificationGoalStatus.ACCEPTED:
                lpspec.acceptBy(zope_dev)
            elif newgoalstatus == SpecificationGoalStatus.DECLINED:
                lpspec.declineBy(zope_dev)
        lpspec.delivery = self.lpdelivery
        lpspec.updateLifecycleStatus(zope_dev)

        # set the assignee to the first author email with an LP account
        for author in sorted(self.authors):
            person = getUtility(IPersonSet).getByEmail(author)
            if person is not None:
                lpspec.assignee = person
                break
232
233
234
def iter_spec_urls(url=specroot):
    """Yield a ZopeSpec for each proposal listed on a wiki listing page.

    :param url: URL of the listing page; defaults to specroot.

    The specs are taken from the top-level items of the first list inside
    the page's single "content" div.  For each item, the first anchor that
    has an href supplies the URL and title, and the text after that anchor
    supplies the summary.  Items with no usable anchor, or whose link
    points at the wiki's page creation form, are skipped.
    """
    response = urllib2.urlopen(url)
    try:
        contents = response.read()
    finally:
        # Release the connection even if the read fails.
        response.close()
    soup = BeautifulSoup(contents)
    contentdivs = soup('div', {'class': 'content'})
    assert len(contentdivs) == 1, (
        'expected exactly one content div in %s' % url)
    contentdiv = contentdivs[0]
    listofspecs = contentdiv('ul')[0]

    for listitem in listofspecs('li', recursive=False):
        # Use the first anchor that actually links somewhere; an anchor
        # without an href (e.g. a named anchor) cannot identify a spec.
        specanchor = None
        for anchor in listitem('a'):
            if anchor.get('href'):
                specanchor = anchor
                break
        if specanchor is None:
            continue
        href = specanchor['href']
        # broken wiki link => ignore
        if 'createform?page=' in href:
            continue
        title = getTextContent(specanchor)
        summary = ''.join([getTextContent(tag)
                           for tag in specanchor.nextSiblingGenerator()])
        yield ZopeSpec(href, title, summary.strip())
255
256
        
257
def main(argv):
258
    execute_zcml_for_scripts()
259
    ztm = initZopeless()
260
261
    for spec in itertools.chain(*[iter_spec_urls(WIKI_BASE + page)
262
                                  for page in PROPOSAL_LISTS]):
263
        # parse extra information from the spec body
264
        spec.parseSpec()
265
        # add its metadata to LP
266
        print 'Synchronising', spec.name
267
        ztm.begin()
268
        try:
269
            spec.syncSpec()
270
            ztm.commit()
271
        except:
272
            ztm.abort()
273
            raise
274
275
if __name__ == '__main__':
276
    sys.exit(main(sys.argv))