]> rtime.felk.cvut.cz Git - coffee/buildroot.git/blob - utils/scanpypi
thumb2, passwd p for ssh, dhcp eth for nfs_check
[coffee/buildroot.git] / utils / scanpypi
1 #!/usr/bin/env python2
2 """
3
4 Utility for building Buildroot packages for existing PyPI packages
5
6 Any package built by scanpypi should be manually checked for
7 errors.
8 """
9 from __future__ import print_function
10 import argparse
11 import json
12 import urllib2
13 import sys
14 import os
15 import shutil
16 import StringIO
17 import tarfile
18 import zipfile
19 import errno
20 import hashlib
21 import re
22 import textwrap
23 import tempfile
24 import imp
25 from functools import wraps
26
# Chunk size, in bytes, used when reading license files to hash them.
BUF_SIZE = 65536

# spdx_lookup is an optional dependency. When it cannot be imported the
# script only prints a hint and binds liclookup to None, so that later code
# can test "liclookup is None" to know whether it is usable.
try:
    import spdx_lookup as liclookup
except ImportError:
    # spdx_lookup is not installed
    print('spdx_lookup module is not installed. This can lead to an '
          'inaccurate licence detection. Please install it via\n'
          'pip install spdx_lookup')
    liclookup = None
37
38
def setup_decorator(func, method):
    """
    Build a replacement for distutils.core.setup or setuptools.setup.

    The replacement never calls the real setup function. It stores the
    keyword arguments it receives in BuildrootPackage.setup_args, indexed
    by the value of the 'name' argument, and adds a 'method' key to them.

    Keyword arguments:
    func -- either setuptools.setup or distutils.core.setup
    method -- either 'setuptools' or 'distutils'
    """

    def record_call(*args, **kwargs):
        # Every python package calls its setup function to get installed;
        # the 'name' argument of that call is the name of the package.
        pkg_name = kwargs['name']
        kwargs['method'] = method
        BuildrootPackage.setup_args[pkg_name] = kwargs

    # Keep the name and docstring of the function being replaced
    return wraps(func)(record_call)
57
# Monkey patch both setup() entry points so that loading a package's
# setup.py records the arguments given to setup() instead of installing.
import setuptools  # noqa E402
setuptools.setup = setup_decorator(setuptools.setup, 'setuptools')
# Import the submodule explicitly: a bare "import distutils" does not
# guarantee that the distutils.core attribute exists.
import distutils.core  # noqa E402
# Wrap the distutils function itself, not the already patched
# setuptools.setup.
distutils.core.setup = setup_decorator(distutils.core.setup, 'distutils')
63
64
def find_file_upper_case(filenames, path='./'):
    """
    Generator:
    Walk path recursively and yield every file whose upper-cased name is
    one of the specified filenames.
    Each yielded value is the file's path, starting with the path argument.

    Keyword arguments:
    filenames -- List of (upper case) filenames to be found
    path -- Path to the directory to search
    """
    for dirpath, _subdirs, entries in os.walk(path):
        wanted = (entry for entry in entries if entry.upper() in filenames)
        for entry in wanted:
            yield os.path.join(dirpath, entry)
79
80
def pkg_buildroot_name(pkg_name):
    """
    Returns the Buildroot package name for the PyPI package pkg_name.
    Lowers the name, removes every character that is neither a word
    character nor '-', then adds the 'python-' prefix unless the name
    already starts with it. A name that ends up empty is returned as is.

    Keyword arguments:
    pkg_name -- String to rename
    """
    prefix = 'python-'
    # Raw string: '\w' is not a valid escape in a plain string literal
    name = re.sub(r'[^\w-]', '', pkg_name.lower())
    if name and not name.startswith(prefix):
        name = prefix + name
    return name
95
96
class DownloadFailed(Exception):
    """Raised when none of a package's download URLs gave a usable file."""
    pass
99
100
class BuildrootPackage(object):
    """This class's methods are not meant to be used individually please
    use them in the correct order:

    __init__

    fetch_package_info

    download_package

    extract_package

    load_setup

    get_requirements

    create_package_mk

    create_hash_file

    create_config_in

    """
    # Arguments of every setup() call recorded by the patched setup
    # functions, indexed by the 'name' argument of the call.
    setup_args = {}

    def __init__(self, real_name, pkg_folder):
        """
        Keyword arguments:
        real_name -- name of the package on PyPI
        pkg_folder -- directory in which the Buildroot package is created
        """
        self.real_name = real_name
        self.buildroot_name = pkg_buildroot_name(self.real_name)
        self.pkg_dir = os.path.join(pkg_folder, self.buildroot_name)
        self.mk_name = self.buildroot_name.upper().replace('-', '_')
        self.as_string = None
        self.md5_sum = None
        self.metadata = None
        self.metadata_name = None
        self.metadata_url = None
        self.pkg_req = None
        self.setup_metadata = None
        self.tmp_extract = None
        self.used_url = None
        self.filename = None
        self.url = None
        self.version = None
        self.license_files = []

    def fetch_package_info(self):
        """
        Fetch a package's metadata from the python package index

        Raises urllib2.URLError (or its subclass urllib2.HTTPError) when the
        metadata cannot be fetched.
        """
        self.metadata_url = 'https://pypi.org/pypi/{pkg}/json'.format(
            pkg=self.real_name)
        try:
            # Decode explicitly: on python 2 a bare decode() means ASCII
            pkg_json = urllib2.urlopen(
                self.metadata_url).read().decode('utf-8')
        except urllib2.URLError as error:
            # HTTPError is a subclass of URLError with a status code
            if isinstance(error, urllib2.HTTPError):
                print('ERROR:', error.getcode(), error.msg, file=sys.stderr)
            print('ERROR: Could not find package {pkg}.\n'
                  'Check syntax inside the python package index:\n'
                  'https://pypi.python.org/pypi/ '
                  .format(pkg=self.real_name))
            raise
        self.metadata = json.loads(pkg_json)
        self.version = self.metadata['info']['version']
        self.metadata_name = self.metadata['info']['name']

    def download_package(self):
        """
        Download a package using metadata from pypi

        Raises urllib2.HTTPError when the last download attempt failed with
        an HTTP error, DownloadFailed when no candidate URL gave a file
        whose md5 matches the published one.
        """
        try:
            self.metadata['urls'][0]['filename']
        except IndexError:
            print(
                'Non-conventional package, ',
                'please check carefully after creation')
            fallback_url = self.metadata['info']['download_url']
            # In this case, we can't get the name of the downloaded file
            # from the pypi api, so we need to find it, this should work.
            # urlparse().path gives something like
            # /path/to/file-version.tar.gz
            # We use basename to remove /path/to
            urlpath = urllib2.urlparse.urlparse(fallback_url).path
            self.metadata['urls'] = [{
                'packagetype': 'sdist',
                'url': fallback_url,
                'digests': None,
                'filename': os.path.basename(urlpath)}]

        last_error = None
        for download_url in self.metadata['urls']:
            if 'bdist' in download_url['packagetype']:
                continue
            try:
                print('Downloading package {pkg} from {url}...'.format(
                      pkg=self.real_name, url=download_url['url']))
                download = urllib2.urlopen(download_url['url'])
            except urllib2.HTTPError as http_error:
                last_error = http_error
                continue
            last_error = None
            self.used_url = download_url
            self.as_string = download.read()
            # 'digests' is None for the fallback entry built above
            expected_md5 = (download_url.get('digests') or {}).get('md5')
            if not expected_md5:
                break
            self.md5_sum = hashlib.md5(self.as_string).hexdigest()
            if self.md5_sum == expected_md5:
                break
        else:
            # No candidate was accepted: report the HTTP error of the last
            # attempt if there was one, a generic failure otherwise.
            if last_error is not None:
                raise last_error
            raise DownloadFailed('Failed to download package {pkg}'
                                 .format(pkg=self.real_name))
        self.filename = self.used_url['filename']
        self.url = self.used_url['url']

    @staticmethod
    def _make_clean_dir(path):
        """
        Create the empty directory path, replacing it if it already exists.
        Returns False (after printing the error) when it cannot be created.
        """
        try:
            os.makedirs(path)
        except OSError as exception:
            # str() rather than .message: the latter is empty for OSError
            if exception.errno != errno.EEXIST:
                print("ERROR: ", str(exception), file=sys.stderr)
                return False
            print('WARNING:', str(exception), file=sys.stderr)
            print('Removing {pkg}...'.format(pkg=path))
            shutil.rmtree(path)
            os.makedirs(path)
        return True

    def extract_package(self, tmp_path):
        """
        Extract the package contents into a directory

        Returns (None, None) without extracting anything when the target
        directory cannot be created, None otherwise.
        Lets tarfile.ReadError / zipfile.BadZipfile propagate.

        Keyword arguments:
        tmp_path -- directory where you want the package to be extracted
        """
        as_file = StringIO.StringIO(self.as_string)
        tmp_pkg = os.path.join(tmp_path, self.buildroot_name)
        # The archive is opened before the directory is touched so that a
        # corrupted download leaves the file system untouched.
        if self.filename[-3:] == 'zip':
            archive = zipfile.ZipFile(as_file)
        else:
            archive = tarfile.open(fileobj=as_file)
        with archive:
            if not self._make_clean_dir(tmp_pkg):
                return None, None
            # NOTE(review): extractall() trusts the member names of a
            # downloaded archive; a malicious archive could write outside
            # of tmp_pkg.
            archive.extractall(tmp_pkg)

        tmp_extract = '{folder}/{name}-{version}'
        self.tmp_extract = tmp_extract.format(
            folder=tmp_pkg,
            name=self.metadata_name,
            version=self.version)

    def load_setup(self):
        """
        Loads the corresponding setup and store its metadata

        The working directory, sys.path and sys.modules are restored even
        when loading setup.py fails.
        Raises ImportError when setup.py (or one of its imports) cannot be
        loaded, AttributeError when setup() was not called on import and
        setup.py has no main() function.
        """
        current_dir = os.getcwd()
        os.chdir(self.tmp_extract)
        sys.path.append(self.tmp_extract)
        try:
            s_file, s_path, s_desc = imp.find_module('setup',
                                                     [self.tmp_extract])
            try:
                setup = imp.load_module('setup', s_file, s_path, s_desc)
            finally:
                # The caller of load_module() has to close the file
                if s_file:
                    s_file.close()
            try:
                self.setup_metadata = self.setup_args[self.metadata_name]
            except KeyError:
                # This means setup was not called which most likely mean
                # that it is called through the if __name__ == '__main__'
                # directive. In this case, we can only pray that it is
                # called through a function called main() in setup.py.
                setup.main()  # Will raise AttributeError if not found
                self.setup_metadata = self.setup_args[self.metadata_name]
        finally:
            # Here we must remove the module the hard way.
            # We must do this because of a very specific case: if a package
            # calls setup from the __main__ but does not come with a
            # 'main()' function, for some reason setup.main() will
            # successfully call the main function of a previous package...
            sys.modules.pop('setup', None)
            os.chdir(current_dir)
            sys.path.remove(self.tmp_extract)

    def get_requirements(self, pkg_folder):
        """
        Retrieve dependencies from the metadata found in the setup.py script of
        a pypi package.

        Stores the Buildroot names of the dependencies in self.pkg_req
        (None when setup.py declares no install_requires) and returns the
        set of PyPI names that have no directory yet in pkg_folder.

        Keyword Arguments:
        pkg_folder -- location of the already created packages
        """
        if 'install_requires' not in self.setup_metadata:
            self.pkg_req = None
            return set()
        # Only keep the leading name of each requirement (drop versions)
        requirements = [re.sub(r'([-.\w]+).*', r'\1', req)
                        for req in self.setup_metadata['install_requires']]

        # get rid of commented lines and also strip the package strings
        requirements = [item.strip() for item in requirements
                        if len(item) > 0 and item[0] != '#']

        self.pkg_req = [pkg_buildroot_name(req) for req in requirements]
        # Both names are needed: the Buildroot one ('python-werkzeug') to
        # look for an existing package, the PyPI one ('werkzeug') to add it
        # to the download list.
        return set(
            pypi_name
            for pypi_name, br_name in zip(requirements, self.pkg_req)
            if not os.path.isdir(os.path.join(pkg_folder, br_name)))

    def _license_rel_path(self, license_file):
        """
        Return the path of license_file relative to the extracted package.
        """
        return os.path.relpath(license_file, self.tmp_extract)

    def __create_mk_header(self):
        """
        Create the header of the <package_name>.mk file
        """
        banner = '#' * 80 + '\n'
        return [
            banner,
            '#\n',
            '# {name}\n'.format(name=self.buildroot_name),
            '#\n',
            banner,
            '\n',
        ]

    def __create_mk_download_info(self):
        """
        Create the lines referring to the download information of the
        <package_name>.mk file
        """
        lines = []
        version_line = '{name}_VERSION = {version}\n'.format(
            name=self.mk_name,
            version=self.version)
        lines.append(version_line)

        targz = self.filename.replace(
            self.version,
            '$({name}_VERSION)'.format(name=self.mk_name))
        targz_line = '{name}_SOURCE = {filename}\n'.format(
            name=self.mk_name,
            filename=targz)
        lines.append(targz_line)

        if self.filename not in self.url:
            # Sometimes the filename is in the url, sometimes it's not
            site_url = self.url
        else:
            site_url = self.url[:self.url.find(self.filename)]
        site_line = '{name}_SITE = {url}'.format(name=self.mk_name,
                                                 url=site_url)
        site_line = site_line.rstrip('/') + '\n'
        lines.append(site_line)
        return lines

    def __create_mk_setup(self):
        """
        Create the line referring to the setup method of the package of the
        <package_name>.mk file

        There are two things you can use to make an installer
        for a python package: distutils or setuptools
        distutils comes with python but does not support dependencies.
        distutils is mostly still there for backward support.
        setuptools is what smart people use,
        but it is not shipped with python :(
        """
        setup_type_line = '{name}_SETUP_TYPE = {method}\n'.format(
            name=self.mk_name,
            method=self.setup_metadata['method'])
        return [setup_type_line]

    def __get_license_names(self, license_files):
        """
        Try to determine the related license name.

        There are two possibilities. Either the scripts tries to
        get license name from package's metadata or, if spdx_lookup
        package is available, the script compares license files with
        SPDX database.

        Returns the <NAME>_LICENSE line, or an empty string when
        spdx_lookup is available but recognised none of the files.
        """
        license_line = ''
        if liclookup is None:
            license_dict = {
                'Apache Software License': 'Apache-2.0',
                'BSD License': 'BSD',
                'European Union Public Licence 1.0': 'EUPL-1.0',
                'European Union Public Licence 1.1': 'EUPL-1.1',
                "GNU General Public License": "GPL",
                "GNU General Public License v2": "GPL-2.0",
                "GNU General Public License v2 or later": "GPL-2.0+",
                "GNU General Public License v3": "GPL-3.0",
                "GNU General Public License v3 or later": "GPL-3.0+",
                "GNU Lesser General Public License v2": "LGPL-2.1",
                "GNU Lesser General Public License v2 or later": "LGPL-2.1+",
                "GNU Lesser General Public License v3": "LGPL-3.0",
                "GNU Lesser General Public License v3 or later": "LGPL-3.0+",
                "GNU Library or Lesser General Public License": "LGPL-2.0",
                "ISC License": "ISC",
                "MIT License": "MIT",
                "Mozilla Public License 1.0": "MPL-1.0",
                "Mozilla Public License 1.1": "MPL-1.1",
                "Mozilla Public License 2.0": "MPL-2.0",
                "Zope Public License": "ZPL"
                }
            regexp = re.compile(r'^License :* *.* *:+ (.*)( \(.*\))?$')
            classifiers_licenses = [
                regexp.sub(r"\1", lic)
                for lic in self.metadata['info']['classifiers']
                if regexp.match(lic)]
            # Unknown classifier names are kept untouched
            licenses = [license_dict.get(lic, lic)
                        for lic in classifiers_licenses]
            if not licenses:
                # No license classifier: fall back to the free-form license
                # field, which may be missing. 'unknown' is only a
                # placeholder to be fixed by hand.
                licenses = [self.metadata['info']['license'] or 'unknown']
                print('WARNING: License has been set to "{license}". It is most'
                      ' likely wrong, please change it if need be'.format(
                          license=', '.join(licenses)))
            license_line = '{name}_LICENSE = {license}\n'.format(
                name=self.mk_name,
                license=', '.join(licenses))
        else:
            license_names = []
            for license_file in license_files:
                with open(license_file) as lic_file:
                    match = liclookup.match(lic_file.read())
                # Guard against a lookup that returns no match object
                if match is not None and match.confidence >= 90.0:
                    license_names.append(match.license.id)

            if license_names:
                license_line = ('{name}_LICENSE ='
                                ' {names}\n'.format(
                                    name=self.mk_name,
                                    names=', '.join(license_names)))

        return license_line

    def __create_mk_license(self):
        """
        Create the lines referring to the package's license information of the
        <package_name>.mk file

        The license's files are found by searching the package (case insensitive)
        for files named license, license.txt etc. Every file found is
        listed; when there is more than one, their names are also printed
        so that the user can check them.
        """
        lines = []

        filenames = ['LICENCE', 'LICENSE', 'LICENSE.RST', 'LICENSE.TXT',
                     'COPYING', 'COPYING.TXT']
        self.license_files = list(
            find_file_upper_case(filenames, self.tmp_extract))

        lines.append(self.__get_license_names(self.license_files))

        license_files = [self._license_rel_path(path)
                         for path in self.license_files]
        if not license_files:
            print('WARNING: No license file found,'
                  ' please specify it manually afterwards')
            return lines

        if len(license_files) > 1:
            print('More than one file found for license:',
                  ', '.join(license_files))
        license_file_line = ('{name}_LICENSE_FILES ='
                             ' {files}\n'.format(
                                 name=self.mk_name,
                                 files=' '.join(license_files)))
        lines.append(license_file_line)
        return lines

    def __create_mk_requirements(self):
        """
        Create the lines referring to the dependencies of the
        <package_name>.mk file, from self.pkg_req
        """
        dependencies_line = ('{name}_DEPENDENCIES ='
                             ' {reqs}\n'.format(
                                 name=self.mk_name,
                                 # pkg_req is None without requirements
                                 reqs=' '.join(self.pkg_req or [])))
        return [dependencies_line]

    def create_package_mk(self):
        """
        Create the lines corresponding to the <package_name>.mk file
        and write them into the package directory
        """
        pkg_mk = '{name}.mk'.format(name=self.buildroot_name)
        path_to_mk = os.path.join(self.pkg_dir, pkg_mk)
        print('Creating {file}...'.format(file=path_to_mk))
        lines = self.__create_mk_header()
        lines += self.__create_mk_download_info()
        lines += self.__create_mk_setup()
        lines += self.__create_mk_license()

        lines.append('\n')
        lines.append('$(eval $(python-package))')
        lines.append('\n')
        with open(path_to_mk, 'w') as mk_file:
            mk_file.writelines(lines)

    def create_hash_file(self):
        """
        Create the lines corresponding to the <package_name>.hash files
        and write them into the package directory
        """
        pkg_hash = '{name}.hash'.format(name=self.buildroot_name)
        path_to_hash = os.path.join(self.pkg_dir, pkg_hash)
        print('Creating {filename}...'.format(filename=path_to_hash))
        lines = []
        # 'digests' is None when the download URL did not come from the
        # pypi 'urls' list
        digests = self.used_url.get('digests') or {}
        if digests.get('md5') and digests.get('sha256'):
            hash_header = '# md5, sha256 from {url}\n'.format(
                url=self.metadata_url)
            lines.append(hash_header)
            for method in ('md5', 'sha256'):
                hash_line = '{method}\t{digest}  {filename}\n'.format(
                    method=method,
                    digest=digests[method],
                    filename=self.filename)
                lines.append(hash_line)

        if self.license_files:
            lines.append('# Locally computed sha256 checksums\n')
        for license_file in self.license_files:
            sha256 = hashlib.sha256()
            with open(license_file, 'rb') as lic_f:
                # Hash the file chunk by chunk
                for data in iter(lambda: lic_f.read(BUF_SIZE), b''):
                    sha256.update(data)
            hash_line = '{method}\t{digest}  {filename}\n'.format(
                method='sha256',
                digest=sha256.hexdigest(),
                # Same relative path as in <NAME>_LICENSE_FILES
                filename=self._license_rel_path(license_file))
            lines.append(hash_line)

        with open(path_to_hash, 'w') as hash_file:
            hash_file.writelines(lines)

    def create_config_in(self):
        """
        Creates the Config.in file of a package
        """
        path_to_config = os.path.join(self.pkg_dir, 'Config.in')
        print('Creating {file}...'.format(file=path_to_config))
        lines = []
        config_line = 'config BR2_PACKAGE_{name}\n'.format(
            name=self.mk_name)
        lines.append(config_line)

        bool_line = '\tbool "{name}"\n'.format(name=self.buildroot_name)
        lines.append(bool_line)
        if self.pkg_req:
            for dep in self.pkg_req:
                dep_line = '\tselect BR2_PACKAGE_{req} # runtime\n'.format(
                    req=dep.upper().replace('-', '_'))
                lines.append(dep_line)

        lines.append('\thelp\n')

        # Both fields may be empty or missing in the metadata
        summary = self.metadata['info']['summary'] or ''
        home_page = self.metadata['info']['home_page']

        # \t + two spaces is 3 char long
        help_lines = textwrap.wrap(summary,
                                   initial_indent='\t  ',
                                   subsequent_indent='\t  ')

        # make sure a help text is terminated with a full stop
        if help_lines and not help_lines[-1].endswith('.'):
            help_lines[-1] += '.'

        if home_page:
            if help_lines:
                # Blank line between the description and the url
                help_lines.append('')
            help_lines.append('\t  ' + home_page)
        lines += [line + '\n' for line in help_lines]

        with open(path_to_config, 'w') as config_file:
            config_file.writelines(lines)
592
593
def main():
    """
    Command line entry point.

    Creates a Buildroot package for every PyPI package named on the command
    line, and for the dependencies discovered along the way. A package that
    cannot be fetched, downloaded, extracted or loaded is reported and
    skipped so that the remaining ones are still processed.
    """
    # Building the parser
    parser = argparse.ArgumentParser(
        description="Creates buildroot packages from the metadata of "
                    "an existing PyPI packages and include it "
                    "in menuconfig")
    parser.add_argument("packages",
                        help="list of packages to be created",
                        nargs='+')
    parser.add_argument("-o", "--output",
                        help="""
                        Output directory for packages.
                        Default is ./package
                        """,
                        default='./package')

    args = parser.parse_args()
    packages = list(set(args.packages))

    # tmp_path is where we'll extract the files later
    tmp_prefix = 'scanpypi-'
    pkg_folder = args.output
    tmp_path = tempfile.mkdtemp(prefix=tmp_prefix)
    try:
        # The list grows while it is being iterated: dependencies found
        # below are appended so that they get processed as well.
        for real_pkg_name in packages:
            package = BuildrootPackage(real_pkg_name, pkg_folder)
            print('buildroot package name for {}:'.format(package.real_name),
                  package.buildroot_name)
            # First we download the package
            # Most of the info we need can only be found inside the package
            print('Package:', package.buildroot_name)
            print('Fetching package', package.real_name)
            try:
                package.fetch_package_info()
            except urllib2.URLError:
                # Also covers HTTPError, a subclass of URLError; the
                # details were already printed by fetch_package_info()
                continue
            if package.metadata_name.lower() == 'setuptools':
                # setuptools imports itself, that does not work very well
                # with the monkey patch at the beginning
                print('Error: setuptools cannot be built using scanPyPI')
                continue

            try:
                package.download_package()
            except urllib2.HTTPError as error:
                print('Error: {code} {reason}'.format(code=error.code,
                                                      reason=error.reason))
                print('Error downloading package :', package.buildroot_name)
                print()
                continue
            except DownloadFailed as error:
                # No usable file could be downloaded: skip this package
                # instead of aborting the whole run
                print('Error: {error}'.format(error=error))
                print('Error downloading package :', package.buildroot_name)
                print()
                continue

            # extract the tarball
            try:
                package.extract_package(tmp_path)
            except (tarfile.ReadError, zipfile.BadZipfile):
                print('Error extracting package {}'.format(package.real_name))
                print()
                continue

            # Loading the package install info from the package
            try:
                package.load_setup()
            except ImportError as err:
                if 'buildutils' in str(err):
                    print('This package needs buildutils')
                else:
                    raise
                continue
            except AttributeError as error:
                print('Error: Could not install package {pkg}: {error}'.format(
                    pkg=package.real_name, error=error))
                continue

            # Package requirement are an argument of the setup function
            req_not_found = package.get_requirements(pkg_folder)
            req_not_found = req_not_found.difference(packages)

            packages += req_not_found
            if req_not_found:
                print('Added packages \'{pkgs}\' as dependencies of {pkg}'
                      .format(pkgs=", ".join(req_not_found),
                              pkg=package.buildroot_name))
            print('Checking if package {name} already exists...'.format(
                name=package.pkg_dir))
            try:
                os.makedirs(package.pkg_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    # str() rather than .message: the latter is empty for
                    # OSError
                    print("ERROR: ", str(exception), file=sys.stderr)
                    continue
                print('Error: Package {name} already exists'
                      .format(name=package.pkg_dir))
                del_pkg = raw_input(
                    'Do you want to delete existing package ? [y/N]')
                if del_pkg.lower() == 'y':
                    shutil.rmtree(package.pkg_dir)
                    os.makedirs(package.pkg_dir)
                else:
                    continue
            package.create_package_mk()

            package.create_hash_file()

            package.create_config_in()
            # printing an empty line for visual comfort
            print()
    finally:
        # Always remove the temporary extraction directory
        shutil.rmtree(tmp_path)
699             # printing an empty line for visual confort
700     finally:
701         shutil.rmtree(tmp_path)
702
703
# Only run the command line tool when this file is executed as a script
if __name__ == "__main__":
    main()