4 Utility for building Buildroot packages for existing PyPI packages
6 Any package built by scanpypi should be manually checked for
9 from __future__ import print_function
25 from functools import wraps
30 import spdx_lookup as liclookup
32 # spdx_lookup is not installed
33 print('spdx_lookup module is not installed. This can lead to an '
34 'inaccurate licence detection. Please install it via\n'
35 'pip install spdx_lookup')
def setup_decorator(func, method):
    """
    Build a stand-in for distutils.core.setup or setuptools.setup.

    The stand-in does not run the wrapped setup function. It stores the
    keyword arguments of the call in BuildrootPackage.setup_args, keyed by
    the 'name' argument, and adds a 'method' entry to them.

    Keyword arguments:
    func -- either setuptools.setup or distutils.core.setup
    method -- either 'setuptools' or 'distutils'
    """

    @wraps(func)
    def recorder(*args, **kwargs):
        # Every python package calls its setup function to be installed;
        # the 'name' keyword of that call is the package's name.
        # A call without 'name' raises KeyError.
        package_name = kwargs['name']
        kwargs['method'] = method
        BuildrootPackage.setup_args[package_name] = kwargs

    return recorder
# Monkey patch: replace both setup() entry points so that importing a
# package's setup.py records its arguments instead of running an install.
import setuptools  # noqa E402
setuptools.setup = setup_decorator(setuptools.setup, 'setuptools')
# 'import distutils' alone does not load the 'core' submodule, so import it
# explicitly, and wrap the distutils function rather than the setuptools one.
import distutils.core  # noqa E402
distutils.core.setup = setup_decorator(distutils.core.setup, 'distutils')
def find_file_upper_case(filenames, path='./'):
    """
    List generator:
    Recursively find files whose upper-cased name is one of filenames.
    Each match is yielded as os.path.join(root, name), so results start
    with the path argument.

    Keyword arguments:
    filenames -- Iterable of upper-case file names to be found
    path -- Path to the directory to search
    """
    # Build the lookup once: the same names are tested against every file.
    wanted = frozenset(filenames)
    for root, _dirs, names in os.walk(path):
        for name in names:
            if name.upper() in wanted:
                yield os.path.join(root, name)
def pkg_buildroot_name(pkg_name):
    """
    Returns the Buildroot package name for the PyPI package pkg_name.
    Lowers the name, removes every character that is neither a word
    character (letter, digit, underscore) nor '-', and adds the 'python-'
    prefix unless the name already starts with it.

    Keyword arguments:
    pkg_name -- String to rename
    """
    prefix = 'python-'
    # Raw string: '\w' inside a plain literal is an invalid escape sequence.
    name = re.sub(r'[^\w-]', '', pkg_name.lower())
    # A name that ends up empty is returned unchanged.
    if name and not name.startswith(prefix):
        name = prefix + name
    return name
class DownloadFailed(Exception):
    """Raised by BuildrootPackage.download_package when no download of the
    package could be used."""
class BuildrootPackage():
    """This class's methods are not meant to be used individually please
    use them in the correct order:

    __init__

    fetch_package_info

    download_package

    extract_package

    load_setup

    get_requirements

    create_package_mk

    create_hash_file

    create_config_in

    """
    # Keyword arguments of every intercepted setup() call, keyed by the
    # package name. Shared by all instances.
    setup_args = {}

    def __init__(self, real_name, pkg_folder):
        """
        Keyword arguments:
        real_name -- name of the package on PyPI
        pkg_folder -- directory in which the Buildroot package is created
        """
        self.real_name = real_name
        self.buildroot_name = pkg_buildroot_name(self.real_name)
        self.pkg_dir = os.path.join(pkg_folder, self.buildroot_name)
        self.mk_name = self.buildroot_name.upper().replace('-', '_')
        self.as_string = None
        self.md5_sum = None
        self.metadata = None
        self.metadata_name = None
        self.metadata_url = None
        self.pkg_req = None
        self.setup_metadata = None
        self.tmp_extract = None
        self.used_url = None
        self.filename = None
        self.url = None
        self.version = None
        self.license_files = []

    def fetch_package_info(self):
        """
        Fetch a package's metadata from the python package index

        Sets metadata_url, metadata, version and metadata_name.
        A urllib2.HTTPError or urllib2.URLError raised by the request is
        reported on the console and then re-raised.
        """
        self.metadata_url = 'https://pypi.org/pypi/{pkg}/json'.format(
            pkg=self.real_name)
        try:
            pkg_json = urllib2.urlopen(self.metadata_url).read().decode()
        except urllib2.URLError as error:
            # HTTPError is a subclass of URLError; only it carries a status.
            if isinstance(error, urllib2.HTTPError):
                print('ERROR:', error.getcode(), error.msg, file=sys.stderr)
            print('ERROR: Could not find package {pkg}.\n'
                  'Check syntax inside the python package index:\n'
                  'https://pypi.python.org/pypi/ '
                  .format(pkg=self.real_name))
            raise
        self.metadata = json.loads(pkg_json)
        self.version = self.metadata['info']['version']
        self.metadata_name = self.metadata['info']['name']

    def download_package(self):
        """
        Download a package using metadata from pypi

        Tries every non-bdist entry of metadata['urls'] until one downloads
        and, when an md5 digest is published, matches it. Sets used_url,
        as_string, md5_sum, filename and url.

        Raises the last urllib2.HTTPError when the last attempt failed with
        one, DownloadFailed otherwise.
        """
        try:
            self.metadata['urls'][0]['filename']
        except IndexError:
            print(
                'Non-conventional package, ',
                'please check carefully after creation')
            self.metadata['urls'] = [{
                'packagetype': 'sdist',
                'url': self.metadata['info']['download_url'],
                'digests': None}]
            # In this case, we can't get the name of the downloaded file
            # from the pypi api, so we need to find it, this should work
            urlpath = urllib2.urlparse.urlparse(
                self.metadata['info']['download_url']).path
            # urlparse().path give something like
            # /path/to/file-version.tar.gz
            # We use basename to remove /path/to
            self.metadata['urls'][0]['filename'] = os.path.basename(urlpath)

        # Stays None when every entry is a bdist, so the check after the
        # loop cannot hit an unbound name.
        download = None
        for download_url in self.metadata['urls']:
            if 'bdist' in download_url['packagetype']:
                continue
            try:
                print('Downloading package {pkg} from {url}...'.format(
                      pkg=self.real_name, url=download_url['url']))
                download = urllib2.urlopen(download_url['url'])
            except urllib2.HTTPError as http_error:
                download = http_error
            else:
                self.used_url = download_url
                self.as_string = download.read()
                # The fallback entry built above has no digests at all.
                expected_md5 = (download_url.get('digests') or {}).get('md5')
                if not expected_md5:
                    break
                self.md5_sum = hashlib.md5(self.as_string).hexdigest()
                if self.md5_sum == expected_md5:
                    break
        else:
            if isinstance(download, urllib2.HTTPError):
                raise download
            raise DownloadFailed('Failed to download package {pkg}'
                                 .format(pkg=self.real_name))
        self.filename = self.used_url['filename']
        self.url = self.used_url['url']

    @staticmethod
    def _make_clean_dir(path):
        """
        Create directory path; an already existing one is removed first.

        Returns True on success, False when the directory cannot be created.
        """
        try:
            os.makedirs(path)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                print("ERROR: ", str(exception), file=sys.stderr)
                return False
            print('WARNING:', str(exception), file=sys.stderr)
            print('Removing {pkg}...'.format(pkg=path))
            shutil.rmtree(path)
            os.makedirs(path)
        return True

    def extract_package(self, tmp_path):
        """
        Extract the package contents into a directory

        Sets tmp_extract to <tmp_path>/<buildroot name>/<name>-<version>.
        Returns (None, None) without extracting when the target directory
        cannot be created.

        Keyword arguments:
        tmp_path -- directory where you want the package to be extracted
        """
        as_file = StringIO.StringIO(self.as_string)
        tmp_pkg = os.path.join(tmp_path, self.buildroot_name)
        # The archive is opened before the directory is touched, so an
        # invalid archive raises before anything is removed.
        if self.filename[-3:] == 'zip':
            archive = zipfile.ZipFile(as_file)
        else:
            archive = tarfile.open(fileobj=as_file)
        with archive:
            if not self._make_clean_dir(tmp_pkg):
                return None, None
            # NOTE(review): extractall() trusts the member paths stored in
            # the downloaded archive.
            archive.extractall(tmp_pkg)

        tmp_extract = '{folder}/{name}-{version}'
        self.tmp_extract = tmp_extract.format(
            folder=tmp_pkg,
            name=self.metadata_name,
            version=self.version)

    def load_setup(self):
        """
        Loads the corresponding setup and store its metadata

        Imports the extracted setup.py, which executes it, and reads the
        arguments recorded in setup_args. The working directory, sys.path
        and sys.modules are restored even when the import fails.
        """
        current_dir = os.getcwd()
        os.chdir(self.tmp_extract)
        sys.path.append(self.tmp_extract)
        s_file = None
        try:
            s_file, s_path, s_desc = imp.find_module('setup',
                                                     [self.tmp_extract])
            setup = imp.load_module('setup', s_file, s_path, s_desc)
            try:
                self.setup_metadata = self.setup_args[self.metadata_name]
            except KeyError:
                # This means setup was not called which most likely mean
                # that it is called through the if __name__ == '__main__'
                # directive. In this case, we can only pray that it is
                # called through a function called main() in setup.py.
                setup.main()  # Will raise AttributeError if not found
                self.setup_metadata = self.setup_args[self.metadata_name]
        finally:
            # Here we must remove the module the hard way.
            # We must do this because of a very specific case: if a package
            # calls setup from the __main__ but does not come with a
            # 'main()' function, for some reason setup.main() will
            # successfully call the main function of a previous package...
            sys.modules.pop('setup', None)
            if s_file is not None:
                s_file.close()
            os.chdir(current_dir)
            sys.path.remove(self.tmp_extract)

    def get_requirements(self, pkg_folder):
        """
        Retrieve dependencies from the metadata found in the setup.py script
        of a pypi package.

        Sets pkg_req to the list of Buildroot names of the dependencies
        (None when setup() had no 'install_requires') and returns the set
        of PyPI names that have no directory in pkg_folder yet.

        Keyword Arguments:
        pkg_folder -- location of the already created packages
        """
        if 'install_requires' not in self.setup_metadata:
            self.pkg_req = None
            return set()
        # Keep only the leading name of each requirement, dropping version
        # specifiers and anything else that follows it.
        pypi_names = [re.sub(r'([-.\w]+).*', r'\1', req)
                      for req in self.setup_metadata['install_requires']]

        # get rid of commented lines and also strip the package strings
        pypi_names = [item.strip() for item in pypi_names
                      if len(item) > 0 and item[0] != '#']

        # A real list: it is iterated again by create_config_in.
        self.pkg_req = [pkg_buildroot_name(name) for name in pypi_names]
        # Both names are needed: the Buildroot one to look for an existing
        # package directory, the PyPI one to queue the download.
        return set(
            pypi_name
            for pypi_name, br_name in zip(pypi_names, self.pkg_req)
            if not os.path.isdir(os.path.join(pkg_folder, br_name))
        )

    def __create_mk_header(self):
        """
        Create the header of the <package_name>.mk file
        """
        header = ['#' * 80 + '\n']
        header.append('#\n')
        header.append('# {name}\n'.format(name=self.buildroot_name))
        header.append('#\n')
        header.append('#' * 80 + '\n')
        header.append('\n')
        return header

    def __create_mk_download_info(self):
        """
        Create the lines referring to the download information of the
        <package_name>.mk file
        """
        lines = []
        version_line = '{name}_VERSION = {version}\n'.format(
            name=self.mk_name,
            version=self.version)
        lines.append(version_line)

        targz = self.filename.replace(
            self.version,
            '$({name}_VERSION)'.format(name=self.mk_name))
        targz_line = '{name}_SOURCE = {filename}\n'.format(
            name=self.mk_name,
            filename=targz)
        lines.append(targz_line)

        if self.filename not in self.url:
            # Sometimes the filename is in the url, sometimes it's not
            site_url = self.url
        else:
            site_url = self.url[:self.url.find(self.filename)]
        site_line = '{name}_SITE = {url}'.format(name=self.mk_name,
                                                 url=site_url)
        site_line = site_line.rstrip('/') + '\n'
        lines.append(site_line)
        return lines

    def __create_mk_setup(self):
        """
        Create the line referring to the setup method of the package of the
        <package_name>.mk file

        There are two things you can use to make an installer
        for a python package: distutils or setuptools
        distutils comes with python but does not support dependencies.
        distutils is mostly still there for backward support.
        setuptools is what smart people use,
        but it is not shipped with python :(
        """
        lines = []
        setup_type_line = '{name}_SETUP_TYPE = {method}\n'.format(
            name=self.mk_name,
            method=self.setup_metadata['method'])
        lines.append(setup_type_line)
        return lines

    def __get_license_names(self, license_files):
        """
        Try to determine the related license name.

        There are two possibilities. Either the scripts tries to
        get license name from package's metadata or, if spdx_lookup
        package is available, the script compares license files with
        SPDX database.

        Returns the '<NAME>_LICENSE = ...' line, or an empty string when
        spdx_lookup is available but recognised none of the files.
        """
        license_line = ''
        if liclookup is None:
            license_dict = {
                'Apache Software License': 'Apache-2.0',
                'BSD License': 'BSD',
                'European Union Public Licence 1.0': 'EUPL-1.0',
                'European Union Public Licence 1.1': 'EUPL-1.1',
                "GNU General Public License": "GPL",
                "GNU General Public License v2": "GPL-2.0",
                "GNU General Public License v2 or later": "GPL-2.0+",
                "GNU General Public License v3": "GPL-3.0",
                "GNU General Public License v3 or later": "GPL-3.0+",
                "GNU Lesser General Public License v2": "LGPL-2.1",
                "GNU Lesser General Public License v2 or later": "LGPL-2.1+",
                "GNU Lesser General Public License v3": "LGPL-3.0",
                "GNU Lesser General Public License v3 or later": "LGPL-3.0+",
                "GNU Library or Lesser General Public License": "LGPL-2.0",
                "ISC License": "ISC",
                "MIT License": "MIT",
                "Mozilla Public License 1.0": "MPL-1.0",
                "Mozilla Public License 1.1": "MPL-1.1",
                "Mozilla Public License 2.0": "MPL-2.0",
                "Zope Public License": "ZPL"
                }
            regexp = re.compile(r'^License :* *.* *:+ (.*)( \(.*\))?$')
            classifiers_licenses = [regexp.sub(r"\1", lic)
                                    for lic in self.metadata['info']['classifiers']
                                    if regexp.match(lic)]
            # Classifier names without a known short form are kept as is.
            licenses = [license_dict.get(name, name)
                        for name in classifiers_licenses]
            if not licenses:
                # Assign the fallback first so the warning shows its value.
                licenses = [self.metadata['info']['license']]
                print('WARNING: License has been set to "{license}". It is most'
                      ' likely wrong, please change it if need be'.format(
                          license=', '.join(licenses)))
            license_line = '{name}_LICENSE = {license}\n'.format(
                name=self.mk_name,
                license=', '.join(licenses))
        else:
            license_names = []
            for license_file in license_files:
                with open(license_file) as lic_file:
                    match = liclookup.match(lic_file.read())
                # Guard against a lookup that produced no match object.
                if match is not None and match.confidence >= 90.0:
                    license_names.append(match.license.id)

            if len(license_names) > 0:
                license_line = ('{name}_LICENSE ='
                                ' {names}\n'.format(
                                    name=self.mk_name,
                                    names=', '.join(license_names)))

        return license_line

    def _license_rel_path(self, license_file):
        """Return license_file as a path relative to the extracted package."""
        return os.path.relpath(license_file, self.tmp_extract)

    def __create_mk_license(self):
        """
        Create the lines referring to the package's license information of
        the <package_name>.mk file

        The license's files are found by searching the package (case
        insensitive) for files named license, license.txt etc. Every file
        found is listed; when there is more than one, they are also printed
        so the result can be reviewed.
        """
        lines = []

        filenames = ['LICENCE', 'LICENSE', 'LICENSE.RST', 'LICENSE.TXT',
                     'COPYING', 'COPYING.TXT']
        self.license_files = list(
            find_file_upper_case(filenames, self.tmp_extract))

        lines.append(self.__get_license_names(self.license_files))

        license_files = [self._license_rel_path(path)
                         for path in self.license_files]
        if len(license_files) > 0:
            if len(license_files) > 1:
                print('More than one file found for license:',
                      ', '.join(license_files))
            license_file_line = ('{name}_LICENSE_FILES ='
                                 ' {files}\n'.format(
                                     name=self.mk_name,
                                     files=' '.join(license_files)))
            lines.append(license_file_line)
        else:
            # No <NAME>_LICENSE_FILES line is emitted in this case.
            print('WARNING: No license file found,'
                  ' please specify it manually afterwards')

        return lines

    def __create_mk_requirements(self):
        """
        Create the lines referring to the dependencies of the
        <package_name>.mk file

        Uses mk_name and pkg_req.
        """
        lines = []
        dependencies_line = ('{name}_DEPENDENCIES ='
                             ' {reqs}\n'.format(
                                 name=self.mk_name,
                                 reqs=' '.join(self.pkg_req)))
        lines.append(dependencies_line)
        return lines

    def create_package_mk(self):
        """
        Create the lines corresponding to the <package_name>.mk file
        """
        pkg_mk = '{name}.mk'.format(name=self.buildroot_name)
        path_to_mk = os.path.join(self.pkg_dir, pkg_mk)
        print('Creating {file}...'.format(file=path_to_mk))
        lines = self.__create_mk_header()
        lines += self.__create_mk_download_info()
        lines += self.__create_mk_setup()
        lines += self.__create_mk_license()

        lines.append('\n')
        lines.append('$(eval $(python-package))')
        lines.append('\n')
        with open(path_to_mk, 'w') as mk_file:
            mk_file.writelines(lines)

    def create_hash_file(self):
        """
        Create the lines corresponding to the <package_name>.hash files
        """
        pkg_hash = '{name}.hash'.format(name=self.buildroot_name)
        path_to_hash = os.path.join(self.pkg_dir, pkg_hash)
        print('Creating {filename}...'.format(filename=path_to_hash))
        lines = []
        # The fallback download entry carries no digests.
        digests = (self.used_url or {}).get('digests') or {}
        # Two spaces between digest and file name, as printed by
        # md5sum/sha256sum.
        if digests.get('md5') and digests.get('sha256'):
            hash_header = '# md5, sha256 from {url}\n'.format(
                url=self.metadata_url)
            lines.append(hash_header)
            for method in ('md5', 'sha256'):
                hash_line = '{method}\t{digest}  {filename}\n'.format(
                    method=method,
                    digest=digests[method],
                    filename=self.filename)
                lines.append(hash_line)

        if self.license_files:
            lines.append('# Locally computed sha256 checksums\n')
        for license_file in self.license_files:
            sha256 = hashlib.sha256()
            with open(license_file, 'rb') as lic_f:
                while True:
                    data = lic_f.read(BUF_SIZE)
                    if not data:
                        break
                    sha256.update(data)
            # Same relative path as in <NAME>_LICENSE_FILES, so a license
            # file in a sub-directory is named identically in both files.
            hash_line = '{method}\t{digest}  {filename}\n'.format(
                method='sha256',
                digest=sha256.hexdigest(),
                filename=self._license_rel_path(license_file))
            lines.append(hash_line)

        with open(path_to_hash, 'w') as hash_file:
            hash_file.writelines(lines)

    def create_config_in(self):
        """
        Creates the Config.in file of a package
        """
        path_to_config = os.path.join(self.pkg_dir, 'Config.in')
        print('Creating {file}...'.format(file=path_to_config))
        lines = []
        config_line = 'config BR2_PACKAGE_{name}\n'.format(
            name=self.mk_name)
        lines.append(config_line)

        bool_line = '\tbool "{name}"\n'.format(name=self.buildroot_name)
        lines.append(bool_line)
        # pkg_req is None when setup() declared no install_requires.
        for dep in self.pkg_req or []:
            dep_line = '\tselect BR2_PACKAGE_{req} # runtime\n'.format(
                req=dep.upper().replace('-', '_'))
            lines.append(dep_line)

        lines.append('\thelp\n')

        # \t + two spaces is 3 char long
        help_indent = '\t  '
        # An empty summary wraps to an empty list.
        help_lines = textwrap.wrap(self.metadata['info']['summary'] or '',
                                   initial_indent=help_indent,
                                   subsequent_indent=help_indent)

        # make sure a help text is terminated with a full stop
        if help_lines and help_lines[-1][-1] != '.':
            help_lines[-1] += '.'

        home_page = self.metadata['info']['home_page']
        if home_page:
            if help_lines:
                # blank line between the description and the URL
                help_lines.append('')
            help_lines.append(help_indent + home_page)
        lines += [line + '\n' for line in help_lines]

        with open(path_to_config, 'w') as config_file:
            config_file.writelines(lines)
def main():
    """
    Command line entry point.

    Creates a Buildroot package for every PyPI package named on the command
    line, and for every dependency that has no package directory yet. A
    package that cannot be fetched, downloaded, extracted or loaded is
    reported and skipped.
    """
    # Building the parser
    parser = argparse.ArgumentParser(
        description="Creates buildroot packages from the metadata of "
                    "an existing PyPI packages and include it "
                    "in menuconfig")
    parser.add_argument("packages",
                        help="list of packages to be created",
                        nargs='+')
    parser.add_argument("-o", "--output",
                        help="""
                        Output directory for packages.
                        Default is ./package
                        """,
                        default='./package')

    args = parser.parse_args()
    packages = list(set(args.packages))

    # tmp_path is where we'll extract the files later
    tmp_prefix = 'scanpypi-'
    pkg_folder = args.output
    tmp_path = tempfile.mkdtemp(prefix=tmp_prefix)
    try:
        # 'packages' grows while it is iterated: dependencies appended
        # below are processed by later iterations of this same loop.
        for real_pkg_name in packages:
            package = BuildrootPackage(real_pkg_name, pkg_folder)
            print('buildroot package name for {}:'.format(package.real_name),
                  package.buildroot_name)
            # First we download the package
            # Most of the info we need can only be found inside the package
            print('Package:', package.buildroot_name)
            print('Fetching package', package.real_name)
            try:
                package.fetch_package_info()
            except (urllib2.URLError, urllib2.HTTPError):
                continue
            if package.metadata_name.lower() == 'setuptools':
                # setuptools imports itself, that does not work very well
                # with the monkey patch at the beginning
                print('Error: setuptools cannot be built using scanPyPI')
                continue

            try:
                package.download_package()
            except urllib2.HTTPError as error:
                print('Error: {code} {reason}'.format(code=error.code,
                                                      reason=error.reason))
                print('Error downloading package :', package.buildroot_name)
                print()
                continue
            except DownloadFailed as error:
                # Raised when no download could be used; skip this package
                # like any other download error instead of aborting the run.
                print('Error: {error}'.format(error=error))
                print('Error downloading package :', package.buildroot_name)
                print()
                continue

            # extract the tarball
            try:
                package.extract_package(tmp_path)
            except (tarfile.ReadError, zipfile.BadZipfile):
                print('Error extracting package {}'.format(package.real_name))
                print()
                continue

            # Loading the package install info from the package
            try:
                package.load_setup()
            except ImportError as err:
                if 'buildutils' in str(err):
                    print('This package needs buildutils')
                else:
                    raise
                continue
            except AttributeError as error:
                print('Error: Could not install package {pkg}: {error}'.format(
                    pkg=package.real_name, error=error))
                continue

            # Package requirement are an argument of the setup function
            req_not_found = package.get_requirements(pkg_folder)
            req_not_found = req_not_found.difference(packages)

            packages += req_not_found
            if req_not_found:
                print('Added packages \'{pkgs}\' as dependencies of {pkg}'
                      .format(pkgs=", ".join(req_not_found),
                              pkg=package.buildroot_name))
            print('Checking if package {name} already exists...'.format(
                name=package.pkg_dir))
            try:
                os.makedirs(package.pkg_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    print("ERROR: ", str(exception), file=sys.stderr)
                    continue
                print('Error: Package {name} already exists'
                      .format(name=package.pkg_dir))
                del_pkg = raw_input(
                    'Do you want to delete existing package ? [y/N]')
                if del_pkg.lower() == 'y':
                    shutil.rmtree(package.pkg_dir)
                    os.makedirs(package.pkg_dir)
                else:
                    continue
            package.create_package_mk()

            package.create_hash_file()

            package.create_config_in()
            # printing an empty line for visual comfort
            print()
    finally:
        # The extraction directory is removed even when a package fails.
        shutil.rmtree(tmp_path)


if __name__ == "__main__":
    main()