4 Utility for building Buildroot packages for existing PyPI packages
6 Any package built by scanpypi should be manually checked for
9 from __future__ import print_function
10 from __future__ import absolute_import
13 import six.moves.urllib.request, six.moves.urllib.error, six.moves.urllib.parse
25 from functools import wraps
26 from six.moves import map
27 from six.moves import zip
28 from six.moves import input
37 import spdx_lookup as liclookup
39 # spdx_lookup is not installed
40 print('spdx_lookup module is not installed. This can lead to an '
41 'inaccurate licence detection. Please install it via\n'
42 'pip install spdx_lookup')
def setup_decorator(func, method):
    """
    Build a replacement for distutils.core.setup or setuptools.setup.

    The replacement never runs the real setup function: it only stores the
    keyword arguments it was called with in BuildrootPackage.setup_args,
    indexed by the 'name' argument, and adds a 'method' key to them.

    Keyword arguments:
    func -- either setuptools.setup or distutils.core.setup
    method -- either 'setuptools' or 'distutils'
    """

    @wraps(func)
    def closure(*args, **kwargs):
        # Every python package calls its setup function to get installed and
        # passes its own name through the 'name' keyword argument.
        recorded = kwargs
        BuildrootPackage.setup_args[recorded['name']] = recorded
        recorded['method'] = method
    return closure
# Replace both setup() entry points so that loading a package's setup.py
# records the setup arguments instead of installing anything.
import setuptools  # noqa E402
setuptools.setup = setup_decorator(setuptools.setup, 'setuptools')
# Import the submodule explicitly: a bare "import distutils" does not
# guarantee that distutils.core is loaded.
import distutils.core  # noqa E402
# Wrap distutils' own setup, not the already patched setuptools one.
distutils.core.setup = setup_decorator(distutils.core.setup, 'distutils')
def find_file_upper_case(filenames, path='./'):
    """
    Generator walking the tree below path and yielding every file whose
    upper-cased name is one of filenames.
    Each yielded path starts with the path argument.

    Keyword arguments:
    filenames -- List of (upper-case) filenames to be found
    path -- Path to the directory to search
    """
    for root, _subdirs, entries in os.walk(path):
        matching = (entry for entry in entries if entry.upper() in filenames)
        for entry in matching:
            yield os.path.join(root, entry)
def pkg_buildroot_name(pkg_name):
    """
    Returns the Buildroot package name for the PyPI package pkg_name.
    Remove all non alphanumeric characters except - and _
    Also lowers the name and adds the 'python-' prefix unless the name
    already starts with it. A name that ends up empty is returned as is.

    Keyword arguments:
    pkg_name -- String to rename
    """
    # Raw string: '\w' in a plain literal is an invalid escape sequence.
    name = re.sub(r'[^\w-]', '', pkg_name.lower())
    prefix = 'python-'
    if name and not name.startswith(prefix):
        name = prefix + name
    return name
class DownloadFailed(Exception):
    """Raised by BuildrootPackage.download_package when the package could
    not be downloaded."""
class BuildrootPackage(object):
    """Generate the Buildroot files of one PyPI package.

    This class's methods are not meant to be used individually, please
    use them in the correct order:

    __init__

    fetch_package_info

    download_package

    extract_package

    load_setup

    get_requirements

    create_package_mk

    create_hash_file

    create_config_in
    """
    # Arguments recorded by the patched setup() functions, indexed by the
    # 'name' given to setup(). Class-level: the patched functions write to
    # BuildrootPackage.setup_args directly.
    setup_args = {}

    # Help text indentation used in Config.in: \t + two spaces.
    _HELP_INDENT = '\t  '

    def __init__(self, real_name, pkg_folder):
        """
        Keyword arguments:
        real_name -- name of the package on PyPI
        pkg_folder -- directory in which the Buildroot package is created
        """
        self.real_name = real_name
        self.buildroot_name = pkg_buildroot_name(self.real_name)
        self.pkg_dir = os.path.join(pkg_folder, self.buildroot_name)
        self.mk_name = self.buildroot_name.upper().replace('-', '_')
        self.as_string = None
        self.md5_sum = None
        self.metadata = None
        self.metadata_name = None
        self.metadata_url = None
        self.pkg_req = None
        self.setup_metadata = None
        self.tmp_extract = None
        self.used_url = None
        self.filename = None
        self.url = None
        self.version = None
        self.license_files = []

    def fetch_package_info(self):
        """
        Fetch a package's metadata from the python package index

        Sets self.metadata, self.version and self.metadata_name.
        HTTPError/URLError are reported on the console and re-raised.
        """
        from contextlib import closing

        self.metadata_url = 'https://pypi.python.org/pypi/{pkg}/json'.format(
            pkg=self.real_name)
        not_found = ('ERROR: Could not find package {pkg}.\n'
                     'Check syntax inside the python package index:\n'
                     'https://pypi.python.org/pypi/ '
                     .format(pkg=self.real_name))
        try:
            # closing() releases the connection on Python 2 as well, where
            # the response object is not a context manager.
            with closing(six.moves.urllib.request.urlopen(
                    self.metadata_url)) as response:
                pkg_json = response.read().decode()
        except six.moves.urllib.error.HTTPError as error:
            print('ERROR:', error.getcode(), error.msg, file=sys.stderr)
            print(not_found)
            raise
        except six.moves.urllib.error.URLError:
            print(not_found)
            raise
        self.metadata = json.loads(pkg_json)
        self.version = self.metadata['info']['version']
        self.metadata_name = self.metadata['info']['name']

    def download_package(self):
        """
        Download a package using metadata from pypi

        Every non-bdist entry of self.metadata['urls'] is tried in turn until
        one downloads and, when PyPI published an md5, matches it.
        Sets self.as_string, self.used_url, self.filename and self.url.

        Raises the last HTTPError if the last attempt failed with one,
        DownloadFailed otherwise.
        """
        download = None
        try:
            self.metadata['urls'][0]['filename']
        except IndexError:
            print(
                'Non-conventional package, ',
                'please check carefully after creation')
            self.metadata['urls'] = [{
                'packagetype': 'sdist',
                'url': self.metadata['info']['download_url'],
                'digests': None,
                'md5_digest': None}]
            # In this case, we can't get the name of the downloaded file
            # from the pypi api, so we need to find it, this should work
            urlpath = six.moves.urllib.parse.urlparse(
                self.metadata['info']['download_url']).path
            # urlparse().path give something like
            # /path/to/file-version.tar.gz
            # We use basename to remove /path/to
            self.metadata['urls'][0]['filename'] = os.path.basename(urlpath)
        for download_url in self.metadata['urls']:
            if 'bdist' in download_url['packagetype']:
                continue
            try:
                print('Downloading package {pkg} from {url}...'.format(
                    pkg=self.real_name, url=download_url['url']))
                download = six.moves.urllib.request.urlopen(download_url['url'])
            except six.moves.urllib.error.HTTPError as http_error:
                download = http_error
            else:
                self.used_url = download_url
                try:
                    self.as_string = download.read()
                finally:
                    download.close()
                # .get(): the fallback entry built above may not carry a
                # digest at all; nothing to verify in that case.
                expected_md5 = download_url.get('md5_digest')
                if not expected_md5:
                    break
                self.md5_sum = hashlib.md5(self.as_string).hexdigest()
                if self.md5_sum == expected_md5:
                    break
        else:
            # No candidate was accepted (loop ended without break)
            if isinstance(download, six.moves.urllib.error.HTTPError):
                raise download
            raise DownloadFailed('Failed to download package {pkg}'
                                 .format(pkg=self.real_name))
        self.filename = self.used_url['filename']
        self.url = self.used_url['url']

    @staticmethod
    def _make_clean_dir(path):
        """Create path, wiping a previous directory of the same name.

        Returns False (after printing the error) when it cannot be created.
        """
        try:
            os.makedirs(path)
        except OSError as exception:
            # Exceptions have no 'message' attribute on Python 3: print the
            # exception itself.
            if exception.errno != errno.EEXIST:
                print("ERROR: ", exception, file=sys.stderr)
                return False
            print('WARNING:', exception, file=sys.stderr)
            print('Removing {pkg}...'.format(pkg=path))
            shutil.rmtree(path)
            os.makedirs(path)
        return True

    def extract_package(self, tmp_path):
        """
        Extract the package contents into a directory

        Sets self.tmp_extract to <tmp_path>/<buildroot name>/<name>-<version>.
        Returns (None, None) if the extraction directory cannot be created.

        Keyword arguments:
        tmp_path -- directory where you want the package to be extracted
        """
        import io

        # io.BytesIO wraps the downloaded bytes on Python 2 and 3 alike
        as_file = io.BytesIO(self.as_string)
        tmp_pkg = os.path.join(tmp_path, self.buildroot_name)
        if self.filename[-3:] == 'zip':
            archive = zipfile.ZipFile(as_file)
        else:
            archive = tarfile.open(fileobj=as_file)
        with archive:
            if not self._make_clean_dir(tmp_pkg):
                return None, None
            # NOTE(review): the archive comes from the network and
            # extractall() does not protect against members escaping
            # tmp_pkg; consider validating member names.
            archive.extractall(tmp_pkg)

        tmp_extract = '{folder}/{name}-{version}'
        self.tmp_extract = tmp_extract.format(
            folder=tmp_pkg,
            name=self.metadata_name,
            version=self.version)

    def load_setup(self):
        """
        Loads the corresponding setup and store its metadata

        The working directory, sys.path and sys.modules are restored even
        when loading setup.py fails.
        Raises AttributeError when setup() was not called on import and
        setup.py has no main() function.
        """
        current_dir = os.getcwd()
        os.chdir(self.tmp_extract)
        sys.path.append(self.tmp_extract)
        s_file = None
        try:
            s_file, s_path, s_desc = imp.find_module('setup',
                                                     [self.tmp_extract])
            setup = imp.load_module('setup', s_file, s_path, s_desc)
            try:
                self.setup_metadata = self.setup_args[self.metadata_name]
            except KeyError:
                # This means setup was not called which most likely mean
                # that it is called through the if __name__ == '__main__'
                # directive. In this case, we can only pray that it is
                # called through a function called main() in setup.py.
                setup.main()  # Will raise AttributeError if not found
                self.setup_metadata = self.setup_args[self.metadata_name]
        finally:
            # load_module() leaves closing the file to its caller
            if s_file is not None:
                s_file.close()
            # Here we must remove the module the hard way.
            # We must do this because of a very specific case: if a package
            # calls setup from the __main__ but does not come with a
            # 'main()' function, for some reason setup.main() will
            # successfully call the main function of a previous package...
            sys.modules.pop('setup', None)
            os.chdir(current_dir)
            sys.path.remove(self.tmp_extract)

    def get_requirements(self, pkg_folder):
        """
        Retrieve dependencies from the metadata found in the setup.py script of
        a pypi package.

        Sets self.pkg_req to the Buildroot names of the dependencies (None
        when setup() declared no 'install_requires') and returns the set of
        PyPI names that have no package directory in pkg_folder yet.

        Keyword Arguments:
        pkg_folder -- location of the already created packages
        """
        if 'install_requires' not in self.setup_metadata:
            self.pkg_req = None
            return set()
        # Keep only the leading name of each requirement ('foo>=1.0' -> 'foo')
        requirement = re.compile(r'([-.\w]+).*')
        stripped = [requirement.sub(r'\1', req)
                    for req in self.setup_metadata['install_requires']]

        # get rid of commented lines and also strip the package strings
        pypi_names = [item.strip() for item in stripped
                      if len(item) > 0 and item[0] != '#']

        self.pkg_req = [pkg_buildroot_name(name) for name in pypi_names]
        # Both names are needed: the Buildroot one to look for an existing
        # package directory, the PyPI one to report what is still missing.
        return set(
            pypi_name
            for pypi_name, br_name in zip(pypi_names, self.pkg_req)
            if not os.path.isdir(os.path.join(pkg_folder, br_name)))

    def __create_mk_header(self):
        """
        Create the header of the <package_name>.mk file
        """
        return [
            '#' * 80 + '\n',
            '#\n',
            '# {name}\n'.format(name=self.buildroot_name),
            '#\n',
            '#' * 80 + '\n',
            '\n',
        ]

    def __create_mk_download_info(self):
        """
        Create the lines referring to the download information of the
        <package_name>.mk file
        """
        lines = []
        version_line = '{name}_VERSION = {version}\n'.format(
            name=self.mk_name,
            version=self.version)
        lines.append(version_line)

        targz = self.filename.replace(
            self.version,
            '$({name}_VERSION)'.format(name=self.mk_name))
        targz_line = '{name}_SOURCE = {filename}\n'.format(
            name=self.mk_name,
            filename=targz)
        lines.append(targz_line)

        if self.filename not in self.url:
            # Sometimes the filename is in the url, sometimes it's not
            site_url = self.url
        else:
            site_url = self.url[:self.url.find(self.filename)]
        site_line = '{name}_SITE = {url}'.format(name=self.mk_name,
                                                 url=site_url)
        site_line = site_line.rstrip('/') + '\n'
        lines.append(site_line)
        return lines

    def __create_mk_setup(self):
        """
        Create the line referring to the setup method of the package of the
        <package_name>.mk file

        There are two things you can use to make an installer
        for a python package: distutils or setuptools
        distutils comes with python but does not support dependencies.
        distutils is mostly still there for backward support.
        setuptools is what smart people use,
        but it is not shipped with python :(
        """
        setup_type_line = '{name}_SETUP_TYPE = {method}\n'.format(
            name=self.mk_name,
            method=self.setup_metadata['method'])
        return [setup_type_line]

    def __get_license_names(self, license_files):
        """
        Try to determine the related license name.

        There are two possibilities. Either the scripts tries to
        get license name from package's metadata or, if spdx_lookup
        package is available, the script compares license files with
        the licenses known to spdx_lookup.

        Returns the <NAME>_LICENSE line, or '' when spdx_lookup recognised
        none of the files.

        Keyword arguments:
        license_files -- paths of the license files found in the package
        """
        license_line = ''
        if liclookup is None:
            license_dict = {
                'Apache Software License': 'Apache-2.0',
                'BSD License': 'BSD',
                'European Union Public Licence 1.0': 'EUPL-1.0',
                'European Union Public Licence 1.1': 'EUPL-1.1',
                "GNU General Public License": "GPL",
                "GNU General Public License v2": "GPL-2.0",
                "GNU General Public License v2 or later": "GPL-2.0+",
                "GNU General Public License v3": "GPL-3.0",
                "GNU General Public License v3 or later": "GPL-3.0+",
                "GNU Lesser General Public License v2": "LGPL-2.1",
                "GNU Lesser General Public License v2 or later": "LGPL-2.1+",
                "GNU Lesser General Public License v3": "LGPL-3.0",
                "GNU Lesser General Public License v3 or later": "LGPL-3.0+",
                "GNU Library or Lesser General Public License": "LGPL-2.0",
                "ISC License": "ISC",
                "MIT License": "MIT",
                "Mozilla Public License 1.0": "MPL-1.0",
                "Mozilla Public License 1.1": "MPL-1.1",
                "Mozilla Public License 2.0": "MPL-2.0",
                "Zope Public License": "ZPL"
                }
            regexp = re.compile(r'^License :* *.* *:+ (.*)( \(.*\))?$')
            classifiers_licenses = [regexp.sub(r"\1", lic)
                                    for lic in self.metadata['info']['classifiers']
                                    if regexp.match(lic)]
            licenses = [license_dict.get(x, x) for x in classifiers_licenses]
            if not licenses:
                # Fall back to the free-form license field *before* warning,
                # so that the warning shows the value actually used.
                licenses = [self.metadata['info']['license'] or '']
                print('WARNING: License has been set to "{license}". It is most'
                      ' likely wrong, please change it if need be'.format(
                          license=', '.join(licenses)))
            license_line = '{name}_LICENSE = {license}\n'.format(
                name=self.mk_name,
                license=', '.join(licenses))
        else:
            license_names = []
            for license_file in license_files:
                with open(license_file) as lic_file:
                    match = liclookup.match(lic_file.read())
                if match is not None and match.confidence >= 90.0:
                    license_names.append(match.license.id)

            if license_names:
                license_line = ('{name}_LICENSE ='
                                ' {names}\n'.format(
                                    name=self.mk_name,
                                    names=', '.join(license_names)))

        return license_line

    def __relative_to_extract(self, path):
        """Return path relative to the extracted package directory."""
        return path.replace(self.tmp_extract, '')[1:]

    def __create_mk_license(self):
        """
        Create the lines referring to the package's license information of the
        <package_name>.mk file

        The license's files are found by searching the package (case insensitive)
        for files named license, license.txt etc. Every file found is listed;
        if more than one is found, the list is also printed so that the user
        can review it.
        """
        lines = []

        filenames = ['LICENCE', 'LICENSE', 'LICENSE.RST', 'LICENSE.TXT',
                     'COPYING', 'COPYING.TXT']
        self.license_files = list(find_file_upper_case(filenames, self.tmp_extract))

        lines.append(self.__get_license_names(self.license_files))

        license_files = [self.__relative_to_extract(license_file)
                         for license_file in self.license_files]
        if not license_files:
            print('WARNING: No license file found,'
                  ' please specify it manually afterwards')
            return lines

        if len(license_files) > 1:
            print('More than one file found for license:',
                  ', '.join(license_files))
        license_file_line = ('{name}_LICENSE_FILES ='
                             ' {files}\n'.format(
                                 name=self.mk_name,
                                 files=' '.join(license_files)))
        lines.append(license_file_line)
        return lines

    def __create_mk_requirements(self):
        """
        Create the lines referring to the dependencies of the
        <package_name>.mk file, from self.pkg_req
        """
        dependencies_line = ('{name}_DEPENDENCIES ='
                             ' {reqs}\n'.format(
                                 name=self.mk_name,
                                 reqs=' '.join(self.pkg_req or [])))
        return [dependencies_line]

    def create_package_mk(self):
        """
        Create the lines corresponding to the <package_name>.mk file
        """
        pkg_mk = '{name}.mk'.format(name=self.buildroot_name)
        path_to_mk = os.path.join(self.pkg_dir, pkg_mk)
        print('Creating {file}...'.format(file=path_to_mk))
        lines = self.__create_mk_header()
        lines += self.__create_mk_download_info()
        lines += self.__create_mk_setup()
        lines += self.__create_mk_license()

        lines.append('\n')
        lines.append('$(eval $(python-package))')
        lines.append('\n')
        with open(path_to_mk, 'w') as mk_file:
            mk_file.writelines(lines)

    def create_hash_file(self):
        """
        Create the lines corresponding to the <package_name>.hash files

        Writes the md5 published by PyPI (when there is one), the locally
        computed sha256 of the download and the sha256 of every license file.
        """
        pkg_hash = '{name}.hash'.format(name=self.buildroot_name)
        path_to_hash = os.path.join(self.pkg_dir, pkg_hash)
        print('Creating {filename}...'.format(filename=path_to_hash))
        # Two spaces before the file name, as in Buildroot hash files.
        hash_format = '{method}\t{digest}  {filename}\n'
        lines = []
        # .get(): the fallback download entry may carry no md5 at all
        md5_digest = self.used_url.get('md5_digest')
        if md5_digest:
            md5_comment = '# md5 from {url}, sha256 locally computed\n'.format(
                url=self.metadata_url)
            lines.append(md5_comment)
            lines.append(hash_format.format(
                method='md5',
                digest=md5_digest,
                filename=self.filename))
        lines.append(hash_format.format(
            method='sha256',
            digest=hashlib.sha256(self.as_string).hexdigest(),
            filename=self.filename))

        for license_file in self.license_files:
            sha256 = hashlib.sha256()
            with open(license_file, 'rb') as lic_f:
                # Hash chunk by chunk; read() returns b'' at end of file
                for data in iter(lambda: lic_f.read(BUF_SIZE), b''):
                    sha256.update(data)
            lines.append(hash_format.format(
                method='sha256',
                digest=sha256.hexdigest(),
                filename=self.__relative_to_extract(license_file)))

        with open(path_to_hash, 'w') as hash_file:
            hash_file.writelines(lines)

    def create_config_in(self):
        """
        Creates the Config.in file of a package

        The help text is the PyPI summary followed by the home page; either
        part is left out when PyPI does not provide it.
        """
        path_to_config = os.path.join(self.pkg_dir, 'Config.in')
        print('Creating {file}...'.format(file=path_to_config))
        lines = []
        config_line = 'config BR2_PACKAGE_{name}\n'.format(
            name=self.mk_name)
        lines.append(config_line)

        bool_line = '\tbool "{name}"\n'.format(name=self.buildroot_name)
        lines.append(bool_line)
        # pkg_req is None when the package declares no requirement
        for dep in self.pkg_req or []:
            dep_line = '\tselect BR2_PACKAGE_{req} # runtime\n'.format(
                req=dep.upper().replace('-', '_'))
            lines.append(dep_line)

        lines.append('\thelp\n')

        summary = self.metadata['info'].get('summary') or ''
        help_lines = textwrap.wrap(summary,
                                   initial_indent=self._HELP_INDENT,
                                   subsequent_indent=self._HELP_INDENT)

        # make sure a help text is terminated with a full stop
        # (wrap() returns no line at all for an empty summary)
        if help_lines and help_lines[-1][-1] != '.':
            help_lines[-1] += '.'

        home_page = self.metadata['info'].get('home_page')
        if home_page:
            help_lines.append('')
            help_lines.append(self._HELP_INDENT + home_page)
        lines += [x + '\n' for x in help_lines]

        with open(path_to_config, 'w') as config_file:
            config_file.writelines(lines)
def main():
    """
    Command line entry point.

    Creates a Buildroot package (.mk, .hash and Config.in) for every PyPI
    package named on the command line and for the dependencies found along
    the way that have no package directory yet. A package that cannot be
    fetched, downloaded, extracted or loaded is reported and skipped.
    """
    # Building the parser
    parser = argparse.ArgumentParser(
        description="Creates buildroot packages from the metadata of "
                    "an existing PyPI packages and include it "
                    "in menuconfig")
    parser.add_argument("packages",
                        help="list of packages to be created",
                        nargs='+')
    parser.add_argument("-o", "--output",
                        help="Output directory for packages. "
                             "Default is ./package",
                        default='./package')

    args = parser.parse_args()
    packages = list(set(args.packages))

    # tmp_path is where we'll extract the files later
    tmp_prefix = 'scanpypi-'
    pkg_folder = args.output
    tmp_path = tempfile.mkdtemp(prefix=tmp_prefix)
    try:
        # 'packages' grows while it is iterated: the dependencies found for
        # a package are appended and handled by later iterations.
        for real_pkg_name in packages:
            package = BuildrootPackage(real_pkg_name, pkg_folder)
            print('buildroot package name for {}:'.format(package.real_name),
                  package.buildroot_name)
            # First we download the package
            # Most of the info we need can only be found inside the package
            print('Package:', package.buildroot_name)
            print('Fetching package', package.real_name)
            try:
                package.fetch_package_info()
            except (six.moves.urllib.error.URLError, six.moves.urllib.error.HTTPError):
                continue
            if package.metadata_name.lower() == 'setuptools':
                # setuptools imports itself, that does not work very well
                # with the monkey patch at the beginning
                print('Error: setuptools cannot be built using scanPyPI')
                continue

            try:
                package.download_package()
            except six.moves.urllib.error.HTTPError as error:
                print('Error: {code} {reason}'.format(code=error.code,
                                                      reason=error.reason))
                print('Error downloading package :', package.buildroot_name)
                print()
                continue

            # extract the tarball
            try:
                package.extract_package(tmp_path)
            except (tarfile.ReadError, zipfile.BadZipfile):
                print('Error extracting package {}'.format(package.real_name))
                print()
                continue

            # Loading the package install info from the package
            try:
                package.load_setup()
            except ImportError as err:
                # str(err): exceptions have no 'message' attribute on
                # Python 3
                if 'buildutils' not in str(err):
                    raise
                print('This package needs buildutils')
                continue
            except AttributeError as error:
                print('Error: Could not install package {pkg}: {error}'.format(
                    pkg=package.real_name, error=error))
                continue

            # Package requirement are an argument of the setup function
            req_not_found = package.get_requirements(pkg_folder)
            req_not_found = req_not_found.difference(packages)

            packages += req_not_found
            if req_not_found:
                print('Added packages \'{pkgs}\' as dependencies of {pkg}'
                      .format(pkgs=", ".join(req_not_found),
                              pkg=package.buildroot_name))
            print('Checking if package {name} already exists...'.format(
                name=package.pkg_dir))
            try:
                os.makedirs(package.pkg_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    print("ERROR: ", exception, file=sys.stderr)
                    continue
                print('Error: Package {name} already exists'
                      .format(name=package.pkg_dir))
                del_pkg = input(
                    'Do you want to delete existing package ? [y/N]')
                if del_pkg.lower() != 'y':
                    continue
                shutil.rmtree(package.pkg_dir)
                os.makedirs(package.pkg_dir)
            package.create_package_mk()

            package.create_hash_file()

            package.create_config_in()
            # printing an empty line for visual comfort
            print()
    finally:
        # Always remove the extraction directory, even on failure
        shutil.rmtree(tmp_path)
712 if __name__ == "__main__":