"""
Utility for building Buildroot packages for existing PyPI packages

Any package built by scanpypi should be manually checked for
errors.
"""
from __future__ import print_function
from __future__ import absolute_import

import argparse
import errno
import hashlib
import imp
import io
import json
import os
import re
import shutil
import sys
import tarfile
import tempfile
import textwrap
import zipfile
from functools import wraps

import six
import six.moves.urllib.request
import six.moves.urllib.error
import six.moves.urllib.parse
from six.moves import map
from six.moves import zip
from six.moves import input

try:
    import spdx_lookup as liclookup
except ImportError:
    # spdx_lookup is not installed
    print('spdx_lookup module is not installed. This can lead to an '
          'inaccurate licence detection. Please install it via\n'
          'pip install spdx_lookup')
    liclookup = None
def setup_decorator(func, method):
    """
    Decorator for distutils.core.setup and setuptools.setup.

    The returned replacement records the keyword arguments with which setup
    is called in BuildrootPackage.setup_args, keyed by the package name, and
    adds a key 'method' which should be either 'setuptools' or 'distutils'.

    Keyword arguments:
    func -- either setuptools.setup or distutils.core.setup
    method -- either 'setuptools' or 'distutils'
    """

    @wraps(func)
    def closure(*args, **kwargs):
        # Any python packages calls its setup function to be installed.
        # Argument 'name' of this setup function is the package's name
        # (a KeyError is raised if setup() was called without it).
        name = kwargs['name']
        kwargs['method'] = method
        BuildrootPackage.setup_args[name] = kwargs
    return closure
# Monkey patch both setup entry points so that loading a package's setup.py
# goes through setup_decorator's replacement, which records the arguments
# given to setup().
import setuptools  # noqa E402
setuptools.setup = setup_decorator(setuptools.setup, 'setuptools')
# Import the submodule explicitly: a bare "import distutils" does not
# guarantee that the distutils.core attribute exists.
import distutils.core  # noqa E402
distutils.core.setup = setup_decorator(distutils.core.setup, 'distutils')
def find_file_upper_case(filenames, path='./'):
    """
    List generator:
    Recursively find files that matches one of the specified filenames.
    The comparison is case insensitive.
    Returns a relative path starting with path argument.

    Keyword arguments:
    filenames -- List of filenames to be found
    path -- Path to the directory to search
    """
    # Build the lookup set once; upper-casing it here means callers do not
    # have to pass already upper-cased names.
    wanted = set(name.upper() for name in filenames)
    for root, dirs, files in os.walk(path):
        # Sorting in place makes os.walk visit sub-directories in a stable
        # order, so the generated package files do not depend on the
        # directory listing order of the filesystem.
        dirs.sort()
        for candidate in sorted(files):
            if candidate.upper() in wanted:
                yield os.path.join(root, candidate)
def pkg_buildroot_name(pkg_name):
    """
    Returns the Buildroot package name for the PyPI package pkg_name.
    Remove all characters that are neither a word character (letters,
    digits, underscore) nor -
    Also lowers the name and adds the 'python-' prefix unless the name
    already starts with it.

    Keyword arguments:
    pkg_name -- String to rename
    """
    name = re.sub(r'[^\w-]', '', pkg_name.lower())
    prefix = 'python-'
    # An empty name is returned unchanged rather than as a bare prefix.
    if name and not name.startswith(prefix):
        name = prefix + name
    return name
class DownloadFailed(Exception):
    """Raised when none of a package's download URLs gave a usable file."""
class BuildrootPackage(object):
    """This class's methods are not meant to be used individually please
    use them in the correct order:

    __init__

    fetch_package_info

    download_package

    extract_package

    load_setup

    get_requirements

    create_package_mk

    create_hash_file

    create_config_in

    """
    # Shared registry filled by the patched setup() functions: maps the
    # 'name' given to setup() to the keyword arguments of that call.
    setup_args = {}

    # Chunk size used when hashing license files.
    _HASH_BUF_SIZE = 65536

    def __init__(self, real_name, pkg_folder):
        """
        Keyword arguments:
        real_name -- name of the package on PyPI
        pkg_folder -- directory in which the Buildroot package is created
        """
        self.real_name = real_name
        self.buildroot_name = pkg_buildroot_name(self.real_name)
        self.pkg_dir = os.path.join(pkg_folder, self.buildroot_name)
        self.mk_name = self.buildroot_name.upper().replace('-', '_')
        self.as_string = None
        self.md5_sum = None
        self.metadata = None
        self.metadata_name = None
        self.metadata_url = None
        self.pkg_req = None
        self.setup_metadata = None
        self.tmp_extract = None
        self.used_url = None
        self.filename = None
        self.url = None
        self.version = None
        self.license_files = []

    def fetch_package_info(self):
        """
        Fetch a package's metadata from the python package index

        Sets self.metadata, self.version and self.metadata_name.
        URLError/HTTPError are reported on the console and re-raised.
        """
        self.metadata_url = 'https://pypi.python.org/pypi/{pkg}/json'.format(
            pkg=self.real_name)
        try:
            pkg_json = six.moves.urllib.request.urlopen(self.metadata_url).read().decode()
        except six.moves.urllib.error.URLError as error:
            # HTTPError is a subclass of URLError; only it carries a status.
            if isinstance(error, six.moves.urllib.error.HTTPError):
                print('ERROR:', error.getcode(), error.msg, file=sys.stderr)
            print('ERROR: Could not find package {pkg}.\n'
                  'Check syntax inside the python package index:\n'
                  'https://pypi.python.org/pypi/ '
                  .format(pkg=self.real_name))
            raise
        self.metadata = json.loads(pkg_json)
        self.version = self.metadata['info']['version']
        self.metadata_name = self.metadata['info']['name']

    def download_package(self):
        """
        Download a package using metadata from pypi

        The first non-bdist URL whose content matches its md5 digest (or
        that has no digest) is kept. Sets self.used_url, self.as_string,
        self.md5_sum, self.filename and self.url.

        Raises the last HTTPError if the last attempt failed with one,
        DownloadFailed otherwise when no URL was usable.
        """
        # Must exist even if every URL is skipped, otherwise the for/else
        # below would fail with a NameError instead of DownloadFailed.
        download = None
        try:
            self.metadata['urls'][0]['filename']
        except IndexError:
            print(
                'Non-conventional package, ',
                'please check carefully after creation')
            self.metadata['urls'] = [{
                'packagetype': 'sdist',
                'url': self.metadata['info']['download_url'],
                'md5_digest': None}]
            # In this case, we can't get the name of the downloaded file
            # from the pypi api, so we need to find it, this should work
            urlpath = six.moves.urllib.parse.urlparse(
                self.metadata['info']['download_url']).path
            # urlparse().path give something like
            # /path/to/file-version.tar.gz
            # We use basename to remove /path/to
            self.metadata['urls'][0]['filename'] = os.path.basename(urlpath)
        for download_url in self.metadata['urls']:
            if 'bdist' in download_url['packagetype']:
                continue
            try:
                print('Downloading package {pkg} from {url}...'.format(
                    pkg=self.real_name, url=download_url['url']))
                download = six.moves.urllib.request.urlopen(download_url['url'])
            except six.moves.urllib.error.HTTPError as http_error:
                download = http_error
            else:
                self.used_url = download_url
                self.as_string = download.read()
                if not download_url['md5_digest']:
                    break
                self.md5_sum = hashlib.md5(self.as_string).hexdigest()
                if self.md5_sum == download_url['md5_digest']:
                    break
        else:
            # No break: every candidate was skipped, failed or mismatched.
            if isinstance(download, six.moves.urllib.error.HTTPError):
                raise download
            raise DownloadFailed('Failed to download package {pkg}'
                                 .format(pkg=self.real_name))
        self.filename = self.used_url['filename']
        self.url = self.used_url['url']

    @staticmethod
    def __prepare_extract_dir(tmp_pkg):
        """
        Create tmp_pkg as an empty directory, removing a previous one.

        Returns True on success, False if the directory could not be
        created for a reason other than already existing.
        """
        try:
            os.makedirs(tmp_pkg)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                print("ERROR: ", exception, file=sys.stderr)
                return False
            print('WARNING:', exception, file=sys.stderr)
            print('Removing {pkg}...'.format(pkg=tmp_pkg))
            shutil.rmtree(tmp_pkg)
            os.makedirs(tmp_pkg)
        return True

    def extract_package(self, tmp_path):
        """
        Extract the package contents into a directory

        Sets self.tmp_extract to the expected top-level directory of the
        extracted archive. Returns (None, None) without extracting when
        the destination directory cannot be created.

        Keyword arguments:
        tmp_path -- directory where you want the package to be extracted
        """
        as_file = io.BytesIO(self.as_string)
        tmp_pkg = os.path.join(tmp_path, self.buildroot_name)
        # The archive is opened before the directory is touched so that a
        # corrupt download raises without side effects.
        if self.filename[-3:] == 'zip':
            archive = zipfile.ZipFile(as_file)
        else:
            archive = tarfile.open(fileobj=as_file)
        with archive:
            if not self.__prepare_extract_dir(tmp_pkg):
                return None, None
            # NOTE(review): extractall() does not sanitise member paths and
            # the archive comes from the network; confirm this is acceptable.
            archive.extractall(tmp_pkg)

        tmp_extract = '{folder}/{name}-{version}'
        self.tmp_extract = tmp_extract.format(
            folder=tmp_pkg,
            name=self.metadata_name,
            version=self.version)

    def load_setup(self):
        """
        Loads the corresponding setup and store its metadata

        The working directory, sys.path and sys.modules are restored even
        when the package's setup.py raises.
        """
        current_dir = os.getcwd()
        os.chdir(self.tmp_extract)
        sys.path.append(self.tmp_extract)
        s_file = None
        try:
            s_file, s_path, s_desc = imp.find_module('setup', [self.tmp_extract])
            setup = imp.load_module('setup', s_file, s_path, s_desc)
            try:
                self.setup_metadata = self.setup_args[self.metadata_name]
            except KeyError:
                # This means setup was not called which most likely mean that it is
                # called through the if __name__ == '__main__' directive.
                # In this case, we can only pray that it is called through a
                # function called main() in setup.py.
                setup.main()  # Will raise AttributeError if not found
                self.setup_metadata = self.setup_args[self.metadata_name]
        finally:
            # Here we must remove the module the hard way.
            # We must do this because of a very specific case: if a package calls
            # setup from the __main__ but does not come with a 'main()' function,
            # for some reason setup.main() will successfully call the main
            # function of a previous package...
            sys.modules.pop('setup', None)
            if s_file is not None:
                s_file.close()
            os.chdir(current_dir)
            sys.path.remove(self.tmp_extract)

    def get_requirements(self, pkg_folder):
        """
        Retrieve dependencies from the metadata found in the setup.py script of
        a pypi package.

        Sets self.pkg_req to the Buildroot names of the dependencies (None
        when setup.py declares none) and returns the set of PyPI names for
        which no package directory exists in pkg_folder.

        Keyword Arguments:
        pkg_folder -- location of the already created packages
        """
        if 'install_requires' not in self.setup_metadata:
            self.pkg_req = None
            return set()
        requirements = self.setup_metadata['install_requires']
        if isinstance(requirements, six.string_types):
            # A single string would otherwise be iterated per character.
            requirements = requirements.splitlines()

        # get rid of commented lines and also strip the package strings
        requirements = [req.strip() for req in requirements]
        requirements = [req for req in requirements
                        if req and not req.startswith('#')]
        # keep only the project name, dropping version specifiers and extras
        pypi_names = [re.sub(r'([-.\w]+).*', r'\1', req)
                      for req in requirements]

        self.pkg_req = [pkg_buildroot_name(name) for name in pypi_names]
        # Both names are needed: the Buildroot one to look for an existing
        # package directory, the PyPI one to add to the download list,
        # e.g. ('werkzeug', 'python-werkzeug')
        return set(
            pypi_name for pypi_name, br_name in zip(pypi_names, self.pkg_req)
            if not os.path.isdir(os.path.join(pkg_folder, br_name)))

    def __create_mk_header(self):
        """
        Create the header of the <package_name>.mk file
        """
        separator = '#' * 80 + '\n'
        return [
            separator,
            '#\n',
            '# {name}\n'.format(name=self.buildroot_name),
            '#\n',
            separator,
            '\n',
        ]

    def __create_mk_download_info(self):
        """
        Create the lines referring to the download information of the
        <package_name>.mk file
        """
        lines = []
        version_line = '{name}_VERSION = {version}\n'.format(
            name=self.mk_name,
            version=self.version)
        lines.append(version_line)

        targz = self.filename.replace(
            self.version,
            '$({name}_VERSION)'.format(name=self.mk_name))
        targz_line = '{name}_SOURCE = {filename}\n'.format(
            name=self.mk_name,
            filename=targz)
        lines.append(targz_line)

        if self.filename not in self.url:
            # Sometimes the filename is in the url, sometimes it's not
            site_url = self.url
        else:
            site_url = self.url[:self.url.find(self.filename)]
        site_line = '{name}_SITE = {url}'.format(name=self.mk_name,
                                                 url=site_url)
        site_line = site_line.rstrip('/') + '\n'
        lines.append(site_line)
        return lines

    def __create_mk_setup(self):
        """
        Create the line referring to the setup method of the package of the
        <package_name>.mk file

        There are two things you can use to make an installer
        for a python package: distutils or setuptools
        distutils comes with python but does not support dependencies.
        distutils is mostly still there for backward support.
        setuptools is what smart people use,
        but it is not shipped with python :(
        """
        setup_type_line = '{name}_SETUP_TYPE = {method}\n'.format(
            name=self.mk_name,
            method=self.setup_metadata['method'])
        return [setup_type_line]

    def __get_license_names(self, license_files):
        """
        Try to determine the related license name.

        There are two possibilities. Either the scripts tries to
        get license name from package's metadata or, if spdx_lookup
        package is available, the script compares license files with
        SPDX database.

        Returns the <NAME>_LICENSE line, or '' if spdx_lookup is available
        but recognised none of the files.
        """
        license_line = ''
        if liclookup is None:
            license_dict = {
                'Apache Software License': 'Apache-2.0',
                'BSD License': 'BSD',
                'European Union Public Licence 1.0': 'EUPL-1.0',
                'European Union Public Licence 1.1': 'EUPL-1.1',
                "GNU General Public License": "GPL",
                "GNU General Public License v2": "GPL-2.0",
                "GNU General Public License v2 or later": "GPL-2.0+",
                "GNU General Public License v3": "GPL-3.0",
                "GNU General Public License v3 or later": "GPL-3.0+",
                "GNU Lesser General Public License v2": "LGPL-2.1",
                "GNU Lesser General Public License v2 or later": "LGPL-2.1+",
                "GNU Lesser General Public License v3": "LGPL-3.0",
                "GNU Lesser General Public License v3 or later": "LGPL-3.0+",
                "GNU Library or Lesser General Public License": "LGPL-2.0",
                "ISC License": "ISC",
                "MIT License": "MIT",
                "Mozilla Public License 1.0": "MPL-1.0",
                "Mozilla Public License 1.1": "MPL-1.1",
                "Mozilla Public License 2.0": "MPL-2.0",
                "Zope Public License": "ZPL"
            }
            # The name group is lazy so that a trailing " (GPLv2)"-style
            # abbreviation is left out of it; a greedy group would swallow
            # it and the dictionary lookup below would never match.
            regexp = re.compile(r'^License :* *.* *:+ (.*?)( \(.*\))?$')
            classifiers_licenses = [regexp.sub(r"\1", lic)
                                    for lic in self.metadata['info']['classifiers']
                                    if regexp.match(lic)]
            licenses = [license_dict.get(x, x) for x in classifiers_licenses]
            if not licenses:
                # Fall back to the free-form field before warning, so that
                # the message shows the value actually written.
                licenses = [self.metadata['info']['license']]
                print('WARNING: License has been set to "{license}". It is most'
                      ' likely wrong, please change it if need be'.format(
                          license=', '.join(licenses)))
            license_line = '{name}_LICENSE = {license}\n'.format(
                name=self.mk_name,
                license=', '.join(licenses))
        else:
            license_names = []
            for license_file in license_files:
                with open(license_file) as lic_file:
                    match = liclookup.match(lic_file.read())
                if match is not None and match.confidence >= 90.0:
                    license_names.append(match.license.id)

            if license_names:
                license_line = ('{name}_LICENSE ='
                                ' {names}\n'.format(
                                    name=self.mk_name,
                                    names=', '.join(license_names)))

        return license_line

    def __relative_to_extract(self, path):
        """
        Return path relative to the directory the package was extracted in
        """
        return os.path.relpath(path, self.tmp_extract)

    def __create_mk_license(self):
        """
        Create the lines referring to the package's license information of the
        <package_name>.mk file

        The license's files are found by searching the package (case insensitive)
        for files named license, license.txt etc. Every file found is listed;
        a notice is printed when there is more than one so that the result
        can be reviewed.
        """
        lines = []

        filenames = ['LICENCE', 'LICENSE', 'LICENSE.RST', 'LICENSE.TXT',
                     'COPYING', 'COPYING.TXT']
        self.license_files = list(find_file_upper_case(filenames, self.tmp_extract))

        lines.append(self.__get_license_names(self.license_files))

        license_files = [self.__relative_to_extract(path)
                         for path in self.license_files]
        if license_files:
            if len(license_files) > 1:
                print('More than one file found for license:',
                      ', '.join(license_files))
            license_file_line = ('{name}_LICENSE_FILES ='
                                 ' {files}\n'.format(
                                     name=self.mk_name,
                                     files=' '.join(license_files)))
            lines.append(license_file_line)
        else:
            print('WARNING: No license file found,'
                  ' please specify it manually afterwards')

        return lines

    def __create_mk_requirements(self):
        """
        Create the lines referring to the dependencies of the
        <package_name>.mk file

        Uses self.mk_name and self.pkg_req; a package without requirements
        gets an empty dependency list.
        """
        dependencies_line = ('{name}_DEPENDENCIES ='
                             ' {reqs}\n'.format(
                                 name=self.mk_name,
                                 reqs=' '.join(self.pkg_req or [])))
        return [dependencies_line]

    def create_package_mk(self):
        """
        Create the lines corresponding to the <package_name>.mk file
        """
        pkg_mk = '{name}.mk'.format(name=self.buildroot_name)
        path_to_mk = os.path.join(self.pkg_dir, pkg_mk)
        print('Creating {file}...'.format(file=path_to_mk))
        lines = self.__create_mk_header()
        lines += self.__create_mk_download_info()
        lines += self.__create_mk_setup()
        lines += self.__create_mk_license()

        lines.append('\n')
        lines.append('$(eval $(python-package))')
        lines.append('\n')
        with open(path_to_mk, 'w') as mk_file:
            mk_file.writelines(lines)

    def create_hash_file(self):
        """
        Create the lines corresponding to the <package_name>.hash files
        """
        pkg_hash = '{name}.hash'.format(name=self.buildroot_name)
        path_to_hash = os.path.join(self.pkg_dir, pkg_hash)
        print('Creating {filename}...'.format(filename=path_to_hash))
        # Hash and file name are separated by two spaces.
        hash_format = '{method}\t{digest}  {filename}\n'
        lines = []
        if self.used_url['md5_digest']:
            md5_comment = '# md5 from {url}, sha256 locally computed\n'.format(
                url=self.metadata_url)
            lines.append(md5_comment)
            lines.append(hash_format.format(
                method='md5',
                digest=self.used_url['md5_digest'],
                filename=self.filename))
        lines.append(hash_format.format(
            method='sha256',
            digest=hashlib.sha256(self.as_string).hexdigest(),
            filename=self.filename))

        for license_file in self.license_files:
            sha256 = hashlib.sha256()
            with open(license_file, 'rb') as lic_f:
                # Hash in chunks so the whole file is never held in memory.
                for data in iter(lambda: lic_f.read(self._HASH_BUF_SIZE), b''):
                    sha256.update(data)
            lines.append(hash_format.format(
                method='sha256',
                digest=sha256.hexdigest(),
                filename=self.__relative_to_extract(license_file)))

        with open(path_to_hash, 'w') as hash_file:
            hash_file.writelines(lines)

    def create_config_in(self):
        """
        Creates the Config.in file of a package
        """
        path_to_config = os.path.join(self.pkg_dir, 'Config.in')
        print('Creating {file}...'.format(file=path_to_config))
        lines = []
        config_line = 'config BR2_PACKAGE_{name}\n'.format(
            name=self.mk_name)
        lines.append(config_line)

        bool_line = '\tbool "{name}"\n'.format(name=self.buildroot_name)
        lines.append(bool_line)
        # pkg_req is None when setup.py declares no requirement
        for dep in self.pkg_req or []:
            dep_line = '\tselect BR2_PACKAGE_{req} # runtime\n'.format(
                req=dep.upper().replace('-', '_'))
            lines.append(dep_line)

        lines.append('\thelp\n')

        # Help text is indented with \t + two spaces (3 char long)
        help_indent = '\t  '
        help_lines = textwrap.wrap(self.metadata['info']['summary'] or '',
                                   initial_indent=help_indent,
                                   subsequent_indent=help_indent)

        # make sure a help text is terminated with a full stop
        # (textwrap.wrap returns no line at all for an empty summary)
        if help_lines and help_lines[-1][-1] != '.':
            help_lines[-1] += '.'

        home_page = self.metadata['info']['home_page']
        if home_page:
            help_lines.append('')
            help_lines.append(help_indent + home_page)
        lines += [x + '\n' for x in help_lines]

        with open(path_to_config, 'w') as config_file:
            config_file.writelines(lines)
def main():
    """
    Command line entry point: create a Buildroot package for every PyPI
    package given as argument and for the dependencies discovered on the way.
    """
    # Building the parser
    parser = argparse.ArgumentParser(
        description="Creates buildroot packages from the metadata of "
                    "an existing PyPI packages and include it "
                    "in menuconfig")
    parser.add_argument("packages",
                        help="list of packages to be created",
                        nargs='+')
    parser.add_argument("-o", "--output",
                        help="""
                        Output directory for packages.
                        Default is ./package
                        """,
                        default='./package')

    args = parser.parse_args()
    packages = list(set(args.packages))

    # tmp_path is where we'll extract the files later
    tmp_prefix = 'scanpypi-'
    pkg_folder = args.output
    tmp_path = tempfile.mkdtemp(prefix=tmp_prefix)
    try:
        # The list grows while it is iterated: dependencies found for a
        # package are appended and therefore processed by this same loop.
        for real_pkg_name in packages:
            package = BuildrootPackage(real_pkg_name, pkg_folder)
            print('buildroot package name for {}:'.format(package.real_name),
                  package.buildroot_name)
            # First we download the package
            # Most of the info we need can only be found inside the package
            print('Package:', package.buildroot_name)
            print('Fetching package', package.real_name)
            try:
                package.fetch_package_info()
            except (six.moves.urllib.error.URLError, six.moves.urllib.error.HTTPError):
                continue
            if package.metadata_name.lower() == 'setuptools':
                # setuptools imports itself, that does not work very well
                # with the monkey patch at the beginning
                print('Error: setuptools cannot be built using scanPyPI')
                continue

            try:
                package.download_package()
            except six.moves.urllib.error.HTTPError as error:
                print('Error: {code} {reason}'.format(code=error.code,
                                                      reason=error.reason))
                print('Error downloading package :', package.buildroot_name)
                print()
                continue
            except DownloadFailed as error:
                # No usable archive: skip this package instead of aborting
                # the whole run with a traceback.
                print('Error: {error}'.format(error=error))
                print('Error downloading package :', package.buildroot_name)
                print()
                continue

            # extract the tarball
            try:
                package.extract_package(tmp_path)
            except (tarfile.ReadError, zipfile.BadZipfile):
                print('Error extracting package {}'.format(package.real_name))
                print()
                continue

            # Loading the package install info from the package
            try:
                package.load_setup()
            except ImportError as err:
                # str(err) works on Python 2 and 3; err.message is Python 2 only
                if 'buildutils' in str(err):
                    print('This package needs buildutils')
                else:
                    raise
                continue
            except AttributeError as error:
                print('Error: Could not install package {pkg}: {error}'.format(
                    pkg=package.real_name, error=error))
                continue

            # Package requirement are an argument of the setup function
            req_not_found = package.get_requirements(pkg_folder)
            req_not_found = req_not_found.difference(packages)

            packages += req_not_found
            if req_not_found:
                print('Added packages \'{pkgs}\' as dependencies of {pkg}'
                      .format(pkgs=", ".join(req_not_found),
                              pkg=package.buildroot_name))
            print('Checking if package {name} already exists...'.format(
                name=package.pkg_dir))
            try:
                os.makedirs(package.pkg_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    print("ERROR: ", exception, file=sys.stderr)
                    continue
                print('Error: Package {name} already exists'
                      .format(name=package.pkg_dir))
                del_pkg = input(
                    'Do you want to delete existing package ? [y/N]')
                if del_pkg.lower() == 'y':
                    shutil.rmtree(package.pkg_dir)
                    os.makedirs(package.pkg_dir)
                else:
                    continue
            package.create_package_mk()

            package.create_hash_file()

            package.create_config_in()
            # printing an empty line for visual comfort
            print()
    finally:
        shutil.rmtree(tmp_path)


if __name__ == "__main__":
    main()