"""
Utility for building Buildroot packages for existing PyPI packages.

Any package built by scanpypi should be manually checked for
errors.
"""
from __future__ import print_function
from __future__ import absolute_import
import argparse
import errno
import hashlib
import imp
import io
import json
import os
import re
import shutil
import sys
import tarfile
import tempfile
import textwrap
import zipfile
from functools import wraps

import six.moves.urllib.request
import six.moves.urllib.error
import six.moves.urllib.parse
from six.moves import map
from six.moves import zip
from six.moves import input
# spdx_lookup is optional: when it is missing, liclookup is set to None and
# the licence name is derived from the PyPI classifiers instead of from the
# contents of the licence files.
try:
    import spdx_lookup as liclookup
except ImportError:
    # spdx_lookup is not installed
    print('spdx_lookup module is not installed. This can lead to an '
          'inaccurate licence detection. Please install it via\n'
          'pip install spdx_lookup')
    liclookup = None
def setup_decorator(func, method):
    """
    Decorator for distutils.core.setup and setuptools.setup.

    The returned wrapper does not run the wrapped setup function. It only
    stores the keyword arguments setup was called with, as a dict, in
    BuildrootPackage.setup_args under the package's 'name' argument, and
    adds a key 'method' which should be either 'setuptools' or 'distutils'.

    Keyword arguments:
    func -- either setuptools.setup or distutils.core.setup
    method -- either 'setuptools' or 'distutils'

    Returns the wrapper function. Calling the wrapper without a 'name'
    keyword argument raises KeyError.
    """

    @wraps(func)
    def closure(*args, **kwargs):
        # Any python package calls its setup function to be installed.
        # Argument 'name' of this setup function is the package's name.
        BuildrootPackage.setup_args[kwargs['name']] = dict(kwargs,
                                                          method=method)
    return closure
# Monkey patch both setup entry points so that running a package's setup.py
# only records the arguments it passes to setup().
import setuptools  # noqa E402
setuptools.setup = setup_decorator(setuptools.setup, 'setuptools')
# Import the submodule explicitly: a bare 'import distutils' does not
# guarantee that distutils.core is loaded.
import distutils.core  # noqa E402
# Wrap the distutils entry point itself, not the already patched
# setuptools.setup.
distutils.core.setup = setup_decorator(distutils.core.setup, 'distutils')
def find_file_upper_case(filenames, path='./'):
    """
    List generator:
    Recursively find files that matches one of the specified filenames.
    The comparison is done on the upper-cased file name, so the entries of
    filenames must be upper case.
    Yields paths that start with the path argument.

    Keyword arguments:
    filenames -- List of (upper case) filenames to be found
    path -- Path to the directory to search
    """
    wanted = set(filenames)
    for root, _dirs, files in os.walk(path):
        for entry in files:
            if entry.upper() in wanted:
                yield os.path.join(root, entry)
def pkg_buildroot_name(pkg_name):
    """
    Returns the Buildroot package name for the PyPI package pkg_name.
    Remove all non alphanumeric characters except - (and _, which \\w
    also matches).
    Also lowers the name and adds the 'python-' prefix unless the name
    already starts with it.

    Keyword arguments:
    pkg_name -- String to rename
    """
    name = re.sub(r'[^\w-]', '', pkg_name.lower())
    prefix = 'python-'
    # The negative lookahead leaves names that already carry the prefix
    # untouched; an empty name does not match and stays empty.
    pattern = re.compile('^(?!' + prefix + ')(.+?)$')
    name = pattern.sub(r'python-\1', name)
    return name
class DownloadFailed(Exception):
    """Raised when no usable source archive could be downloaded."""
    pass
class BuildrootPackage():
    """Builds the Buildroot files (.mk, .hash, Config.in) of a PyPI package.

    This class's methods are not meant to be used individually please
    use them in the correct order:

    __init__

    fetch_package_info

    download_package

    extract_package

    load_setup

    get_requirements

    create_package_mk

    create_hash_file

    create_config_in

    """
    # Keyword arguments of every intercepted setup() call, keyed by the
    # 'name' argument of that call (filled in by setup_decorator's wrapper).
    setup_args = {}

    # Translation of PyPI licence classifiers into Buildroot licence names.
    _CLASSIFIER_LICENSES = {
        'Apache Software License': 'Apache-2.0',
        'BSD License': 'BSD',
        'European Union Public Licence 1.0': 'EUPL-1.0',
        'European Union Public Licence 1.1': 'EUPL-1.1',
        "GNU General Public License": "GPL",
        "GNU General Public License v2": "GPL-2.0",
        "GNU General Public License v2 or later": "GPL-2.0+",
        "GNU General Public License v3": "GPL-3.0",
        "GNU General Public License v3 or later": "GPL-3.0+",
        "GNU Lesser General Public License v2": "LGPL-2.1",
        "GNU Lesser General Public License v2 or later": "LGPL-2.1+",
        "GNU Lesser General Public License v3": "LGPL-3.0",
        "GNU Lesser General Public License v3 or later": "LGPL-3.0+",
        "GNU Library or Lesser General Public License": "LGPL-2.0",
        "ISC License": "ISC",
        "MIT License": "MIT",
        "Mozilla Public License 1.0": "MPL-1.0",
        "Mozilla Public License 1.1": "MPL-1.1",
        "Mozilla Public License 2.0": "MPL-2.0",
        "Zope Public License": "ZPL"
    }

    # Chunk size used when hashing licence files.
    _HASH_BUF_SIZE = 65536

    def __init__(self, real_name, pkg_folder):
        """
        Keyword arguments:
        real_name -- name of the package on PyPI
        pkg_folder -- directory in which the Buildroot package is created
        """
        self.real_name = real_name
        self.buildroot_name = pkg_buildroot_name(self.real_name)
        self.pkg_dir = os.path.join(pkg_folder, self.buildroot_name)
        self.mk_name = self.buildroot_name.upper().replace('-', '_')
        self.as_string = None
        self.md5_sum = None
        self.metadata = None
        self.metadata_name = None
        self.metadata_url = None
        self.pkg_req = None
        self.setup_metadata = None
        self.tmp_extract = None
        self.used_url = None
        self.filename = None
        self.url = None
        self.version = None
        self.license_files = []

    def fetch_package_info(self):
        """
        Fetch a package's metadata from the python package index

        Sets self.metadata_url, self.metadata, self.version and
        self.metadata_name. URLError/HTTPError are reported and re-raised.
        """
        self.metadata_url = 'https://pypi.org/pypi/{pkg}/json'.format(
            pkg=self.real_name)
        try:
            pkg_json = six.moves.urllib.request.urlopen(
                self.metadata_url).read().decode()
        except six.moves.urllib.error.URLError as error:
            # HTTPError is a subclass of URLError; only it carries a status.
            if isinstance(error, six.moves.urllib.error.HTTPError):
                print('ERROR:', error.getcode(), error.msg, file=sys.stderr)
            print('ERROR: Could not find package {pkg}.\n'
                  'Check syntax inside the python package index:\n'
                  'https://pypi.python.org/pypi/ '
                  .format(pkg=self.real_name))
            raise
        self.metadata = json.loads(pkg_json)
        self.version = self.metadata['info']['version']
        self.metadata_name = self.metadata['info']['name']

    def download_package(self):
        """
        Download a package using metadata from pypi

        The first non-bdist URL that downloads and whose md5 matches (when
        an md5 is published) is kept in self.used_url / self.as_string.

        Raises the last HTTPError if the last attempt failed with one, and
        DownloadFailed if no archive could be used at all.
        """
        download = None
        try:
            self.metadata['urls'][0]['filename']
        except IndexError:
            print(
                'Non-conventional package, ',
                'please check carefully after creation')
            self.metadata['urls'] = [{
                'packagetype': 'sdist',
                'url': self.metadata['info']['download_url'],
                'digests': {}}]
            # In this case, we can't get the name of the downloaded file
            # from the pypi api, so we need to find it, this should work
            urlpath = six.moves.urllib.parse.urlparse(
                self.metadata['info']['download_url']).path
            # urlparse().path give something like
            # /path/to/file-version.tar.gz
            # We use basename to remove /path/to
            self.metadata['urls'][0]['filename'] = os.path.basename(urlpath)
        for download_url in self.metadata['urls']:
            if 'bdist' in download_url['packagetype']:
                continue
            try:
                print('Downloading package {pkg} from {url}...'.format(
                    pkg=self.real_name, url=download_url['url']))
                download = six.moves.urllib.request.urlopen(
                    download_url['url'])
            except six.moves.urllib.error.HTTPError as http_error:
                download = http_error
            else:
                self.used_url = download_url
                self.as_string = download.read()
                # 'digests' may be missing or empty for non-conventional
                # packages; without an md5 the download is accepted as is.
                expected_md5 = (download_url.get('digests') or {}).get('md5')
                if not expected_md5:
                    break
                self.md5_sum = hashlib.md5(self.as_string).hexdigest()
                if self.md5_sum == expected_md5:
                    break
        else:
            # No URL produced a usable archive.
            if isinstance(download, six.moves.urllib.error.HTTPError):
                raise download
            raise DownloadFailed('Failed to download package {pkg}'
                                 .format(pkg=self.real_name))
        self.filename = self.used_url['filename']
        self.url = self.used_url['url']

    @staticmethod
    def _make_clean_dir(path):
        """Create directory path, replacing it if it already exists."""
        try:
            os.makedirs(path)
        except OSError as exception:
            if exception.errno != errno.EEXIST:
                print("ERROR: ", exception, file=sys.stderr)
                raise
            print('WARNING:', exception, file=sys.stderr)
            print('Removing {pkg}...'.format(pkg=path))
            shutil.rmtree(path)
            os.makedirs(path)

    def extract_package(self, tmp_path):
        """
        Extract the package contents into a directrory

        The archive held in self.as_string is unpacked into
        <tmp_path>/<buildroot_name> and self.tmp_extract is set to the
        expected <name>-<version> directory inside it.

        Keyword arguments:
        tmp_path -- directory where you want the package to be extracted
        """
        as_file = io.BytesIO(self.as_string)
        tmp_pkg = os.path.join(tmp_path, self.buildroot_name)
        # Open the archive before touching the file system so that a corrupt
        # download fails without creating or removing anything.
        if self.filename.endswith('zip'):
            archive = zipfile.ZipFile(as_file)
        else:
            archive = tarfile.open(fileobj=as_file)
        with archive:
            self._make_clean_dir(tmp_pkg)
            archive.extractall(tmp_pkg)

        tmp_extract = '{folder}/{name}-{version}'
        self.tmp_extract = tmp_extract.format(
            folder=tmp_pkg,
            name=self.metadata_name,
            version=self.version)

    def load_setup(self):
        """
        Loads the corresponding setup and store its metadata

        Importing setup.py executes it; the patched setup() functions record
        their arguments in setup_args. The working directory, sys.path and
        sys.modules are restored even when loading fails.
        """
        current_dir = os.getcwd()
        os.chdir(self.tmp_extract)
        sys.path.append(self.tmp_extract)
        s_file = None
        try:
            s_file, s_path, s_desc = imp.find_module('setup',
                                                     [self.tmp_extract])
            setup = imp.load_module('setup', s_file, s_path, s_desc)
            try:
                self.setup_metadata = self.setup_args[self.metadata_name]
            except KeyError:
                # This means setup was not called which most likely mean
                # that it is called through the if __name__ == '__main__'
                # directive.
                # In this case, we can only pray that it is called through a
                # function called main() in setup.py.
                setup.main()  # Will raise AttributeError if not found
                self.setup_metadata = self.setup_args[self.metadata_name]
        finally:
            # Here we must remove the module the hard way.
            # We must do this because of a very specific case: if a package
            # calls setup from the __main__ but does not come with a
            # 'main()' function, for some reason setup.main() will
            # successfully call the main function of a previous package...
            sys.modules.pop('setup', None)
            if s_file is not None:
                s_file.close()
            os.chdir(current_dir)
            sys.path.remove(self.tmp_extract)

    def get_requirements(self, pkg_folder):
        """
        Retrieve dependencies from the metadata found in the setup.py script
        of a pypi package.

        Sets self.pkg_req to the Buildroot names of the dependencies (None
        when setup declared no 'install_requires') and returns the set of
        PyPI names whose Buildroot package does not exist in pkg_folder.

        Keyword Arguments:
        pkg_folder -- location of the already created packages
        """
        if 'install_requires' not in self.setup_metadata:
            self.pkg_req = None
            return set()
        # Keep only the distribution name of each requirement specifier.
        requirements = [re.sub(r'([-.\w]+).*', r'\1', req)
                        for req in self.setup_metadata['install_requires']]

        # get rid of commented lines and also strip the package strings
        requirements = [item.strip() for item in requirements
                        if len(item) > 0 and item[0] != '#']

        self.pkg_req = [pkg_buildroot_name(req) for req in requirements]
        # Both names are needed: the Buildroot one to look for an existing
        # package directory, the PyPI one to add to the download list.
        return set(
            pypi_name
            for pypi_name, br_name in zip(requirements, self.pkg_req)
            if not os.path.isdir(os.path.join(pkg_folder, br_name)))

    def _relative_to_extract(self, path):
        """Return path relative to the extracted source directory."""
        return os.path.relpath(path, self.tmp_extract)

    def __create_mk_header(self):
        """
        Create the header of the <package_name>.mk file
        """
        header = ['#' * 80 + '\n']
        header.append('#\n')
        header.append('# {name}\n'.format(name=self.buildroot_name))
        header.append('#\n')
        header.append('#' * 80 + '\n')
        header.append('\n')
        return header

    def __create_mk_download_info(self):
        """
        Create the lines refering to the download information of the
        <package_name>.mk file
        """
        lines = []
        version_line = '{name}_VERSION = {version}\n'.format(
            name=self.mk_name,
            version=self.version)
        lines.append(version_line)

        targz = self.filename.replace(
            self.version,
            '$({name}_VERSION)'.format(name=self.mk_name))
        targz_line = '{name}_SOURCE = {filename}\n'.format(
            name=self.mk_name,
            filename=targz)
        lines.append(targz_line)

        if self.filename not in self.url:
            # Sometimes the filename is in the url, sometimes it's not
            site_url = self.url
        else:
            site_url = self.url[:self.url.find(self.filename)]
        site_line = '{name}_SITE = {url}'.format(name=self.mk_name,
                                                 url=site_url)
        site_line = site_line.rstrip('/') + '\n'
        lines.append(site_line)
        return lines

    def __create_mk_setup(self):
        """
        Create the line refering to the setup method of the package of the
        <package_name>.mk file

        There are two things you can use to make an installer
        for a python package: distutils or setuptools
        distutils comes with python but does not support dependencies.
        distutils is mostly still there for backward support.
        setuptools is what smart people use,
        but it is not shipped with python :(
        """
        lines = []
        setup_type_line = '{name}_SETUP_TYPE = {method}\n'.format(
            name=self.mk_name,
            method=self.setup_metadata['method'])
        lines.append(setup_type_line)
        return lines

    def __get_license_names(self, license_files):
        """
        Try to determine the related license name.

        There are two possibilities. Either the scripts tries to
        get license name from package's metadata or, if spdx_lookup
        package is available, the script compares license files with
        SPDX database.

        Returns the <NAME>_LICENSE line, or an empty string when
        spdx_lookup is available but recognised none of the files.
        """
        license_line = ''
        if liclookup is None:
            regexp = re.compile(r'^License :* *.* *:+ (.*)( \(.*\))?$')
            classifiers_licenses = [
                regexp.sub(r"\1", lic)
                for lic in self.metadata['info']['classifiers']
                if regexp.match(lic)]
            licenses = [self._CLASSIFIER_LICENSES.get(lic, lic)
                        for lic in classifiers_licenses]
            if not licenses:
                # Fall back on the free-form licence field. It must be
                # assigned before the warning so that the warning shows it.
                licenses = [self.metadata['info']['license'] or 'unknown']
                print('WARNING: License has been set to "{license}". It is most'
                      ' likely wrong, please change it if need be'.format(
                          license=', '.join(licenses)))
            license_line = '{name}_LICENSE = {license}\n'.format(
                name=self.mk_name,
                license=', '.join(licenses))
        else:
            license_names = []
            for license_file in license_files:
                with open(license_file) as lic_file:
                    match = liclookup.match(lic_file.read())
                if match is not None and match.confidence >= 90.0:
                    license_names.append(match.license.id)

            if len(license_names) > 0:
                license_line = ('{name}_LICENSE ='
                                ' {names}\n'.format(
                                    name=self.mk_name,
                                    names=', '.join(license_names)))

        return license_line

    def __create_mk_license(self):
        """
        Create the lines referring to the package's license informations of
        the <package_name>.mk file

        The license's files are found by searching the package (case
        insensitive) for files named license, license.txt etc. Every file
        found is listed in <NAME>_LICENSE_FILES.
        """
        lines = []

        filenames = ['LICENCE', 'LICENSE', 'LICENSE.RST', 'LICENSE.TXT',
                     'COPYING', 'COPYING.TXT']
        self.license_files = list(
            find_file_upper_case(filenames, self.tmp_extract))

        lines.append(self.__get_license_names(self.license_files))

        license_files = [self._relative_to_extract(path)
                         for path in self.license_files]
        if license_files:
            if len(license_files) > 1:
                print('More than one file found for license:',
                      ', '.join(license_files))
            license_file_line = ('{name}_LICENSE_FILES ='
                                 ' {files}\n'.format(
                                     name=self.mk_name,
                                     files=' '.join(license_files)))
            lines.append(license_file_line)
        else:
            print('WARNING: No license file found,'
                  ' please specify it manually afterwards')

        return lines

    def __create_mk_requirements(self):
        """
        Create the lines referring to the dependencies of the of the
        <package_name>.mk file
        """
        lines = []
        dependencies_line = ('{name}_DEPENDENCIES ='
                             ' {reqs}\n'.format(
                                 name=self.mk_name,
                                 reqs=' '.join(self.pkg_req or [])))
        lines.append(dependencies_line)
        return lines

    def create_package_mk(self):
        """
        Create the lines corresponding to the <package_name>.mk file
        """
        pkg_mk = '{name}.mk'.format(name=self.buildroot_name)
        path_to_mk = os.path.join(self.pkg_dir, pkg_mk)
        print('Creating {file}...'.format(file=path_to_mk))
        lines = self.__create_mk_header()
        lines += self.__create_mk_download_info()
        lines += self.__create_mk_setup()
        lines += self.__create_mk_license()

        lines.append('\n')
        lines.append('$(eval $(python-package))')
        lines.append('\n')
        with open(path_to_mk, 'w') as mk_file:
            mk_file.writelines(lines)

    def create_hash_file(self):
        """
        Create the lines corresponding to the <package_name>.hash files

        The archive hashes are the ones published by PyPI (written only
        when both md5 and sha256 are available); the licence file hashes
        are computed locally.
        """
        pkg_hash = '{name}.hash'.format(name=self.buildroot_name)
        path_to_hash = os.path.join(self.pkg_dir, pkg_hash)
        print('Creating {filename}...'.format(filename=path_to_hash))
        lines = []
        digests = self.used_url.get('digests') or {}
        if digests.get('md5') and digests.get('sha256'):
            hash_header = '# md5, sha256 from {url}\n'.format(
                url=self.metadata_url)
            lines.append(hash_header)
            for method in ('md5', 'sha256'):
                hash_line = '{method}\t{digest} {filename}\n'.format(
                    method=method,
                    digest=digests[method],
                    filename=self.filename)
                lines.append(hash_line)

        if self.license_files:
            lines.append('# Locally computed sha256 checksums\n')
        for license_file in self.license_files:
            sha256 = hashlib.sha256()
            with open(license_file, 'rb') as lic_f:
                for data in iter(
                        lambda: lic_f.read(self._HASH_BUF_SIZE), b''):
                    sha256.update(data)
            hash_line = '{method}\t{digest} {filename}\n'.format(
                method='sha256',
                digest=sha256.hexdigest(),
                filename=self._relative_to_extract(license_file))
            lines.append(hash_line)

        with open(path_to_hash, 'w') as hash_file:
            hash_file.writelines(lines)

    def create_config_in(self):
        """
        Creates the Config.in file of a package
        """
        path_to_config = os.path.join(self.pkg_dir, 'Config.in')
        print('Creating {file}...'.format(file=path_to_config))
        lines = []
        config_line = 'config BR2_PACKAGE_{name}\n'.format(
            name=self.mk_name)
        lines.append(config_line)

        bool_line = '\tbool "{name}"\n'.format(name=self.buildroot_name)
        lines.append(bool_line)
        if self.pkg_req:
            for dep in self.pkg_req:
                dep_line = '\tselect BR2_PACKAGE_{req} # runtime\n'.format(
                    req=dep.upper().replace('-', '_'))
                lines.append(dep_line)

        lines.append('\thelp\n')

        # \t + two spaces is 3 char long
        indent = '\t  '
        summary = self.metadata['info']['summary'] or ''
        help_lines = textwrap.wrap(summary, 62,
                                   initial_indent=indent,
                                   subsequent_indent=indent)

        # make sure a help text is terminated with a full stop
        # (an empty summary wraps to no line at all)
        if help_lines and not help_lines[-1].endswith('.'):
            help_lines[-1] += '.'

        home_page = self.metadata['info']['home_page']
        if home_page:
            if help_lines:
                help_lines.append('')
            help_lines.append(indent + home_page)
        lines += [x + '\n' for x in help_lines]

        with open(path_to_config, 'w') as config_file:
            config_file.writelines(lines)
def main():
    """
    Command line entry point: create a Buildroot package for every PyPI
    package named on the command line, and for each of their dependencies
    that does not have a Buildroot package yet.

    A package that cannot be fetched, downloaded, extracted or loaded is
    reported and skipped; the remaining packages are still processed.
    """
    # Building the parser
    parser = argparse.ArgumentParser(
        description="Creates buildroot packages from the metadata of "
                    "an existing PyPI packages and include it "
                    "in menuconfig")
    parser.add_argument("packages",
                        help="list of packages to be created",
                        nargs='+')
    parser.add_argument("-o", "--output",
                        help="""
                        Output directory for packages.
                        Default is ./package
                        """,
                        default='./package')

    args = parser.parse_args()
    packages = list(set(args.packages))

    # tmp_path is where we'll extract the files later
    tmp_prefix = 'scanpypi-'
    pkg_folder = args.output
    tmp_path = tempfile.mkdtemp(prefix=tmp_prefix)
    try:
        # 'packages' grows while it is iterated: missing dependencies are
        # appended below and get processed by this same loop.
        for real_pkg_name in packages:
            package = BuildrootPackage(real_pkg_name, pkg_folder)
            print('buildroot package name for {}:'.format(package.real_name),
                  package.buildroot_name)
            # First we download the package
            # Most of the info we need can only be found inside the package
            print('Package:', package.buildroot_name)
            print('Fetching package', package.real_name)
            try:
                package.fetch_package_info()
            except (six.moves.urllib.error.URLError,
                    six.moves.urllib.error.HTTPError):
                continue
            if package.metadata_name.lower() == 'setuptools':
                # setuptools imports itself, that does not work very well
                # with the monkey path at the begining
                print('Error: setuptools cannot be built using scanPyPI')
                continue

            try:
                package.download_package()
            except six.moves.urllib.error.HTTPError as error:
                print('Error: {code} {reason}'.format(code=error.code,
                                                      reason=error.reason))
                print('Error downloading package :', package.buildroot_name)
                print()
                continue
            except DownloadFailed as error:
                # No usable source archive: skip this package instead of
                # aborting the whole run.
                print('Error: {error}'.format(error=error))
                print('Error downloading package :', package.buildroot_name)
                print()
                continue

            # extract the tarball
            try:
                package.extract_package(tmp_path)
            except (tarfile.ReadError, zipfile.BadZipfile):
                print('Error extracting package {}'.format(package.real_name))
                print()
                continue

            # Loading the package install info from the package
            try:
                package.load_setup()
            except ImportError as err:
                # Exceptions have no 'message' attribute on Python 3.
                if 'buildutils' in str(err):
                    print('This package needs buildutils')
                else:
                    raise
                continue
            except AttributeError as error:
                print('Error: Could not install package {pkg}: {error}'.format(
                    pkg=package.real_name, error=error))
                continue

            # Package requirement are an argument of the setup function
            req_not_found = package.get_requirements(pkg_folder)
            req_not_found = req_not_found.difference(packages)

            packages += req_not_found
            if req_not_found:
                print('Added packages \'{pkgs}\' as dependencies of {pkg}'
                      .format(pkgs=", ".join(req_not_found),
                              pkg=package.buildroot_name))
            print('Checking if package {name} already exists...'.format(
                name=package.pkg_dir))
            try:
                os.makedirs(package.pkg_dir)
            except OSError as exception:
                if exception.errno != errno.EEXIST:
                    print("ERROR: ", exception, file=sys.stderr)
                    continue
                print('Error: Package {name} already exists'
                      .format(name=package.pkg_dir))
                del_pkg = input(
                    'Do you want to delete existing package ? [y/N]')
                if del_pkg.lower() == 'y':
                    shutil.rmtree(package.pkg_dir)
                    os.makedirs(package.pkg_dir)
                else:
                    continue
            package.create_package_mk()

            package.create_hash_file()

            package.create_config_in()
            # printing an empty line for visual confort
            print()
    finally:
        shutil.rmtree(tmp_path)


if __name__ == "__main__":
    main()